00001
00785 { 0, 2, 2, 2, 2, 1, 1},
00786 { 2, 0, 2, 2, 2, INF, INF},
00787 { 2, 2, 0, 1, 2, INF, INF},
00788 { 2, 2, 1, 0, 2, INF, INF},
00789 { 2, 2, 2, 2, 0, INF, INF},
00790 { 1, INF, INF, INF, INF, 0, INF},
00791 { 1, INF, INF, INF, INF, INF, 0},
00792
00793
00794
00795 { 0, 100, 5, 5, 75, 5, 5},
00796 { 100, 0, 8, 8, 8, INF, INF},
00797 { 5, 8, 0, 3, 8, INF, INF},
00798 { 5, 8, 3, 0, 8, INF, INF},
00799 { 75, 8, 8, 8, 0, INF, INF},
00800 { 5, INF, INF, INF, INF, 0, INF},
00801 { 5, INF, INF, INF, INF, INF, 0},
00802 \endverbatim
00803
00804 The lower matrix uses the costs given in \ref shapiro_90 "Shapiro (1990)".
00805 All distance functions use the following global variables:
00806
00807 \verbatim
00808 int cost_matrix;
00809 \endverbatim
00810 \copybrief cost_matrix
00811
00812 \verbatim
00813 int edit_backtrack;
00814 \endverbatim
00815 \copybrief edit_backtrack
00816
00817 \verbatim
00818 char *aligned_line[4];
00819 \endverbatim
00820 \copybrief aligned_line
00821
00822 \see utils.h, dist_vars.h and stringdist.h for more details
00823
00824 <h3>Functions for Tree Edit Distances</h3>
00825
00826 \verbatim
00827 Tree *make_tree (char *struc)
00828 \endverbatim
00829 \copybrief make_tree()
00830
00831 \verbatim
00832 float tree_edit_distance (Tree *T1,
00833 Tree *T2)
00834 \endverbatim
00835 \copybrief tree_edit_distance()
00836
00837 \verbatim
00838 void free_tree(Tree *t)
00839 \endverbatim
00840 \copybrief free_tree()
00841
00842 \see dist_vars.h and treedist.h for prototypes and more detailed descriptions
00843
00844 <h3>Functions for String Alignment</h3>
00845
00846 \verbatim
00847 swString *Make_swString (char *string)
00848 \endverbatim
00849 \copybrief Make_swString()
00850
00851 \verbatim
00852 float string_edit_distance (swString *T1,
00853 swString *T2)
00854 \endverbatim
00855 \copybrief string_edit_distance()
00856
00857 \see dist_vars.h and stringdist.h for prototypes and more detailed descriptions
00858
00859 <h3>Functions for Comparison of Base Pair Probabilities</h3>
00860
00861 For comparison of base pair probability matrices, the matrices are first
00862 condensed into probability profiles which are then compared by alignment.
00863
00864 \verbatim
00865 float *Make_bp_profile_bppm ( double *bppm,
00866 int length)
00867 \endverbatim
00868 \copybrief Make_bp_profile_bppm()
00869
00870 \verbatim
00871 float profile_edit_distance ( const float *T1,
00872 const float *T2)
00873 \endverbatim
00874 \copybrief profile_edit_distance()
00875
00876 \see ProfileDist.h for prototypes and more details of the above functions
00877
00878 \ref mp_utils "Next Page: Utilities"
00879
00880 \page mp_utils Utilities - Odds and Ends
00881
00882 \anchor toc
00883
00884 <h3>Table of Contents</h3>
00885 <hr>
00886
00887 \li \ref utils_ss
00888 \li \ref utils_dot
00889 \li \ref utils_aln
00890 \li \ref utils_seq
00891 \li \ref utils_struc
00892 \li \ref utils_misc
00893
00894 <hr>
00895
00896 \section utils_ss Producing secondary structure graphs
00897
00898 \verbatim
00899 int PS_rna_plot ( char *string,
00900 char *structure,
00901 char *file)
00902 \endverbatim
00903 \copybrief PS_rna_plot()
00904
00905 \verbatim
00906 int PS_rna_plot_a (
00907 char *string,
00908 char *structure,
00909 char *file,
00910 char *pre,
00911 char *post)
00912 \endverbatim
00913 \copybrief PS_rna_plot_a()
00914
00915 \verbatim
00916 int gmlRNA (char *string,
00917 char *structure,
00918 char *ssfile,
00919 char option)
00920 \endverbatim
00921 \copybrief gmlRNA()
00922
00923 \verbatim
00924 int ssv_rna_plot (char *string,
00925 char *structure,
00926 char *ssfile)
00927 \endverbatim
00928 \copybrief ssv_rna_plot()
00929
00930 \verbatim
00931 int svg_rna_plot (char *string,
00932 char *structure,
00933 char *ssfile)
00934 \endverbatim
00935 \copybrief svg_rna_plot()
00936
00937 \verbatim
00938 int xrna_plot ( char *string,
00939 char *structure,
00940 char *ssfile)
00941 \endverbatim
00942 \copybrief xrna_plot()
00943
00944 \verbatim
00945 int rna_plot_type
00946 \endverbatim
00947 \copybrief rna_plot_type
00948
00949 Two low-level functions provide direct access to the graph layout
00950 algorithms:
00951
00952 \verbatim
00953 int simple_xy_coordinates ( short *pair_table,
00954 float *X,
00955 float *Y)
00956 \endverbatim
00957 \copybrief simple_xy_coordinates()
00958
00959 \verbatim
00960 int naview_xy_coordinates ( short *pair_table,
00961 float *X,
00962 float *Y)
00963 \endverbatim
00964 \copybrief naview_xy_coordinates()
00965
00966 \see PS_dot.h and naview.h for more detailed descriptions.
00967
00968 \htmlonly
00969 <hr>
00970 <a href="#toc">Table of Contents</a>
00971 <hr>
00972 \endhtmlonly
00973
00974 \section utils_dot Producing (colored) dot plots for base pair probabilities
00975
00976 \verbatim
00977 int PS_color_dot_plot ( char *string,
00978 cpair *pi,
00979 char *filename)
00980 \endverbatim
00981 \copybrief PS_color_dot_plot()
00982
00983 \verbatim
00984 int PS_color_dot_plot_turn (char *seq,
00985 cpair *pi,
00986 char *filename,
00987 int winSize)
00988 \endverbatim
00989 \copybrief PS_color_dot_plot_turn()
00990
00991 \verbatim
00992 int PS_dot_plot_list (char *seq,
00993 char *filename,
00994 plist *pl,
00995 plist *mf,
00996 char *comment)
00997 \endverbatim
00998 \copybrief PS_dot_plot_list()
00999
01000 \verbatim
01001 int PS_dot_plot_turn (char *seq,
01002 struct plist *pl,
01003 char *filename,
01004 int winSize)
01005 \endverbatim
01006 \copybrief PS_dot_plot_turn()
01007
01008 \see PS_dot.h for more detailed descriptions.
01009
01010 \section utils_aln Producing (colored) alignments
01011
01012 \verbatim
01013 int PS_color_aln (
01014 const char *structure,
01015 const char *filename,
01016 const char *seqs[],
01017 const char *names[])
01018 \endverbatim
01019 \copybrief PS_color_aln()
01020
01021 \htmlonly
01022 <hr>
01023 <a href="#toc">Table of Contents</a>
01024 <hr>
01025 \endhtmlonly
01026
01027 \section utils_seq RNA sequence related utilities
01028
01029 Several functions provide useful applications to RNA sequences:
01030
01031 \verbatim
01032 char *random_string (int l,
01033 const char symbols[])
01034 \endverbatim
01035 \copybrief random_string()
01036
01037 \verbatim
01038 int hamming ( const char *s1,
01039 const char *s2)
01040 \endverbatim
01041 \copybrief hamming()
01042
01043 \verbatim
01044 void str_DNA2RNA(char *sequence);
01045 \endverbatim
01046 \copybrief str_DNA2RNA()
01047
01048 \verbatim
01049 void str_uppercase(char *sequence);
01050 \endverbatim
01051 \copybrief str_uppercase()
01052
01053 \htmlonly
01054 <hr>
01055 <a href="#toc">Table of Contents</a>
01056 <hr>
01057 \endhtmlonly
01058
01059 \section utils_struc RNA secondary structure related utilities
01060
01061 \verbatim
01062 char *pack_structure (const char *struc)
01063 \endverbatim
01064 \copybrief pack_structure()
01065
01066 \verbatim
01067 char *unpack_structure (const char *packed)
01068 \endverbatim
01069 \copybrief unpack_structure()
01070
01071 \verbatim
01072 short *make_pair_table (const char *structure)
01073 \endverbatim
01074 \copybrief make_pair_table()
01075
01076 \verbatim
01077 short *copy_pair_table (const short *pt)
01078 \endverbatim
01079 \copybrief copy_pair_table()
01080
01081 \htmlonly
01082 <hr>
01083 <a href="#toc">Table of Contents</a>
01084 <hr>
01085 \endhtmlonly
01086
01087 \section utils_misc Miscellaneous Utilities
01088
01089 \verbatim
01090 void print_tty_input_seq (void)
01091 \endverbatim
01092 \copybrief print_tty_input_seq()
01093
01094 \verbatim
01095 void print_tty_constraint_full (void)
01096 \endverbatim
01097 \copybrief print_tty_constraint_full()
01098
01099 \verbatim
01100 void print_tty_constraint (unsigned int option)
01101 \endverbatim
01102 \copybrief print_tty_constraint()
01103
01104 \verbatim
01105 int *get_iindx (unsigned int length)
01106 \endverbatim
01107 \copybrief get_iindx()
01108
01109 \verbatim
01110 int *get_indx (unsigned int length)
01111 \endverbatim
01112 \copybrief get_indx()
01113
01114 \verbatim
01115 void constrain_ptypes (
01116 const char *constraint,
01117 unsigned int length,
01118 char *ptype,
01119 int *BP,
01120 int min_loop_size,
01121 unsigned int idx_type)
01122 \endverbatim
01123 \copybrief constrain_ptypes()
01124
01125 \verbatim
01126 char *get_line(FILE *fp);
01127 \endverbatim
01128 \copybrief get_line()
01129
01130 \verbatim
01131 unsigned int read_record(
01132 char **header,
01133 char **sequence,
01134 char ***rest,
01135 unsigned int options);
01136 \endverbatim
01137 \copybrief read_record()
01138
01139 \verbatim
01140 char *time_stamp (void)
01141 \endverbatim
01142 \copybrief time_stamp()
01143
01144 \verbatim
01145 void warn_user (const char message[])
01146 \endverbatim
01147 \copybrief warn_user()
01148
01149 \verbatim
01150 void nrerror (const char message[])
01151 \endverbatim
01152 \copybrief nrerror()
01153
01154 \verbatim
01155 void init_rand (void)
01156 \endverbatim
01157 \copybrief init_rand()
01158
01159 \verbatim
01160 unsigned short xsubi[3];
01161 \endverbatim
01162 \copybrief xsubi
01163
01164 \verbatim
01165 double urn (void)
01166 \endverbatim
01167 \copybrief urn()
01168
01169 \verbatim
01170 int int_urn (int from, int to)
01171 \endverbatim
01172 \copybrief int_urn()
01173
01174 \verbatim
01175 void *space (unsigned size)
01176 \endverbatim
01177 \copybrief space()
01178
01179 \verbatim
01180 void *xrealloc ( void *p,
01181 unsigned size)
01182 \endverbatim
01183 \copybrief xrealloc()
01184
01185 \see utils.h for a complete overview and detailed description of the utility functions
01186
01187 \htmlonly
01188 <hr>
01189 <a href="#toc">Table of Contents</a>
01190 <hr>
01191 \endhtmlonly
01192
01193 \ref mp_example "Next Page: Examples"
01194
01195 \page mp_example Example - A Small Example Program
01196
01197 The following program exercises most commonly used functions of the library.
01198 The program folds two sequences using both the mfe and partition function
01199 algorithms and calculates the tree edit and profile distance of the
01200 resulting structures and base pairing probabilities.
01201
01202 \verbatim
01203 #include <stdio.h>
01204 #include <math.h>
01205 #include "utils.h"
01206 #include "fold_vars.h"
01207 #include "fold.h"
01208 #include "part_func.h"
01209 #include "inverse.h"
01210 #include "RNAstruct.h"
01211 #include "treedist.h"
01212 #include "stringdist.h"
01213 #include "ProfileDist.h"
01214
01215 void main()
01216 {
01217 char *seq1="CGCAGGGAUACCCGCG", *seq2="GCGCCCAUAGGGACGC",
01218 *struct1,* struct2,* xstruc;
01219 float e1, e2, tree_dist, string_dist, profile_dist, kT;
01220 Tree *T1, *T2;
01221 swString *S1, *S2;
01222 float **pf1, **pf2;
01223 FLT_OR_DBL *bppm;
01224
01225 temperature = 30.;
01226
01227
01228 struct1 = (char* ) space(sizeof(char)*(strlen(seq1)+1));
01229 e1 = fold(seq1, struct1);
01230
01231 struct2 = (char* ) space(sizeof(char)*(strlen(seq2)+1));
01232 e2 = fold(seq2, struct2);
01233
01234 free_arrays();
01235
01236
01237 xstruc = expand_Full(struct1);
01238 T1 = make_tree(xstruc);
01239 S1 = Make_swString(xstruc);
01240 free(xstruc);
01241
01242 xstruc = expand_Full(struct2);
01243 T2 = make_tree(xstruc);
01244 S2 = Make_swString(xstruc);
01245 free(xstruc);
01246
01247
01248 edit_backtrack = 1;
01249 tree_dist = tree_edit_distance(T1, T2);
01250 free_tree(T1); free_tree(T2);
01251 unexpand_aligned_F(aligned_line);
01252 printf("%s\n%s %3.2f\n", aligned_line[0], aligned_line[1], tree_dist);
01253
01254
01255 string_dist = string_edit_distance(S1, S2);
01256 free(S1); free(S2);
01257 printf("%s mfe=%5.2f\n%s mfe=%5.2f dist=%3.2f\n",
01258 aligned_line[0], e1, aligned_line[1], e2, string_dist);
01259
01260
01261
01262 kT = (temperature+273.15)*1.98717/1000.;
01263 pf_scale = exp(-e1/kT/strlen(seq1));
01264
01265
01266 e1 = pf_fold(seq1, struct1);
01267
01268 bppm = export_bppm();
01269 pf1 = Make_bp_profile_bppm(bppm, strlen(seq1));
01270
01271 e2 = pf_fold(seq2, struct2);
01272
01273 bppm = export_bppm();
01274 pf2 = Make_bp_profile(strlen(seq2));
01275
01276 free_pf_arrays();
01277
01278 profile_dist = profile_edit_distance(pf1, pf2);
01279 printf("%s free energy=%5.2f\n%s free energy=%5.2f dist=%3.2f\n",
01280 aligned_line[0], e1, aligned_line[1], e2, profile_dist);
01281
01282 free_profile(pf1); free_profile(pf2);
01283 }
01284 \endverbatim
01285
01286 In a typical Unix environment you would compile this program using:
01287 \verbatim
01288 cc ${OPENMP_CFLAGS} -c example.c -I${hpath}
01289 \endverbatim
01290 and link using
01291 \verbatim
01292 cc ${OPENMP_CFLAGS} -o example example.o -L${lpath} -lRNA -lm
01293 \endverbatim
01294 where \e ${hpath} and \e ${lpath} point to the location of the header
01295 files and library, respectively.
01296 \note By default, RNAlib is compiled with built-in \e OpenMP multithreading
01297 support. Thus, when linking your own object files to the library you have to pass
01298 the compiler-specific \e ${OPENMP_CFLAGS} (e.g. '-fopenmp' for \b gcc) even if your code does not
01299 use OpenMP-specific code. However, in that case the \e OpenMP flags may be omitted when compiling
01300 example.c.
01301
01302 \ref mp_ref "Next Page: References"
01303
01304 \page mp_ref References
01305
01306 -# \anchor mathews_04 D.H. Mathews, M. D. Disney, J.L. Childs, S.J. Schroeder, M. Zuker, D.H. Turner (2004)\n
01307 Incorporating chemical modification constraints into a dynamic programming algorithm for prediction of
01308 RNA secondary structure, Proc Natl Acad Sci U S A, 101(19):7287-92
01309 -# \anchor mathews_99 D.H. Mathews, J. Sabina, M. Zuker and D.H. Turner (1999)\n
01310 Expanded sequence dependence of thermodynamic parameters provides
01311 robust prediction of RNA secondary structure, JMB, 288: 911-940
01312 -# \anchor zuker_81 M. Zuker and P. Stiegler (1981)\n
01313 Optimal computer folding of large RNA sequences using
01314 thermodynamic and auxiliary information, Nucl Acid Res 9: 133-148
01315 -# \anchor dimitrov_04 D.A. Dimitrov, M. Zuker (2004)\n
01316 Prediction of hybridization and melting for double stranded nucleic
01317 acids, Biophysical J. 87: 215-226
01318 -# \anchor mccaskill_90 J.S. McCaskill (1990)\n
01319 The equilibrium partition function and base pair binding
01320 probabilities for RNA secondary structures, Biopolymers 29: 1105-1119
01321 -# \anchor turner_88 D.H. Turner, N. Sugimoto and S.M. Freier (1988)\n
01322 RNA structure prediction, Ann Rev Biophys Biophys Chem 17: 167-192
01323 -# \anchor jaeger_89 J.A. Jaeger, D.H. Turner and M. Zuker (1989)\n
01324 Improved predictions of secondary structures for RNA,
01325 Proc. Natl. Acad. Sci. 86: 7706-7710
01326 -# \anchor he_91 L. He, R. Kierzek, J. SantaLucia, A.E. Walter and D.H. Turner (1991)\n
01327 Nearest-Neighbor Parameters For GU Mismatches,
01328 Biochemistry 30: 11124-11132
01329 -# \anchor peritz_91 A.E. Peritz, R. Kierzek, N. Sugimoto, D.H. Turner (1991)\n
01330 Thermodynamic Study of Internal Loops in Oligoribonucleotides ... ,
01331 Biochemistry 30: 6428-6435
01332 -# \anchor walter_94 A. Walter, D. Turner, J. Kim, M. Lyttle, P. Müller, D. Mathews and M. Zuker (1994)\n
01333 Coaxial stacking of helices enhances binding of Oligoribonucleotides..,
01334 Proc. Natl. Acad. Sci. 91: 9218-9222
01335 -# \anchor shapiro_88 B.A. Shapiro (1988)\n
01336 An algorithm for comparing multiple RNA secondary structures,
01337 CABIOS 4, 381-393
01338 -# \anchor shapiro_90 B.A. Shapiro and K. Zhang (1990)\n
01339 Comparing multiple RNA secondary structures using tree comparison,
01340 CABIOS 6, 309-318
01341 -# \anchor bruccoleri_88 R. Bruccoleri and G. Heinrich (1988)\n
01342 An improved algorithm for nucleic acid secondary structure display,
01343 CABIOS 4, 167-173
01344 -# \anchor fontana_93a W. Fontana, D.A.M. Konings, P.F. Stadler, P. Schuster (1993)\n
01345 Statistics of RNA secondary structures, Biopolymers 33, 1389-1404
01346 -# \anchor fontana_93b W. Fontana, P.F. Stadler, E.G. Bornberg-Bauer, T. Griesmacher, I.L.
01347 Hofacker, M. Tacker, P. Tarazona, E.D. Weinberger, P. Schuster (1993)\n
01348 RNA folding and combinatory landscapes, Phys. Rev. E 47: 2083-2099
01349 -# \anchor hofacker_94a I.L. Hofacker, W. Fontana, P.F. Stadler, S. Bonhoeffer, M. Tacker, P.
01350 Schuster (1994) Fast Folding and Comparison of RNA Secondary Structures.
01351 Monatshefte f. Chemie 125: 167-188
01352 -# \anchor hofacker_94b I.L. Hofacker (1994) The Rules of the Evolutionary Game for RNA:
01353 A Statistical Characterization of the Sequence to Structure Mapping in RNA.
01354 PhD Thesis, University of Vienna.
01355 -# \anchor hofacker_02 I.L. Hofacker, M. Fekete, P.F. Stadler (2002).
01356 Secondary Structure Prediction for Aligned RNA Sequences.
01357 J. Mol. Biol. 319:1059-1066
01358 -# \anchor adams_79 D. Adams (1979)\n
01359 The hitchhiker's guide to the galaxy, Pan Books, London
01360
01361 **/
01362