00001
00002
00003
00004
00005
00006
00108
00109
00110
00111
00112
00113
00327 { 0, 2, 2, 2, 2, 1, 1},
00328 { 2, 0, 2, 2, 2, INF, INF},
00329 { 2, 2, 0, 1, 2, INF, INF},
00330 { 2, 2, 1, 0, 2, INF, INF},
00331 { 2, 2, 2, 2, 0, INF, INF},
00332 { 1, INF, INF, INF, INF, 0, INF},
00333 { 1, INF, INF, INF, INF, INF, 0},
00334
00335
00336
00337 { 0, 100, 5, 5, 75, 5, 5},
00338 { 100, 0, 8, 8, 8, INF, INF},
00339 { 5, 8, 0, 3, 8, INF, INF},
00340 { 5, 8, 3, 0, 8, INF, INF},
00341 { 75, 8, 8, 8, 0, INF, INF},
00342 { 5, INF, INF, INF, INF, 0, INF},
00343 { 5, INF, INF, INF, INF, INF, 0},
00344 \endverbatim
00345
00346 The lower matrix uses the costs given in \cite shapiro:1990.
00347 All distance functions use the following global variables:
00348
00349 \verbatim
00350 int cost_matrix;
00351 \endverbatim
00352 \copybrief cost_matrix
00353
00354 \verbatim
00355 int edit_backtrack;
00356 \endverbatim
00357 \copybrief edit_backtrack
00358
00359 \verbatim
00360 char *aligned_line[4];
00361 \endverbatim
00362 \copybrief aligned_line
00363
00364 \see utils.h, dist_vars.h and stringdist.h for more details
00365
00366 <h3>Functions for Tree Edit Distances</h3>
00367
00368 \verbatim
00369 Tree *make_tree (char *struc)
00370 \endverbatim
00371 \copybrief make_tree()
00372
00373 \verbatim
00374 float tree_edit_distance (Tree *T1,
00375 Tree *T2)
00376 \endverbatim
00377 \copybrief tree_edit_distance()
00378
00379 \verbatim
00380 void free_tree(Tree *t)
00381 \endverbatim
00382 \copybrief free_tree()
00383
00384 \see dist_vars.h and treedist.h for prototypes and more detailed descriptions
00385
00386 <h3>Functions for String Alignment</h3>
00387
00388 \verbatim
00389 swString *Make_swString (char *string)
00390 \endverbatim
00391 \copybrief Make_swString()
00392
00393 \verbatim
00394 float string_edit_distance (swString *T1,
00395 swString *T2)
00396 \endverbatim
00397 \copybrief string_edit_distance()
00398
00399 \see dist_vars.h and stringdist.h for prototypes and more detailed descriptions
00400
00401 <h3>Functions for Comparison of Base Pair Probabilities</h3>
00402
00403 For comparison of base pair probability matrices, the matrices are first
00404 condensed into probability profiles which are then compared by alignment.
00405
00406 \verbatim
00407 float *Make_bp_profile_bppm ( double *bppm,
00408 int length)
00409 \endverbatim
00410 \copybrief Make_bp_profile_bppm()
00411
00412 \verbatim
00413 float profile_edit_distance ( const float *T1,
00414 const float *T2)
00415 \endverbatim
00416 \copybrief profile_edit_distance()
00417
00418 \see ProfileDist.h for prototypes and more details of the above functions
00419
00420 \ref mp_utils "Next Page: Utilities"
00421
00422 \page mp_utils Utilities - Odds and Ends
00423
00424 \anchor toc
00425
00426 <h3>Table of Contents</h3>
00427 <hr>
00428
00429 \li \ref utils_ss
00430 \li \ref utils_dot
00431 \li \ref utils_aln
00432 \li \ref utils_seq
00433 \li \ref utils_struc
00434 \li \ref utils_misc
00435
00436 <hr>
00437
00438 \section utils_ss Producing secondary structure graphs
00439
00440 \verbatim
00441 int PS_rna_plot ( char *string,
00442 char *structure,
00443 char *file)
00444 \endverbatim
00445 \copybrief PS_rna_plot()
00446
00447 \verbatim
00448 int PS_rna_plot_a (
00449 char *string,
00450 char *structure,
00451 char *file,
00452 char *pre,
00453 char *post)
00454 \endverbatim
00455 \copybrief PS_rna_plot_a()
00456
00457 \verbatim
00458 int gmlRNA (char *string,
00459 char *structure,
00460 char *ssfile,
00461 char option)
00462 \endverbatim
00463 \copybrief gmlRNA()
00464
00465 \verbatim
00466 int ssv_rna_plot (char *string,
00467 char *structure,
00468 char *ssfile)
00469 \endverbatim
00470 \copybrief ssv_rna_plot()
00471
00472 \verbatim
00473 int svg_rna_plot (char *string,
00474 char *structure,
00475 char *ssfile)
00476 \endverbatim
00477 \copybrief svg_rna_plot()
00478
00479 \verbatim
00480 int xrna_plot ( char *string,
00481 char *structure,
00482 char *ssfile)
00483 \endverbatim
00484 \copybrief xrna_plot()
00485
00486 \verbatim
00487 int rna_plot_type
00488 \endverbatim
00489 \copybrief rna_plot_type
00490
00491 Two low-level functions provide direct access to the graph layout
00492 algorithms:
00493
00494 \verbatim
00495 int simple_xy_coordinates ( short *pair_table,
00496 float *X,
00497 float *Y)
00498 \endverbatim
00499 \copybrief simple_xy_coordinates()
00500
00501 \verbatim
00502 int naview_xy_coordinates ( short *pair_table,
00503 float *X,
00504 float *Y)
00505 \endverbatim
00506 \copybrief naview_xy_coordinates()
00507
00508 \see PS_dot.h and naview.h for more detailed descriptions.
00509
00510 \htmlonly
00511 <hr>
00512 <a href="#toc">Table of Contents</a>
00513 <hr>
00514 \endhtmlonly
00515
00516 \section utils_dot Producing (colored) dot plots for base pair probabilities
00517
00518 \verbatim
00519 int PS_color_dot_plot ( char *string,
00520 cpair *pi,
00521 char *filename)
00522 \endverbatim
00523 \copybrief PS_color_dot_plot()
00524
00525 \verbatim
00526 int PS_color_dot_plot_turn (char *seq,
00527 cpair *pi,
00528 char *filename,
00529 int winSize)
00530 \endverbatim
00531 \copybrief PS_color_dot_plot_turn()
00532
00533 \verbatim
00534 int PS_dot_plot_list (char *seq,
00535 char *filename,
00536 plist *pl,
00537 plist *mf,
00538 char *comment)
00539 \endverbatim
00540 \copybrief PS_dot_plot_list()
00541
00542 \verbatim
00543 int PS_dot_plot_turn (char *seq,
00544 struct plist *pl,
00545 char *filename,
00546 int winSize)
00547 \endverbatim
00548 \copybrief PS_dot_plot_turn()
00549
00550 \see PS_dot.h for more detailed descriptions.
00551
00552 \section utils_aln Producing (colored) alignments
00553
00554 \verbatim
00555 int PS_color_aln (
00556 const char *structure,
00557 const char *filename,
00558 const char *seqs[],
00559 const char *names[])
00560 \endverbatim
00561 \copybrief PS_color_aln()
00562
00563 \htmlonly
00564 <hr>
00565 <a href="#toc">Table of Contents</a>
00566 <hr>
00567 \endhtmlonly
00568
00569 \section utils_seq RNA sequence related utilities
00570
00571 Several functions provide useful applications to RNA sequences:
00572
00573 \verbatim
00574 char *random_string (int l,
00575 const char symbols[])
00576 \endverbatim
00577 \copybrief random_string()
00578
00579 \verbatim
00580 int hamming ( const char *s1,
00581 const char *s2)
00582 \endverbatim
00583 \copybrief hamming()
00584
00585 \verbatim
00586 void str_DNA2RNA(char *sequence);
00587 \endverbatim
00588 \copybrief str_DNA2RNA()
00589
00590 \verbatim
00591 void str_uppercase(char *sequence);
00592 \endverbatim
00593 \copybrief str_uppercase()
00594
00595 \htmlonly
00596 <hr>
00597 <a href="#toc">Table of Contents</a>
00598 <hr>
00599 \endhtmlonly
00600
00601 \section utils_struc RNA secondary structure related utilities
00602
00603 \verbatim
00604 char *pack_structure (const char *struc)
00605 \endverbatim
00606 \copybrief pack_structure()
00607
00608 \verbatim
00609 char *unpack_structure (const char *packed)
00610 \endverbatim
00611 \copybrief unpack_structure()
00612
00613 \verbatim
00614 short *make_pair_table (const char *structure)
00615 \endverbatim
00616 \copybrief make_pair_table()
00617
00618 \verbatim
00619 short *copy_pair_table (const short *pt)
00620 \endverbatim
00621 \copybrief copy_pair_table()
00622
00623 \htmlonly
00624 <hr>
00625 <a href="#toc">Table of Contents</a>
00626 <hr>
00627 \endhtmlonly
00628
00629 \section utils_misc Miscellaneous Utilities
00630
00631 \verbatim
00632 void print_tty_input_seq (void)
00633 \endverbatim
00634 \copybrief print_tty_input_seq()
00635
00636 \verbatim
00637 void print_tty_constraint_full (void)
00638 \endverbatim
00639 \copybrief print_tty_constraint_full()
00640
00641 \verbatim
00642 void print_tty_constraint (unsigned int option)
00643 \endverbatim
00644 \copybrief print_tty_constraint()
00645
00646 \verbatim
00647 int *get_iindx (unsigned int length)
00648 \endverbatim
00649 \copybrief get_iindx()
00650
00651 \verbatim
00652 int *get_indx (unsigned int length)
00653 \endverbatim
00654 \copybrief get_indx()
00655
00656 \verbatim
00657 void constrain_ptypes (
00658 const char *constraint,
00659 unsigned int length,
00660 char *ptype,
00661 int *BP,
00662 int min_loop_size,
00663 unsigned int idx_type)
00664 \endverbatim
00665 \copybrief constrain_ptypes()
00666
00667 \verbatim
00668 char *get_line(FILE *fp);
00669 \endverbatim
00670 \copybrief get_line()
00671
00672 \verbatim
00673 unsigned int read_record(
00674 char **header,
00675 char **sequence,
00676 char ***rest,
00677 unsigned int options);
00678 \endverbatim
00679 \copybrief read_record()
00680
00681 \verbatim
00682 char *time_stamp (void)
00683 \endverbatim
00684 \copybrief time_stamp()
00685
00686 \verbatim
00687 void warn_user (const char message[])
00688 \endverbatim
00689 \copybrief warn_user()
00690
00691 \verbatim
00692 void nrerror (const char message[])
00693 \endverbatim
00694 \copybrief nrerror()
00695
00696 \verbatim
00697 void init_rand (void)
00698 \endverbatim
00699 \copybrief init_rand()
00700
00701 \verbatim
00702 unsigned short xsubi[3];
00703 \endverbatim
00704 \copybrief xsubi
00705
00706 \verbatim
00707 double urn (void)
00708 \endverbatim
00709 \copybrief urn()
00710
00711 \verbatim
00712 int int_urn (int from, int to)
00713 \endverbatim
00714 \copybrief int_urn()
00715
00716 \verbatim
00717 void *space (unsigned size)
00718 \endverbatim
00719 \copybrief space()
00720
00721 \verbatim
00722 void *xrealloc ( void *p,
00723 unsigned size)
00724 \endverbatim
00725 \copybrief xrealloc()
00726
00727 \see utils.h for a complete overview and detailed description of the utility functions
00728
00729 \htmlonly
00730 <hr>
00731 <a href="#toc">Table of Contents</a>
00732 <hr>
00733 \endhtmlonly
00734
00735 \ref mp_example "Next Page: Examples"
00736
00737 \page mp_example Example - A Small Example Program
00738
00739 The following program exercises most commonly used functions of the library.
00740 The program folds two sequences using both the mfe and partition function
00741 algorithms and calculates the tree edit and profile distance of the
00742 resulting structures and base pairing probabilities.
00743
00744 \code{.c}
00745 #include <stdio.h>
00746 #include <stdlib.h>
00747 #include <math.h>
00748 #include <string.h>
00749 #include "utils.h"
00750 #include "fold_vars.h"
00751 #include "fold.h"
00752 #include "part_func.h"
00753 #include "inverse.h"
00754 #include "RNAstruct.h"
00755 #include "treedist.h"
00756 #include "stringdist.h"
00757 #include "profiledist.h"
00758
00759 void main()
00760 {
00761 char *seq1="CGCAGGGAUACCCGCG", *seq2="GCGCCCAUAGGGACGC",
00762 *struct1,* struct2,* xstruc;
00763 float e1, e2, tree_dist, string_dist, profile_dist, kT;
00764 Tree *T1, *T2;
00765 swString *S1, *S2;
00766 float *pf1, *pf2;
00767 FLT_OR_DBL *bppm;
00768
00769 temperature = 30.;
00770
00771
00772 struct1 = (char* ) space(sizeof(char)*(strlen(seq1)+1));
00773 e1 = fold(seq1, struct1);
00774
00775 struct2 = (char* ) space(sizeof(char)*(strlen(seq2)+1));
00776 e2 = fold(seq2, struct2);
00777
00778 free_arrays();
00779
00780
00781 xstruc = expand_Full(struct1);
00782 T1 = make_tree(xstruc);
00783 S1 = Make_swString(xstruc);
00784 free(xstruc);
00785
00786 xstruc = expand_Full(struct2);
00787 T2 = make_tree(xstruc);
00788 S2 = Make_swString(xstruc);
00789 free(xstruc);
00790
00791
00792 edit_backtrack = 1;
00793 tree_dist = tree_edit_distance(T1, T2);
00794 free_tree(T1); free_tree(T2);
00795 unexpand_aligned_F(aligned_line);
00796 printf("%s\n%s %3.2f\n", aligned_line[0], aligned_line[1], tree_dist);
00797
00798
00799 string_dist = string_edit_distance(S1, S2);
00800 free(S1); free(S2);
00801 printf("%s mfe=%5.2f\n%s mfe=%5.2f dist=%3.2f\n",
00802 aligned_line[0], e1, aligned_line[1], e2, string_dist);
00803
00804
00805
00806 kT = (temperature+273.15)*1.98717/1000.;
00807 pf_scale = exp(-e1/kT/strlen(seq1));
00808
00809
00810 e1 = pf_fold(seq1, struct1);
00811
00812 bppm = export_bppm();
00813 pf1 = Make_bp_profile_bppm(bppm, strlen(seq1));
00814
00815 e2 = pf_fold(seq2, struct2);
00816
00817 bppm = export_bppm();
00818 pf2 = Make_bp_profile_bppm(bppm, strlen(seq2));
00819
00820 free_pf_arrays();
00821
00822 profile_dist = profile_edit_distance(pf1, pf2);
00823 printf("%s free energy=%5.2f\n%s free energy=%5.2f dist=%3.2f\n",
00824 aligned_line[0], e1, aligned_line[1], e2, profile_dist);
00825
00826 free_profile(pf1); free_profile(pf2);
00827 }
00828 \endcode
00829
00830 In a typical Unix environment you would compile this program using:
00831 \verbatim
00832 cc ${OPENMP_CFLAGS} -c example.c -I${hpath}
00833 \endverbatim
00834 and link using
00835 \verbatim
00836 cc ${OPENMP_CFLAGS} -o example example.o -L${lpath} -lRNA -lm
00837 \endverbatim
00838 where \e ${hpath} and \e ${lpath} point to the location of the header
00839 files and library, respectively.
00840 \note By default, RNAlib is compiled with built-in \e OpenMP multithreading
00841 support. Thus, when linking your own object files to the library you have to pass
00842 the compiler-specific \e ${OPENMP_CFLAGS} (e.g. '-fopenmp' for \b gcc) even if your code does not
00843 use any OpenMP-specific code. However, in that case the \e OpenMP flags may be omitted when compiling
00844 example.c.
00845
00846
00847 **/
00848