1 #!/usr/local/bin/perl -w 2 3 # oligos.pl 4 # Create and analyze an overlapping series of oligos 5 # WI Biocomputing course - Bioinformatics for Biologists - October 2003 6 # Example of input taken as multiple arguments 7 8 # Check input and give info if arguments are missing 9 if (! $ARGV[3]) 10 { 11 print " 12 This program creates a series of oligos from 'sequence' 13 of length 'oligoLength' starting at startPosition nt and 14 ending at endPosition nt 15 USAGE: oligos.pl sequence oligoLength startPosition endPosition\n\n"; 16 17 exit(0); 18 } 19 else 20 { 21 $seq = $ARGV[0]; # sequence name 22 $mer = $ARGV[1]; # length of oligos to extract 23 $start = $ARGV[2]; # nt number for beginning of first oligo 24 $end = $ARGV[3]; # nt number for beginning of last oligo 25 } 26 27 # Get continuous sequence from sequence file 28 open (SEQ, $seq) || die "cannot open $seq for reading: $!"; 29 # make array of lines 30 @seq = <SEQ>; 31 32 # get header (defline) of sequence (first line of fasta file) 33 $defline = $seq[0]; 34 35 # Split definition line into fields separated by spaces 36 @defline_fields = split(/\s/, $defline); 37 38 $seq[0] = ""; # delete defline 39 $seq = join ("", @seq); # join($glue, @list) to get seq without defline 40 $seq =~ s/\s*//g; # delete spaces and end of lines 41 $seqLength = length ($seq); # get length of sequence (optional) 42 43 # $defline_fields[0] is the first part of defline (up to first space) 44 print "Oligos ($mer mers) for $defline_fields[0] ($seqLength nt) and % GC content\n"; 45 46 # Beware of OBOB ==> for ($i = $start; $i < $end; $i++) would be one nucleotide off 47 # This "off-by-one bug" is a common problem, so always check your code 48 49 # Adjust counting since computers start at 0 but people think of sequences starting at 1 50 for ($i = $start - 1; $i < $end - 1; $i++) 51 { 52 # Extract substring of sequence to make oligo ==> oligo = substr(seq, start, length); 53 # Use $_ variable to make tr/// transliteration (letter counting) easier 54 $_ = substr($seq, $i, $mer); 55 56 # Adjust to conventional way of counting (e.g., first nt = 1, not 0) 57 $nt = $i + 1; 58 59 # Count the number of Gs and Cs in the oligo by counting 60 # the number of times G or C are "translated" into the same G or C within $_ (the oligo) 61 $numGC = tr/GC//; 62 63 # Calculate per cent GC 64 $pcGC = 100 * $numGC / $mer; 65 66 # Round off percent GC to 1 decimal places (%0.2f ==> 2 decimal places) 67 $pcGCrounded = sprintf("%0.1f", $pcGC); 68 69 # Print results delimited by tabs (for easy importing into spreadsheets, etc) 70 print "$nt\t$_\t$pcGCrounded\n"; 71 } 72 print "\n"; 73