hw6/blast_get_target_seq.pl

#!/usr/local/bin/perl -w

# Do blast search and retrieve top 10 target sequences

# See documentation at http://www.bioperl.org/Core/POD/Bio/Tools/BPlite.html

use Bio::Tools::BPlite;

# Prompt the user for the query file name if it's not an argument
if (! $ARGV[0])
{
        print "What is the query file for the blast? ";

        # Get input and remove the newline character at the end
        chomp ($inFile = <STDIN>);
}
else
{
        $inFile = $ARGV[0];
}

# Prompt the user for the file of blast result
if (! $ARGV[1])
{
        print "What is the blast file? ";

        # Get input and remove the newline character at the end
        chomp ($blastFile = <STDIN>);
}
else
{
        $blastFile = $ARGV[1];
}

# Prompt the user for the target sequence file
if (! $ARGV[2])    
{
        print "where does the target file go? ";

        # Get input and remove the newline character at the end
        chomp ($outFile = <STDIN>);
}
else
{
        $outFile = $ARGV[2];
}

print "Wait, it's processing your data...\n";

`blastall -p blastp -d /cluster/db0/Data/nr -i $inFile -o $blastFile`; #blast command

$report = new Bio::Tools::BPlite(-file=>"$blastFile");                 #create a new BPlite object to hold blast output result

# Count hits
$i = 0;

open(FH, ">$outFile") || die "Can not write to $outFile\n";            #open file for save

while(my $sbjct = $report->nextSbjct) {
    if ($i < 10) {                            #the max number of hits 
   $targetName = $sbjct->name;           #retrieve the database name
#   print "$targetName\n";                #print out for debugging
   $targetAcc = "";
   if ($targetName =~ /^[a-z]+?\|\|([a-z0-9_.-]+)?\s.*/i) {      #for format like "pir||A45032", "A45032" is used. 
       $targetAcc = $1;
#       print "ACCESSION=AA=$targetAcc\n"; #print out for debugging
   }
   elsif ($targetName =~ /^[a-z]+?\|([a-z0-9_.-]+)?\|.*/i) {     #for format like "sp|Q9V9I4|O42B_DROME", "Q9V9I4" is used. 
       $targetAcc = $1;                   #accession number
#       print "ACCESSION=BB=$targetAcc\n"; #print out for debugging
   }
   if ($targetAcc ne "") {
       $fastaSeq = `fastacmd -d nr -s $targetAcc`;
       print FH $fastaSeq;                #save the target sequence in $outFile
       $i++;                              #count the num of hits
   }
    }
}
print "Done\n";
close (FH) || die "Can not close $outFile\n";