#!/bin/perl -w
# Do blast search and retrieve top 10 target sequences
# See help at http://www.bioperl.org/HOWTOs/html/SearchIO.html for all data that can be extracted
use Bio::SearchIO;
# Prompt the user for the query file name if it's not an argument
if (! $ARGV[0])
{
print "What is the query file for the blast? ";
# Get input and remove the newline character at the end
chomp ($inFile = );
}
else
{
$inFile = $ARGV[0];
}
# Prompt the user for the file of blast result
if (! $ARGV[1])
{
print "What is name of the blast output file? ";
# Get input and remove the newline character at the end
chomp ($blastFile = );
}
else
{
$blastFile = $ARGV[1];
}
# Prompt the user for the target sequence file
if (! $ARGV[2])
{
print "what is the name of blast targets file? ";
# Get input and remove the newline character at the end
chomp ($outFile = );
}
else
{
$outFile = $ARGV[2];
}
print "Wait, it's blasting your query...\n";
`blastall -p blastp -d /cluster/db0/Data/nr -i $inFile -o $blastFile`; #blast command
$report = new Bio::SearchIO(
-file=>"$blastFile",
-format => "blast"
);
open(FH, ">$outFile") || die "Can not write to $outFile\n"; #open file for save
# Go through BLAST reports one by one
while($result = $report->next_result)
{
# Count hits
$i = 0;
# Go through each each matching sequence
while($hit = $result->next_hit)
{
$i++; #count the num of hits
if ($i <= 10) { # the max number of hits
$targetAcc = $hit->accession(); # retrieve the database accession
print "PARSE $i HIT: $targetAcc\n"; # print out for debugging
$fastaSeq = `fastacmd -d /cluster/db0/Data/nr -s $targetAcc`; # extract target sequences
print FH $fastaSeq; # save the target sequence in $outFile
}
}
}
print "Done\n";
close (FH) || die "Can not close $outFile\n";