# align_SOLUTION.pl
# Driver: for every human/mouse accession pair listed in $hs_mm_pairs, fetch
# both sequences with fastacmd, write them to scratch FASTA files, and call
# doClustal(), doGlobal() and doLocal(), which append to $bigOutputFile.
#
# NOTE(review): `use strict` is deliberately not enabled here because other
# code in this file may still rely on undeclared package variables; turn it
# on once that has been confirmed.
use warnings;

# Tab-delimited input: human accession in column 1, mouse accession in column 2.
my $hs_mm_pairs = "human_mouse_pairs.txt";

# Scratch files and the combined report. These four variables are also read by
# the alignment subs defined later in this file, so their names must not change.
my $humanSeq      = "human.fa";
my $mouseSeq      = "mouse.fa";
my $seqPairFile   = "seqPair.fa";
my $bigOutputFile = "alignment-all.txt";

# Start from an empty report: the alignment subs only ever append to it.
if (-e $bigOutputFile) {
    unlink($bigOutputFile)
        or die "Major problem: cannot remove old $bigOutputFile: $!";
}

open(my $homology, '<', $hs_mm_pairs)
    or die "Major problem: cannot open $hs_mm_pairs for reading: $!";

PAIR:
while (my $line = <$homology>) {
    $line =~ s/\r?\n\z//;               # tolerate DOS line endings too
    next PAIR if $line !~ /\S/;         # ignore blank lines

    my ($humanAcc, $mouseAcc) =
        map { defined $_ ? $_ : '' } (split(/\t/, $line))[0, 1];
    s/\A\s+|\s+\z//g for $humanAcc, $mouseAcc;

    if ($humanAcc eq '' || $mouseAcc eq '') {
        warn "Skipping line $. of $hs_mm_pairs: expected two tab-separated accessions\n";
        next PAIR;
    }

    print "$humanAcc vs. $mouseAcc\n";

    my $humanFasta = _fetch_fasta($humanAcc);
    my $mouseFasta = _fetch_fasta($mouseAcc);
    if (!defined $humanFasta || !defined $mouseFasta) {
        # Aligning an empty file only produces confusing tool errors.
        warn "Skipping $humanAcc vs. $mouseAcc: could not retrieve both sequences\n";
        next PAIR;
    }

    _write_file($humanSeq,    $humanFasta);
    _write_file($mouseSeq,    $mouseFasta);
    _write_file($seqPairFile, $humanFasta . $mouseFasta);

    doClustal();
    doGlobal();
    doLocal();
}
close($homology);

# Remove the scratch files; only the combined report is kept.
unlink($humanSeq, $mouseSeq, $seqPairFile);

print "\nSee $bigOutputFile for output\n\n";

# Runs `fastacmd -d nr -s ACCESSION` and returns its standard output as one
# string, with the ">gi|...ref|" prefix of header lines reduced to ">".
# Returns undef (after a warning) if fastacmd cannot be started, exits with a
# failure status, or prints nothing.
sub _fetch_fasta {
    my ($accession) = @_;

    # List-form pipe open: the accession is handed to fastacmd as a single
    # argument and is never interpreted by a shell.
    my $pid = open(my $fastacmd, '-|', 'fastacmd', '-d', 'nr', '-s', $accession);
    if (!$pid) {
        warn "Cannot run fastacmd for $accession: $!\n";
        return;
    }

    my $fasta = '';
    while (my $fastaLine = <$fastacmd>) {
        $fastaLine =~ s/>gi\|.*ref\|/>/;
        $fasta .= $fastaLine;
    }

    # close() on a pipe reports the child's failure through its return value.
    if (!close($fastacmd)) {
        warn "fastacmd failed for $accession (wait status $?)\n";
        return;
    }
    return if $fasta eq '';

    # Guarantee a final newline so two records can be concatenated safely.
    $fasta .= "\n" if $fasta !~ /\n\z/;
    return $fasta;
}

# Writes $text to $path, replacing any previous content; dies on I/O errors.
sub _write_file {
    my ($path, $text) = @_;

    open(my $out, '>', $path)
        or die "Major problem: cannot open $path for writing: $!";
    print {$out} $text;
    close($out)
        or die "Major problem: cannot finish writing $path: $!";
    return;
}
# Appends the FASTA header lines of $seqPairFile, followed by the clustalw
# alignment of that file, to $bigOutputFile.
# Takes no arguments; reads the file-level $seqPairFile and $bigOutputFile.
# Warns instead of appending when clustalw leaves no alignment behind.
sub doClustal
{
    my $clustalOut = "clustal.aln";

    # clustalw is expected to leave a guide tree named after its input file
    # with the extension replaced by .dnd (seqPair.dnd for seqPair.fa).
    (my $guideTree = $seqPairFile) =~ s/\.[^.\/]*\z//;
    $guideTree .= ".dnd";

    `grep ">" $seqPairFile >> $bigOutputFile`;

    # Backticks capture the tool's standard output, keeping it off the terminal.
    `clustalw -INFILE=$seqPairFile -TYPE=DNA -OUTFILE=$clustalOut`;
    my $status = $?;
    warn "clustalw ended with wait status $status\n" if $status != 0;

    if (-s $clustalOut) {
        `cat $clustalOut >> $bigOutputFile`;
    }
    else {
        warn "clustalw wrote no alignment to $clustalOut; nothing appended to $bigOutputFile\n";
    }

    unlink($clustalOut, $guideTree);
}
# Appends the FASTA header lines of $seqPairFile, followed by the `needle`
# alignment of $humanSeq against $mouseSeq, to $bigOutputFile.
# Takes no arguments; reads the file-level $seqPairFile, $humanSeq, $mouseSeq
# and $bigOutputFile.
# Warns instead of appending when needle leaves no alignment behind.
sub doGlobal
{
    my $globalOut = "global.aln";

    `grep ">" $seqPairFile >> $bigOutputFile`;

    # Backticks capture the tool's standard output, keeping it off the terminal.
    `needle $humanSeq $mouseSeq -outfile $globalOut -auto`;
    my $status = $?;
    warn "needle ended with wait status $status\n" if $status != 0;

    if (-s $globalOut) {
        `cat $globalOut >> $bigOutputFile`;
    }
    else {
        warn "needle wrote no alignment to $globalOut; nothing appended to $bigOutputFile\n";
    }

    unlink($globalOut);
}
# Appends the FASTA header lines of $seqPairFile, followed by the `water`
# alignment of $humanSeq against $mouseSeq, to $bigOutputFile.
# Takes no arguments; reads the file-level $seqPairFile, $humanSeq, $mouseSeq
# and $bigOutputFile.
# Warns instead of appending when water leaves no alignment behind.
sub doLocal
{
    my $localOut = "local.aln";

    `grep ">" $seqPairFile >> $bigOutputFile`;

    # Backticks capture the tool's standard output, keeping it off the terminal.
    `water $humanSeq $mouseSeq -outfile $localOut -auto`;
    my $status = $?;
    warn "water ended with wait status $status\n" if $status != 0;

    if (-s $localOut) {
        `cat $localOut >> $bigOutputFile`;
    }
    else {
        warn "water wrote no alignment to $localOut; nothing appended to $bigOutputFile\n";
    }

    unlink($localOut);
}