#!/usr/local/bin/perl -w
# Unix, Perl, and Python - class 2
# Align a list of human-mouse homology pairs
use strict;
use warnings;

# Input data file - pairs of human and mouse RefSeq accessions
# (tab-delimited, one pair per line)
my $hs_mm_pairs = "human_mouse_pairs.txt";
# Files used in data processing
my $humanSeq = "human.fa";
my $mouseSeq = "mouse.fa";
my $seqPairFile = "seqPair.fa";
my $bigOutputFile = "alignment-all.txt";

# Rewrite the sequence headers of one FASTA file in place, replacing
# everything from ">gi" through the last "ref|" on a line with ">".
#   $fastaFile - path of the file to edit
# Runs the same one-liner the script always used, but through the list form
# of system() so no shell is involved, and with the interpreter that is
# running this script ($^X) rather than whichever "perl" is first in PATH.
# A failure is reported with a warning and is not fatal.
sub simplify_fasta_headers {
    my ($fastaFile) = @_;
    my @command = ($^X, '-i', '-pe', 's/>gi.+ref\|/>/', $fastaFile);
    system(@command) == 0
        or warn "Could not rewrite sequence headers in $fastaFile (status $?)\n";
    return;
}

# Remove $bigOutputFile from any previous script outputs
# [Perl's unlink = Unix's rm]
unlink($bigOutputFile) if -e $bigOutputFile;

# Open the homology data file
open(my $homology_fh, '<', $hs_mm_pairs)
    or die "Major problem: cannot open $hs_mm_pairs for reading: $!";

while (my $line = <$homology_fh>) {    # Read one line at a time
    chomp $line;                       # Delete newline at end of each line
    next if $line =~ /^\s*$/;          # Nothing to align on a blank line

    # Split the line into tab-delimited fields
    my @data = split /\t/, $line;

    # Get human ($humanAcc) and mouse ($mouseAcc) accessions from @data
    # 0
    # NOTE(review): assumes the human accession is the first field and the
    # mouse accession the second, as the input file name suggests - confirm
    # against the actual data file.
    my ($humanAcc, $mouseAcc) = @data;
    if (!defined $mouseAcc || $humanAcc eq '' || $mouseAcc eq '') {
        warn "Skipping line $. of $hs_mm_pairs: expected two tab-delimited accessions\n";
        next;
    }
    print STDERR "$humanAcc vs. $mouseAcc\n";    # for debugging

    # Get a human sequence using the "fastacmd" syntax and redirect output to $humanSeq
    # 1 (not yet implemented)

    # Change format of sequence headers to make alignments clearer
    simplify_fasta_headers($humanSeq);

    # Get a mouse sequence using the "fastacmd" syntax and redirect output to $mouseSeq
    # 2 (not yet implemented)

    # Change format of sequence headers to make alignments clearer
    simplify_fasta_headers($mouseSeq);

    # Concatenate both sequences ($humanSeq and $mouseSeq) to $seqPairFile (if doing clustal alignment)
    # 3 (not yet implemented)

    # Do alignment for each sequence, choosing an informative filename for output
    # 4 (not yet implemented)

    # Append sequence headers and alignment to $bigOutputFile
    # 5 (not yet implemented)
}

# Close file handles and delete temporary files
close($homology_fh);
unlink($humanSeq, $mouseSeq, $seqPairFile);
print "\nSee $bigOutputFile for output\n\n";
# syntax highlighted by Code2HTML, v. 0.9.1