# align.pl

#!/usr/local/bin/perl -w

# Unix, Perl, and Python - class 2
# Align a list of human-mouse homology pairs

# Homology table to read: tab-delimited lines pairing human and mouse RefSeq accessions
$hs_mm_pairs = 'human_mouse_pairs.txt';

# Working files for the run: three per-pair scratch files (deleted when the
# script finishes) plus the combined results file reported to the user.
(
	$humanSeq,			# human sequence for the current pair
	$mouseSeq,			# mouse sequence for the current pair
	$seqPairFile,		# both sequences together (input for a clustal alignment)
	$bigOutputFile,		# accumulated alignments for every pair
) = ('human.fa', 'mouse.fa', 'seqPair.fa', 'alignment-all.txt');

# Remove $bigOutputFile left over from any previous run of this script.
# Step 5 of the loop appends to this file, so a stale copy that cannot be
# deleted would silently mix old and new alignments -- treat that as fatal
# instead of ignoring unlink's return value.
if (-e $bigOutputFile)				# if the file exists
{
	unlink($bigOutputFile)			# delete it [Perl's unlink = Unix's rm]
		or die "Cannot remove old $bigOutputFile: $!";
}

# Open the homology data file.  Three-argument open with a lexical handle:
# the file name can never be misinterpreted as an open mode, and the handle
# is private to this script.
open(my $homology_fh, '<', $hs_mm_pairs)
	or die "Major problem: cannot open $hs_mm_pairs for reading: $!";

while (my $line = <$homology_fh>)	# Read one line at a time
{
	chomp($line);						# Delete newline at end of each line
	my @fields = split(/\t/, $line);	# Split the line into tab-delimited fields

	# Get human ($humanAcc) and mouse ($mouseAcc) accessions from the fields
	# 0
	# NOTE(review): assumes column 1 is the human accession and column 2 the
	# mouse accession, as the input file name suggests -- confirm against the data.
	my ($humanAcc, $mouseAcc) = @fields;

	# Skip blank or incomplete lines rather than processing undefined accessions.
	unless (defined $humanAcc && length $humanAcc
		&& defined $mouseAcc && length $mouseAcc)
	{
		warn "Skipping line $. of $hs_mm_pairs: expected two tab-separated accessions\n";
		next;
	}

	print STDERR "$humanAcc vs. $mouseAcc\n";		# for debugging

	# Get a human sequence using the "fastacmd" syntax and redirect output to $humanSeq
	# 1
	# TODO: not implemented in this template.

	# Change format of sequence headers to make alignments clearer
	_simplify_fasta_headers($humanSeq);

	# Get a mouse sequence using the "fastacmd" syntax and redirect output to $mouseSeq
	# 2
	# TODO: not implemented in this template.

	# Change format of sequence headers to make alignments clearer
	_simplify_fasta_headers($mouseSeq);

	# Concatenate both sequences ($humanSeq and $mouseSeq) to $seqPairFile (if doing clustal alignment)
	# 3
	# TODO: not implemented in this template.

	# Do alignment for each sequence, choosing an informative filename for output
	# 4
	# TODO: not implemented in this template.

	# Append sequence headers and alignment to $bigOutputFile
	# 5
	# TODO: not implemented in this template.
}

# Close file handles and delete temporary files.
close($homology_fh);
# Best effort: a temporary file may never have been created, so failures
# to delete are deliberately not treated as errors.
unlink($humanSeq, $mouseSeq, $seqPairFile);

print "\nSee $bigOutputFile for output\n\n";

# Rewrite the FASTA headers of $fastaFile in place, replacing everything from
# ">gi" through the last "ref|" on a line with a bare ">".
# Runs the same "perl -i -pe" one-liner as before, but through list-form
# system() so no shell is involved and the exit status can be checked.
# Returns nothing; a failed command only produces a warning.
sub _simplify_fasta_headers
{
	my ($fastaFile) = @_;

	my @command = ('perl', '-i', '-pe', 's/>gi.+ref\|/>/', $fastaFile);
	system(@command) == 0
		or warn "Header cleanup of $fastaFile failed (wait status $?)\n";

	return;
}
# syntax highlighted by Code2HTML, v. 0.9.1