# SAMtoBED.pl

#!/usr/local/bin/perl -w

# Unix, Perl, and Python - class 2
# Parse a SAM file into a BED file that can be visualized in a genome browser

# Get the SAM file as the first argument
#   ex: ./SAMtoBED.pl myFile.SAM

use strict;
use warnings;

# Path of the SAM file to convert (first command-line argument)
my $sam = $ARGV[0];

if (! defined $sam || $sam eq '')	# No argument is given
{
	# Print an error message if the user forgets the name of the sam file
	# 0
	print STDERR "Usage: $0 myFile.SAM > myFile.bed\n";

	# Exit with a non-zero status so calling shells/pipelines can detect the failure
	exit 1;
}

# Open the SAM file
# (3-argument open with a lexical handle, so odd characters in the file name
#  cannot be interpreted as an open mode or a pipe)
open (my $sam_fh, '<', $sam) or die "Major problem: cannot open $sam for reading: $!";

while (my $line = <$sam_fh>)		# Read one line at a time
{
	# 1
	# Skip header lines, which start with "@"
	next if $line =~ /^@/;

	chomp($line);					# Delete newline at end of each line
	next if $line eq '';			# Ignore blank lines

	# Use the tabs "\t" to split the line into fields, and place these fields into an array called @data
	# 2
	my @data = split /\t/, $line;

	# An alignment line needs at least the 11 mandatory fields, with a numeric flag and position
	if (@data < 11 || $data[1] !~ /^\d+$/ || $data[3] !~ /^\d+$/)
	{
		warn "Skipping malformed line $. of $sam\n";
		next;
	}

	my ($name, $flag, $chr, $pos, $score, $seq) = @data[0, 1, 2, 3, 4, 9];

	# Unmapped reads (flag bit 4, no reference name, or position 0) have no genome coordinates
	next if ($flag & 4) || $chr eq '*' || $pos < 1;

	# Without a stored read sequence ("*") the read length cannot be computed
	next if $seq eq '*';

	# Get the position of the start of the read sequence from field 4 ($data[3]),
	# but SAM files start counting at 1, while BED files start counting at 0, so we need to subtract 1.
	# 3
	my $start = $pos - 1;

	# Get the length of the read sequence in field 10 ($data[9])
	# 4
	# NOTE(review): the read length equals the span on the reference only for
	# alignments without gaps or clipping; use the CIGAR field if that matters.
	my $length = length $seq;

	# Given the "start" (field 4) and the read length ($length), calculate the end ($end)
	# We also need to subtract 1 to get the BED coordinate to match the SAM coordinate
	# 5
	my $end = $pos + $length - 1;

	# The strand is encoded in field 2 ($data[1]): If field 2 is 0, it's +; if field 2 is 16, it's -.
	# Testing bit 16 also gives the right answer when other flag bits are set.
	# 7 [Optional]
	my $strand = ($flag & 16) ? '-' : '+';

	# Convert this row into a BED-style line with the fields chr TAB start TAB end,
	# followed by the name, score (mapping quality from field 5), and strand of the read
	# 6
	print join("\t", $chr, $start, $end, $name, $score, $strand), "\n";
}

# Close file handles
close ($sam_fh);

##########
# syntax highlighted by Code2HTML, v. 0.9.1