#!/usr/local/bin/perl -w

# draw_figure.pl

# Draw a PNG figure (using the GD module) of a gene showing BLAST data parsed with blast_parse.pl
# WI Biocomputing course - Bioinformatics for Biologists - October 2003

# The GD module (loaded below) is needed to create the image (written as PNG, not GIF)
# See http://stein.cshl.org/WWW/software/GD/ for documentation

use GD;
use integer;   # Do integer division - helpful since pixels are the quanta of graphics

#########################  User defined variables  ############################

# Files: the PNG to create, and the parsed BLAST report to read.
# $seq_length is the length of the query sequence that gets drawn.
$imagefile  = "blast_figure.png";
$seq_length = "505";
$data_file  = "blast_parse_1_out.txt";

# Scale factor: number of nt or aa represented by one pixel.
# Use a larger integer for longer sequences so they fit in $image_width.
$div = 1;

# Canvas size and layout, all in pixels
($image_width, $image_height) = (1400, 350);
($margin_left, $margin_top)   = (20, 50);
$seq_width = 10;   # vertical extent of the query sequence bar

# Zero-based positions of the tab-delimited fields used from each data line
($subjectStartField, $subjectEndField, $hitNameField) = (8, 9, 5);

###############################  Set up the picture and colors  ################

# Create the drawing canvas.  The direct method call replaces the indirect
# object form ("new GD::Image(...)"), which Perl can mis-parse.  Per the GD
# documentation the constructor returns undef on failure, so stop here
# rather than crash later on an undefined $img.
$img = GD::Image->new($image_width, $image_height)
   or die "Cannot create a $image_width x $image_height pixel image\n";

# Describe colors you want to use: each integer is 0-255 for Red, Green, and Blue.
# Per the GD documentation the first color allocated becomes the image
# background, so white must stay first even though $white is not
# referenced again in this script.
$white = $img->colorAllocate(255, 255, 255);
$black = $img->colorAllocate(0, 0, 0);
$green = $img->colorAllocate(0, 255, 0);
$blue  = $img->colorAllocate(0, 0, 255);
$red   = $img->colorAllocate(255, 0, 0);

##############################  Get the BLAST data  ###########################

# Read the whole parsed BLAST report into @data, one element per line.
# Three-argument open with a lexical handle is used so that characters in
# the file name can never be taken as an open mode, and so that Perl's
# special DATA handle is not reused.
open(my $data_fh, '<', $data_file)
   or die "Cannot open $data_file for reading: $!\n";
@data = <$data_fh>;
close($data_fh);

# Extract the gene name from the third line of the file, which carries a
# "DATA FOR QUERY" label in front of the name.
if (defined $data[2])
{
   $data[2] =~ s/DATA FOR QUERY//;
   # Trim surrounding whitespace (including any CR/LF line ending) so the
   # title reads cleanly
   ($seqName = $data[2]) =~ s/^\s+|\s+$//g;
}
else
{
   # File too short to contain the query line: keep going with an empty name
   warn "No query name found on line 3 of $data_file\n";
   $seqName = '';
}
$title = "Top 10 BLAST hits for $seqName";

##############################  Draw the image  ################################

# Write the title across the top - see the GD documentation for the other
# built-in font sizes
$img->string(gdMediumBoldFont, $margin_left, 5, $title, $black);

# Draw the query sequence as a red bar: it starts at the left/top margins,
# is ($seq_length / $div) pixels long and extends $seq_width pixels down
{
   my $bar_left = $margin_left;
   my $bar_top  = $margin_top;
   $img->filledRectangle($bar_left,
                         $bar_top,
                         $bar_left + ($seq_length / $div),
                         $bar_top + $seq_width,
                         $red);
}

# Ruler: at positions 1, 101, 201, ... print the position number in red and
# draw a blue vertical tick line through the bar
for (my $pos = 1; $pos < $seq_length; $pos += 100)
{
   my $tick_x = $margin_left + (($pos - 1) / $div);
   $img->string(gdMediumBoldFont, $tick_x, 20, $pos, $red);
   $img->line($tick_x, 35, $tick_x, 60, $blue);
}

# Draw one green box per BLAST hit, with the hit name written underneath.
# Hits are taken from index 5 of @data up to, but not including, the last
# line of the file.
# NOTE(review): the final line is never drawn - presumably it is a trailer
# rather than a hit; confirm against the output of blast_parse.pl.
for my $row (5 .. $#data - 1)
{
   # Strip the line ending first so it cannot end up inside the last field
   (my $line = $data[$row]) =~ s/[\r\n]+$//;

   # Get tab delimited fields
   my @fields = split(/\t/, $line);
   my ($start, $end, $hit) =
      @fields[$subjectStartField, $subjectEndField, $hitNameField];

   # Skip blank or malformed lines instead of drawing from undefined or
   # non-numeric coordinates
   next unless defined $hit
      && defined $start && $start =~ /^\s*\d+\s*$/
      && defined $end   && $end   =~ /^\s*\d+\s*$/;

   # A hit listed with start > end would give inverted rectangle
   # coordinates; order the pair so the box and its label are anchored at
   # the left edge
   ($start, $end) = ($end, $start) if $start > $end;

   # Horizontal position follows the hit coordinates (scaled by $div);
   # each row sits 20 pixels below the previous one, under the query bar.
   # NOTE(review): the ruler ticks use (position - 1) / $div while hits use
   # position / $div, so boxes sit one unit right of the ruler - confirm
   # whether that is intended.
   my $box_left   = $margin_left + ($start / $div);
   my $box_right  = $margin_left + ($end / $div);
   my $box_top    = $margin_top + $seq_width + ($row - 4) * 20;
   my $box_bottom = $box_top + 5;

   # Draw a box the length of the matching sequence
   $img->filledRectangle($box_left, $box_top, $box_right, $box_bottom, $green);
   # Under the matching sequence box, write the name of the sequence
   $img->string(gdTinyFont, $box_left, $box_bottom + 1, $hit, $black);
}

# Write the finished image to $imagefile as a PNG graphic.
# Three-argument open keeps characters in the file name from being read as
# an open mode.
open(my $out_fh, '>', $imagefile)
   or die "Cannot write to $imagefile: $!\n";

# PNG is binary data: without binmode, platforms that translate line
# endings would corrupt the file
binmode($out_fh);

print {$out_fh} $img->png
   or die "Cannot write to $imagefile: $!\n";

# Buffered write errors (e.g. a full disk) only show up when closing
close($out_fh)
   or die "Cannot write to $imagefile: $!\n";

print "All done - figure is $imagefile\n";