#!/usr/local/bin/perl -w
# Extract sequence features from a GenBank report with BioPerl.
# Once the features are parsed, they can be printed in any format.
# WI Biocomputing course - Unix and Programming Skills for Biologists - March 2003
#
# Usage: <this script> [genbank_file]
#   genbank_file  optional path to the report to read;
#                 defaults to 'genbank_sample.txt'.

use strict;
use warnings;

use Bio::SeqIO;

# Input file: the first command-line argument when one is given, otherwise
# the sample file this script has always read.
my $genbank_file = @ARGV ? $ARGV[0] : 'genbank_sample.txt';

# Formats: Fasta, EMBL, GenBank, Swissprot (swiss), PIR and GCG
my $seqin = Bio::SeqIO->new(
    -format => 'Genbank',
    -file   => $genbank_file,
);

# Process one sequence record per iteration until next_seq() yields a
# false value.
while (my $seqobj = $seqin->next_seq()) {
    print "Sequence: ", $seqobj->display_id, "\n\n";

    # get_all_SeqFeatures() is the current name for the deprecated
    # all_SeqFeatures().
    foreach my $feat ($seqobj->get_all_SeqFeatures()) {
        print $feat->primary_tag,
            " (from ", $feat->start, " to ", $feat->end, ")", "\n";

        # get_all_tags() / get_tag_values() replace the deprecated
        # all_tags() / each_tag_value(). A tag can carry several values,
        # so they are joined with a space onto one line.
        foreach my $tag ($feat->get_all_tags()) {
            print "\t", $tag, ": ",
                join(' ', $feat->get_tag_values($tag)), "\n";
        }

        print "new feature\n" if $feat->has_tag('new');

        # features can have sub features
        # my @subfeat = $feat->get_SeqFeatures();
    }
}