# Bioinformatics for Biologists, lecture 4: Unix & sequence analysis demo
#
# These commands are meant to be typed one at a time in an interactive
# session on the remote host (several steps prompt for input); this is a
# walkthrough, not a batch script.
# NOTE: the 'alias name value' form used below is csh/tcsh syntax; in
# bash/sh the equivalent is: alias name='value'

# Login and enter password when prompted
ssh -l user_name hebrides.wi.mit.edu
# or
# ssh -l user_name tak.wi.mit.edu

# Create directory ests, and directories db, blastn, blastx in 'ests'
mkdir ests
cd ests
mkdir db blastn blastx

# Go to /home/george/bioinfo_lec4/ests (where sequences are)
cd /home/george/bioinfo_lec4/ests
ls
ls *.tfa | wc -l  # get number of seqs

# Copy all files into directory /home/george/bioinfo_lec4/bioinfo
# since /home/george/bioinfo_lec4/ests doesn't have write permission
cp *.tfa ../bioinfo/.

# Check results
cd ../bioinfo
ls
ls *.tfa | wc -l

# Tar (combine) all tfa files into tarfile called all_ests.tar
tar cvf all_ests.tar *.tfa

# Check results
ls -l *.tar

# Compress (zip) tar file
gzip all_ests.tar
ls -l *.gz

# Remove all tfa files you just created
# Remove prompting (since 'rm' is aliased to 'rm -i')
alias rm rm
rm *.tfa
alias rm rm -i  # Re-alias interactive prompting

# Move zipped file to your ests directory
mv *.gz ~/ests/.

# Go to the zipped file, and unzip and untar it.
# Name the files explicitly: a bare '*' here would also match the db,
# blastn and blastx directories, and tar would treat those extra names as
# members to extract from the archive instead of unpacking the .tfa files.
cd ~/ests
gunzip all_ests.tar.gz
tar xvf all_ests.tar

# Check results
ls
ls *.tfa | wc -l

# Remove tar file since you're finished with it
rm *.tar

# Make multi seq file in db directory
# (db was already created above; -p makes this a no-op instead of an error)
mkdir -p db
cat *.tfa > db/mouse_heart

# Check results
cd db
more mouse_heart  # type q to exit from 'more'

# Blast one seq against the nr database using blastx,
# only looking at hits where E < 0.05, and only keep top 10 hits
cd ..
# Run these from your ests directory (the one holding the .tfa files and
# the db, blastn and blastx subdirectories).
#
# blastall
#   -p blastx            # program = blastx
#   -d nr                # db = nr (non-redundant protein database; blastx
#                        #   translates the DNA query and searches proteins)
#   -e 0.05              # Expect cut-off value threshold
#   -v 10                # number of one-line descriptions to show
#   -b 10                # number of alignments to show
#   -i 16493200.tfa      # input file
#   -o blastx/16493200   # output file
mkdir -p blastx  # -p: no error if the directory already exists
blastall -p blastx -d nr -e 0.05 -v 10 -b 10 -i 16493200.tfa -o blastx/16493200

# Make blast alias (if you want to use these specific options often in the future)
# (csh/tcsh alias syntax; in bash use: alias myblastx='blastall -p blastx ...')
alias myblastx blastall -p blastx -d nr -e 0.05 -v 10 -b 10

# Try it
myblastx -i 16493201.tfa -o blastx/16493201

# If you want to remove an alias, type unalias alias_name (ex: unalias myblastx)

# Turn multiple sequence file into blastable DB
# with sequences that can be extracted with fastacmd
#   -p F  (since it's not protein - it's DNA)
#   -o T  (to be able to use fastacmd to extract sequences)
cd db
formatdb -i mouse_heart -p F -o T

# Check out what files were made
ls -l

# Try it
cd ..
blastall -p blastn -d db/mouse_heart -e 0.05 -v 10 -b 10 -i 16493200.tfa -o blastn/16493200

# Try fastacmd to extract a sequence from the db using the GI
fastacmd -d db/mouse_heart -s 16493200

# or the accession
fastacmd -d db/mouse_heart -s BB659379