#!/usr/bin/perl
#
# This script takes a BMRB file as input and generates a table of chemical
# shifts in TALOS format.
#
# It reads the sequence from the BMRB file and then writes out the atoms that
# carry chemical shifts used by TALOS: N, CA, HA (HA2/HA3 for glycine), CB
# and C.
#
# Usage:   tab_4_talos.pl bmrb_file output_table
# Example: tab_4_talos.pl bmrb_file TabForTalos
#
# Based on a script by Aneerban.

# FindBin/lib let the script load modules that live next to it instead of
# hardcoding everything into one file.
use FindBin;
use lib $FindBin::Bin;
use strict;
use warnings;

# Hunter's BMRB parsing utility (provides readBMRBasCMAP).
use BMRBParsing qw(:ALL);

# message()
# Prints the usage text to STDOUT. Takes no arguments, returns nothing.
sub message {
    print "\n $0 requires two arguments, a BMRB file and a name for output ";
    print "\n Example: $0 bmrb_file TabForTalos \n";
    return;
}

# read_sequence($bmrb_path)
# Scans the BMRB file for every "_Mol_residue_sequence" tag and collects the
# sequence text that follows it, up to the first line containing ';'.
# Each sequence line is echoed to STDOUT (as the original script did).
# Returns a flat list with ONE residue letter per element, so that the
# 10/40-residue grouping of the DATA SEQUENCE records works per residue.
# Dies if the file cannot be opened.
sub read_sequence {
    my ($bmrb_path) = @_;
    my @residues;

    open(my $in_fh, '<', $bmrb_path)
        or die("Can't open $bmrb_path : $!");

    while (my $line = <$in_fh>) {
        next unless $line =~ /_Mol_residue_sequence/;

        # The line directly after the tag is discarded unread; presumably it
        # is the ';' that opens the multi-line text value -- TODO confirm
        # against the BMRB files in use.
        my $discarded = <$in_fh>;

        while (defined(my $seq_line = <$in_fh>)) {
            print $seq_line;
            last if $seq_line =~ /;/;    # end of the sequence text

            # Drop the newline and any other whitespace, then store one
            # residue letter per array element.
            $seq_line =~ s/\s+//g;
            push @residues, split(//, $seq_line);
        }
    }
    close($in_fh);

    return @residues;
}

# print_shift($out_fh, $residue_hr, $atom_label, $shift)
# Writes one TALOS shift record for the residue described by $residue_hr
# (its "index" and "aa" entries are used). Records whose shift value is
# undefined are skipped, so no bogus 0.000 shift is ever written.
sub print_shift {
    my ($out_fh, $residue_hr, $atom_label, $shift) = @_;

    return unless defined $shift;

    printf {$out_fh} "%4d %1s %4s %8.3f\n",
        $residue_hr->{"index"}, $residue_hr->{"aa"}, $atom_label, $shift;
    return;
}

# ---------------------------------------------------------------------------
# Main program
# ---------------------------------------------------------------------------

# Both the input file and the output name are required.
if (@ARGV < 2) {
    message();
    die "\n";
}

my $bmrbfile_s = $ARGV[0];    # chemical shifts in BMRB format
my $tabfile_s  = $ARGV[1];    # output table

# Three-letter to one-letter residue codes.
# NOTE: not referenced by the code below; kept for reference.
my %AAcode_h = (
    'ALA' => 'A', 'CYS' => 'C', 'ASP' => 'D', 'GLU' => 'E',
    'PHE' => 'F', 'GLY' => 'G', 'HIS' => 'H', 'ILE' => 'I',
    'LYS' => 'K', 'LEU' => 'L', 'MET' => 'M', 'ASN' => 'N',
    'PRO' => 'P', 'GLN' => 'Q', 'ARG' => 'R', 'SER' => 'S',
    'THR' => 'T', 'VAL' => 'V', 'TRP' => 'W', 'TYR' => 'Y',
    'ASX' => 'B', 'GLX' => 'Z',
);

# Read through the BMRB file extracting the sequence (one-letter code, one
# residue per element) -- may need modifications for other BMRB layouts.
my @sequence_a = read_sequence($bmrbfile_s);

# Read the chemical shifts using BMRBParsing.
#my $bmrb_list_hr = &read_bmrb_file($bmrbfile_s);
my $bmrb_list_hr = readBMRBasCMAP($bmrbfile_s);

open(my $out_fh, '>', $tabfile_s)
    or die("Can't open $tabfile_s : $!");

# Print the sequence to the table file: a new DATA SEQUENCE record every 40
# residues, and a blank between groups of 10 residues.
for my $index_n (0 .. $#sequence_a) {
    print {$out_fh} "\nDATA SEQUENCE" if $index_n % 40 == 0;
    print {$out_fh} " "               if $index_n % 10 == 0;
    print {$out_fh} $sequence_a[$index_n];
}

# Print the VARS and the FORMAT lines.
print {$out_fh} "\n\n";
print {$out_fh} "VARS RESID RESNAME ATOMNAME SHIFT\n";
print {$out_fh} "FORMAT %4d %1s %4s %8.3f\n";

# Print the atom chemical shifts.
# These have been reduced to the ones that have meaning for TALOS, i.e.
# HA, CA, CB, CO, N. All others are discarded.
# Each pair is [ key in the parsed "shifts" hash, atom name written out ];
# the order here is the order of the records within a residue.
my @talos_atoms = (
    [ "N15", "N"  ],
    [ "CA",  "CA" ],
    [ "HA",  "HA" ],
    [ "CB",  "CB" ],
    [ "CO",  "C"  ],
);

foreach my $residue_name_s (@{ $bmrb_list_hr->{"name_array"} }) {
    my $residue_hr = $bmrb_list_hr->{"rlist"}{$residue_name_s};
    my $shifts_hr  = $residue_hr->{"shifts"};

    foreach my $atom_ar (@talos_atoms) {
        my ($shift_key_s, $atom_label_s) = @{$atom_ar};
        next unless exists $shifts_hr->{$shift_key_s};

        if ($shift_key_s eq "HA" && $residue_hr->{"aa"} eq "G") {
            # Glycine has two alpha protons: written as HA2 and HA3.
            # HA3 is skipped by print_shift when a second value is absent.
            print_shift($out_fh, $residue_hr, "HA2",
                        $shifts_hr->{"HA"}{"list"}[0]);
            print_shift($out_fh, $residue_hr, "HA3",
                        $shifts_hr->{"HA"}{"list"}[1]);
        }
        else {
            print_shift($out_fh, $residue_hr, $atom_label_s,
                        $shifts_hr->{$shift_key_s}{"list"}[0]);
        }
    }
}

# Buffered write errors only surface at close, so check it.
close($out_fh) or die("Can't close $tabfile_s : $!");

exit();

# STUFF BELOW NOT FOR USE
# The original script kept the following code after exit(); it was never
# executed (and printed to an already-closed handle). It is preserved here
# as a comment for reference only.
#
# NOTE: values for Residue_ID are being stored as strings within this
# program - thus the slight difference in the sprintf format below, and what
# is shown as the table format in the output file.
#
# for ($index_n = 0; $index_n <= $#sequence_a; $index_n++) {
#     print(OF sprintf("%5s%6s%6s%5s%6s%6s%9.3f%9.3f%6.2f\n",
#         $sequence_a[$index_n][1], $sequence_a[$index_n][0], "HN",
#         $sequence_a[$index_n][1], $sequence_a[$index_n][0], 'N',
#         0.0, 1.000, 1.00));
# }
# close(OF);