#!/usr/bin/perl
# missing_shifts.pl
#   Reports the missing AtomTypes in a given BMRB file.
#
#   usage: missing_shifts.pl [ options ] input_bmrb_file
#
#     input_bmrb_file - the BMRB file to check.
#     options:
#       -noaromatics    - will not check for ring atoms.
#       -expected       - print expected values for shifts.
#       -std            - print standard deviations for shifts.
#       -long           - print each shift on a separate line.
#       -printstats     - print percent missing data for all AtomTypes.
#       -stats_only     - print only the statistics.
#       -nonreleavant   - print info on all shifts and not just NMR relevant shifts.
#       -range min max  - only use the given range of residues for the calculations.
#       -count          - print a count of missing shifts.
#       -Conly          - print carbon shifts only.
#       -Nonly          - print nitrogen shifts only.
#       -Honly          - print hydrogen shifts only.
#       -Cgreater value - print carbons with average carbon shifts greater than value.
#       -Ngreater value - print nitrogens with average nitrogen shifts greater than value.
#       -Hgreater value - print hydrogens with average hydrogen shifts greater than value.
#       -Cless value    - print carbons with average carbon shifts less than value.
#       -Nless value    - print nitrogens with average nitrogen shifts less than value.
#       -Hless value    - print hydrogens with average hydrogen shifts less than value.
#
# Author: Gurmukh Sahota 02/15/2001
# Copyright: Gurmukh Sahota, 2001. All rights reserved.
#
# Mostly Rewritten by Hunter Moseley 02/19/2001
# Updated by Hunter Moseley 03/20/2001
# Copyright: Hunter Moseley, 2001. All rights reserved.
#
# Modified by Gurmukh Sahota 06/17/2001
#    added -printstats option
#

# These modules allow me to use modules rather than hardcoding everything into one script
use FindBin;
use lib $FindBin::Bin;

# Parser module parses in/out the cmap data and the peak file
use BMRBParsing qw(:ALL);

use strict;
use Scalar::Util qw(looks_like_number);

# NOTE(review): 'use warnings' is deliberately not added by this edit; the
# values produced by BMRBParsing are not visible here and could trigger new
# diagnostics.  Worth enabling once that module has been checked.

# Show the help text when called without arguments or with -h.
# The "not enough parameters" error is only printed when no argument was given.
if ((@ARGV < 1) || ($ARGV[0] =~ /^-h/)) {
    print "\nError: not enough parameters given.\n\n" if (@ARGV < 1);
    _print_usage();
    exit(1);
}

# Read in command line parameters
my $noaromatics          = 0;
my $print_expected_value = 0;
my $print_std            = 0;
my $print_long           = 0;
my $print_statistics     = 0;
my $print_stats_only     = 0;
my $restrict_atom_types  = 0;
my $count_shifts         = 0;
my $relevant             = 1;
my $range_min            = 0;
my $range_max            = 0;

# Per-element settings, keyed by the first letter of the atom name.
# A threshold of 0 means "no limit" (same as not giving the option).
my %show_element = (C => 0, N => 0, H => 0);    # -Conly / -Nonly / -Honly (also set by the threshold options)
my %min_shift    = (C => 0, N => 0, H => 0);    # -Cgreater / -Ngreater / -Hgreater
my %max_shift    = (C => 0, N => 0, H => 0);    # -Cless / -Nless / -Hless

# The last argument is always the input file, so stop when one is left.
while (@ARGV > 1) {
    my $switch = shift @ARGV;

    # Switches are matched by prefix, anchored at the start of the argument so
    # that values or paths containing "-x" are never mistaken for a switch.
    # Both "-name" and "--name" are accepted.
    my ($name) = $switch =~ /^--?(.+)/;
    if (!defined $name) {
        print STDERR "Error: Invalid parameter $switch given.\n";
        exit(1);
    }

    if ($name =~ /^noa/) {
        $noaromatics = 1;
    }
    elsif ($name =~ /^e/) {
        $print_expected_value = 1;
    }
    elsif ($name =~ /^non/) {
        $relevant = 0;
    }
    elsif ($name =~ /^std/) {
        $print_std = 1;
    }
    elsif ($name =~ /^sta/) {
        $print_stats_only = 1;
        $print_statistics = 1;
    }
    elsif ($name =~ /^l/) {
        $print_long = 1;
    }
    elsif ($name =~ /^p/) {
        $print_statistics = 1;
    }
    elsif ($name =~ /^r/) {
        $range_min = _next_numeric_argument($switch);
        $range_max = _next_numeric_argument($switch);
    }
    elsif ($name =~ /^c/) {
        $count_shifts = 1;
    }
    elsif ($name =~ /^([CNH])o/) {
        $show_element{$1}    = 1;
        $restrict_atom_types = 1;
    }
    elsif ($name =~ /^([CNH])g/) {
        my $element = $1;
        $show_element{$element} = 1;
        $min_shift{$element}    = _next_numeric_argument($switch);
        $restrict_atom_types    = 1;
    }
    elsif ($name =~ /^([CNH])l/) {
        my $element = $1;
        $show_element{$element} = 1;
        $max_shift{$element}    = _next_numeric_argument($switch);
        $restrict_atom_types    = 1;
    }
    else {
        print STDERR "Error: Invalid parameter $switch given.\n";
        exit(1);
    }
}

# Without any element restriction all three elements are reported.
if (!$restrict_atom_types) {
    %show_element = (C => 1, N => 1, H => 1);
}

# A value-taking switch may have consumed what was meant to be the file name.
my $input_BMRB_file = shift @ARGV;
if (!defined $input_BMRB_file) {
    print STDERR "Error: no input BMRB file given.\n";
    exit(1);
}

# prepare variables and read bmrb file
my @aromatic_aa         = qw(F Y W);
my @non_aromatic_shifts = sort { $a cmp $b } qw(H HA HB2 HB3 C CA CB N);

# The & call form is kept for the imported function because its prototype
# (if any) is not visible from this file.
my $bmrb_hlist = &read_bmrb_file($input_BMRB_file, {convert_aa_names => 1});
my ($BMRBshifts_hlist, $BMRB_shift_set_hlist) = Initialize_Atom_Shifts();

my $notfound   = {};    # residue name -> list of atom types without a shift
my $statistics = {};    # {"count"}{expected|found}{atom type} and {"categories"}{name}{expected|found}

my @residue_names = @{$$bmrb_hlist{"name_array"}};
my $completeness  = $$BMRB_shift_set_hlist{"completeness"};
my $ambiguity     = $$BMRB_shift_set_hlist{"ambiguity"};
my $range_given   = ($range_min || $range_max);

# cycle through each residue
for my $x (0 .. $#residue_names) {
    my $residuename = $residue_names[$x];

    # "||= {}" mirrors the autovivification of a direct nested lookup, so a
    # name without an rlist entry behaves like an empty residue.
    my $residue = ($$bmrb_hlist{"rlist"}{$residuename} ||= {});
    my $aa      = $$residue{"aa"};

    # skip residue if outside of range.
    next if ($range_given && (($$residue{"index"} < $range_min) || ($$residue{"index"} > $range_max)));

    $$notfound{$residuename} = [];
    foreach my $at (sort { $a cmp $b } keys %{$$BMRBshifts_hlist{$aa}}) {
        # Unless -nonreleavant was given, H and N of the first residue in the
        # file are neither expected nor reported.
        next if ($relevant && !$x && (($at eq "H") || ($at eq "N")));

        # Completeness categories (backbone, aromatic, sidechain) this atom belongs to.
        my @complete_cats = grep { exists $$completeness{$_}{$aa}{$at} } keys %$completeness;

        $$statistics{"count"}{"expected"}{$at}++;
        $$statistics{"categories"}{$_ . " completeness"}{"expected"}++ foreach @complete_cats;

        # "exists" is tested first so the list is only dereferenced when present.
        my $shift = $$residue{"shifts"}{$at};
        if (!($shift && (exists $$shift{"list"}) && (@{$$shift{"list"}} > 0))) {
            push @{$$notfound{$residuename}}, $at;
            next;
        }

        $$statistics{"count"}{"found"}{$at}++;
        $$statistics{"categories"}{$_ . " completeness"}{"found"}++ foreach @complete_cats;

        # Ambiguity statistics are relative to the shifts that were found:
        # "expected" counts found shifts, "found" those with ambiguity code 1.
        foreach my $ambig_cat (keys %$ambiguity) {
            next unless exists $$ambiguity{$ambig_cat}{$aa}{$at};
            my $tally = ($$statistics{"categories"}{"unambiguous " . $ambig_cat . " completeness"} ||= {});
            $$tally{"expected"}++;
            $$tally{"found"}++ if (exists $$shift{"ambiguity_code"} && ($$shift{"ambiguity_code"} == 1));
        }
    }
}

# print out missing shifts.
if (!$print_stats_only) {
    my $count = 0;
    foreach my $residuename (@residue_names) {
        # Residues skipped by -range were never examined; do not list them
        # as if they had no missing shifts.
        next unless exists $$notfound{$residuename};

        my $aa = $$bmrb_hlist{"rlist"}{$residuename}{"aa"};
        print $residuename, ":\t";
        print "\n" if ($print_long);
        for my $at (@{$$notfound{$residuename}}) {
            $count++;
            print "\t" if ($print_long);
            print $at, " ";
            print "\t" if ($print_long);
            # The "x ($print_long)" repetitions drop the labels and tabs in short mode.
            print "Expected = " x ($print_long), $$BMRBshifts_hlist{$aa}{$at}[0], "\t" x ($print_long), " " if ($print_expected_value);
            print "STD = " x ($print_long), $$BMRBshifts_hlist{$aa}{$at}[1], "\t" x ($print_long), " " if ($print_std);
            print " ";
            print "\n" if ($print_long);
        }
        print "\n";
    }

    if ($count_shifts) {
        print "\nThere were $count missing shifts.\n";
    }
}

# GSS 06/17/2001 allow the user to print statistics about the data
if ($print_statistics) {
    print "\n\n";
    if ($range_given) {
        print "AtomType Completeness Statistics (residues $range_min - $range_max):\n";
    }
    else {
        print "AtomType Completeness Statistics:\n";
    }
    print "\n\n";

    # Every category/atom type listed here has an "expected" count of at
    # least one, because the entries are only created by incrementing it.
    foreach my $cat (sort keys %{$$statistics{"categories"}}) {
        my $tally = $$statistics{"categories"}{$cat};
        $$tally{"found"} = 0 if !(exists $$tally{"found"});
        printf "\t%40s :: %4d / %4d = %6.2f%%\n", $cat, $$tally{"found"}, $$tally{"expected"},
            round($$tally{"found"} * 100 / $$tally{"expected"}, 3);
    }
    print "\n\n";

    foreach my $at (sort keys %{$$statistics{"count"}{"expected"}}) {
        $$statistics{"count"}{"found"}{$at} = 0 if !(exists $$statistics{"count"}{"found"}{$at});
        printf "\t%4s :: %4d / %4d = %6.2f%%\n", $at, $$statistics{"count"}{"found"}{$at}, $$statistics{"count"}{"expected"}{$at},
            round($$statistics{"count"}{"found"}{$at} * 100 / $$statistics{"count"}{"expected"}{$at}, 3);
    }
}

################################ _print_usage ##########################
#                                                                      #
#  Input   : None                                                      #
#  Output  : None (prints the help text to STDOUT)                     #
#  Purpose : Describe the command line of this script                  #
########################################################################
sub _print_usage {
    print map { "$_\n" } (
        'missing_shifts.pl',
        ' Reports the missing AtomTypes in a given BMRB file.',
        '',
        ' usage: missing_shifts.pl [ options ] input_bmrb_file',
        '',
        ' input_bmrb_file - the BMRB file to check.',
        ' options:',
        ' -noaromatics - will not check for ring atoms.',
        ' -expected - print expected values for shifts.',
        ' -std - print standard deviations for shifts.',
        ' -long - print each shift on a separate line.',
        ' -printstats - print percent missing data for all AtomTypes.',
        ' -stats_only - print only the statistics.',
        ' -nonreleavant - print info on all shifts and not just NMR relevant shifts.',
        ' -range min max - only use the given range of residues for the calculations.',
        ' -count - print a count of missing shifts.',
        ' -Conly - print carbon shifts only.',
        ' -Nonly - print nitrogen shifts only.',
        ' -Honly - print hydrogen shifts only.',
        ' -Cgreater value - print carbons with average carbon shifts greater than value.',
        ' -Ngreater value - print nitrogens with average nitrogen shifts greater than value.',
        ' -Hgreater value - print hydrogens with average hydrogen shifts greater than value.',
        ' -Cless value - print carbons with average carbon shifts less than value.',
        ' -Nless value - print nitrogens with average nitrogen shifts less than value.',
        ' -Hless value - print hydrogens with average hydrogen shifts less than value.',
        ' ',
    );
    return;
}

########################### _next_numeric_argument #####################
#                                                                      #
#  Input   : The switch currently being processed (for the message)    #
#  Output  : The next command line argument                            #
#  Purpose : Take the value of a switch from @ARGV; exits with an      #
#            error when the value is missing or not a number           #
########################################################################
sub _next_numeric_argument {
    my ($switch) = @_;
    my $value = shift @ARGV;
    if (!defined($value) || !looks_like_number($value)) {
        print STDERR "Error: parameter $switch requires a numeric value.\n";
        exit(1);
    }
    return $value;
}

################################### round ##############################
#                                                                      #
#  Input   : Variable, number of decimal places (1 for 0.1 ...)        #
#  Output  : Variable rounded half away from zero, formatted with      #
#            exactly that many decimal places                          #
#  Purpose : Rounding variables                                        #
########################################################################
sub round {
    my ($rounding_var, $power) = @_;

    my $scale  = 10 ** $power;
    my $scaled = $rounding_var * $scale;

    # int() truncates toward zero, so negative values need the mirrored form.
    my $rounded = ($scaled < 0) ? -int(-$scaled + 0.5) : int($scaled + 0.5);

    # sprintf keeps trailing zeros (30.0 stays "30.0" instead of becoming "30").
    return sprintf('%.' . $power . 'f', $rounded / $scale);
}

############################ _atom_set #################################
#                                                                      #
#  Input   : List of atom type names                                   #
#  Output  : Hash reference mapping each name to 1                     #
#  Purpose : Compact notation for the membership tables below          #
########################################################################
sub _atom_set {
    return { map { $_ => 1 } @_ };
}

######################## _delete_atoms_matching ########################
#                                                                      #
#  Input   : Residue->Atom_type->[shift, std] table, code reference    #
#            called with (residue, atom type, average shift)           #
#  Output  : None (the table is modified in place)                     #
#  Purpose : Delete every atom type for which the code returns true    #
########################################################################
sub _delete_atoms_matching {
    my ($table, $should_delete) = @_;
    foreach my $aa (keys %$table) {
        # keys() returns a copy of the key list, so deleting inside the loop is safe.
        foreach my $atom (keys %{$$table{$aa}}) {
            delete $$table{$aa}{$atom} if $should_delete->($aa, $atom, $$table{$aa}{$atom}[0]);
        }
    }
    return;
}

#---------------------------------------------------------------------------------------------------------------#
#  sub Initialize_Atom_Shifts()                                                                                 #
#                                                                                                               #
#  Input   : None (reads the file-level option variables: $noaromatics, $relevant, %show_element,               #
#            %min_shift, %max_shift, @aromatic_aa, @non_aromatic_shifts)                                        #
#  Output  : (1) hash of a hash of arrays of Residue->Atom_type->[shift, std_deviation], restricted to the      #
#                atom types selected by the options                                                             #
#            (2) hash of membership tables: {completeness}{backbone|aromatic|sidechain}{Residue}{Atom_type}     #
#                and {ambiguity}{CH2|CH3|sidechain NH2}{Residue}{Atom_type}; these are not filtered             #
#  Purpose : To initialize bmrb atom shift and deviation data                                                   #
#                                                                                                               #
#  How     : returns a hash of hash of an array, plus the membership tables                                     #
#---------------------------------------------------------------------------------------------------------------#
sub Initialize_Atom_Shifts {
    my $shift_type_hlist = {
        # NOTE(review): 6.16 for Ala HA is far from the HA averages of every
        # other residue in this table (4.15 - 4.72); possibly a typo.
        # Left unchanged -- confirm against the BMRB statistics.
        A => {H => [8.17, 0.61], HA => [6.16, 0.43], HB => [1.38, 0.28],
              C => [177.72, 2.19], CA => [53.05, 2.16], CB => [18.87, 1.73],
              N => [122.97, 3.88]},
        R => {H => [8.27, 0.59], HA => [4.28, 0.43], HB2 => [1.76, 0.30], HB3 => [1.79, 0.26],
              HG2 => [1.58, 0.25], HG3 => [1.58, 0.24], HD2 => [3.13, 0.20], HD3 => [3.14, 0.19],
              HE => [7.33, 0.54], HH11 => [6.71, 0.21], HH12 => [6.72, 0.20], HH21 => [6.72, 0.20],
              HH22 => [6.72, 0.20],
              C => [176.33, 2.33], CA => [56.93, 2.48], CB => [30.68, 1.76], CG => [27.15, 1.12],
              CD => [43.10, 0.75], CZ => [159.20, 0.79],
              N => [120.37, 3.96], NE => [85.60, 5.52], NH1 => [72.30, 1.08], NH2 => [72.74, 0.12]},
        N => {H => [8.36, 0.64], HA => [4.69, 0.39], HB2 => [2.77, 0.35], HB3 => [2.81, 0.33],
              HD21 => [7.19, 0.50], HD22 => [7.24, 0.56],
              C => [175.25, 1.67], CA => [53.54, 1.99], CB => [38.60, 1.68], CG => [175.78, 1.43],
              N => [118.80, 4.51], ND2 => [112.79, 2.54]},
        D => {H => [8.35, 0.57], HA => [4.62, 0.31], HB2 => [2.74, 0.30], HB3 => [2.75, 0.28],
              C => [176.56, 1.65], CA => [54.41, 2.03], CB => [40.50, 1.62], CG => [177.84, 1.89],
              N => [120.31, 4.13]},
        C => {H => [8.43, 0.66], HA => [4.72, 0.58], HB2 => [2.94, 0.43], HB3 => [3.01, 0.43],
              HG => [3.32, 2.54],
              C => [173.93, 1.98], CA => [57.13, 3.49], CB => [36.16, 7.97],
              N => [118.83, 3.98]},
        Q => {H => [8.22, 0.61], HA => [4.28, 0.44], HB2 => [2.03, 0.26], HB3 => [2.06, 0.27],
              HG2 => [2.31, 0.30], HG3 => [2.32, 0.27], HE21 => [7.11, 0.48], HE22 => [7.12, 0.45],
              C => [176.39, 1.90], CA => [56.58, 2.24], CB => [29.16, 1.96], CG => [33.70, 0.98],
              CD => [179.14, 1.66],
              N => [119.71, 3.96], NE2 => [111.80, 2.26]},
        E => {H => [8.31, 0.61], HA => [4.26, 0.42], HB2 => [2.04, 0.22], HB3 => [2.05, 0.21],
              HG2 => [2.32, 0.22], HG3 => [2.33, 0.21],
              C => [177.11, 1.98], CA => [57.42, 2.15], CB => [29.98, 1.69], CG => [35.94, 1.34],
              CD => [181.16, 1.52],
              N => [120.68, 3.55]},
        G => {H => [8.35, 0.74], HA2 => [3.90, 0.43], HA3 => [3.98, 0.39],
              C => [173.87, 1.88], CA => [45.25, 1.48],
              N => [109.20, 4.12]},
        H => {H => [8.26, 0.75], HA => [4.62, 0.52], HB2 => [3.10, 0.49], HB3 => [3.13, 0.51],
              HD1 => [11.56, 3.76], HD2 => [7.10, 0.53], HE1 => [8.11, 0.54], HE2 => [10.99, 3.37],
              C => [174.97, 2.17], CA => [55.99, 2.60], CB => [29.67, 2.22], CG => [130.59, 3.41],
              CD2 => [119.64, 2.27], CE1 => [136.09, 1.74],
              N => [118.70, 4.41], ND1 => [192.74, 17.24], NE2 => [180.95, 15.19]},
        I => {H => [8.25, 0.66], HA => [4.20, 0.56], HB => [1.79, 0.34],
              HG12 => [1.25, 0.40], HG13 => [1.27, 0.42], HG2 => [0.79, 0.32], HD1 => [0.70, 0.33],
              C => [175.99, 1.94], CA => [61.81, 2.86], CB => [38.50, 2.01], CG1 => [27.79, 2.04],
              CG2 => [17.25, 1.46], CD1 => [13.41, 1.62],
              N => [121.75, 4.61]},
        L => {H => [8.22, 0.61], HA => [4.31, 0.45], HB2 => [1.63, 0.32], HB3 => [1.61, 0.34],
              HG => [1.53, 0.31], HD1 => [0.76, 0.26], HD2 => [0.77, 0.28],
              C => [177.21, 2.02], CA => [55.63, 2.23], CB => [42.40, 1.94], CG => [26.76, 1.40],
              CD1 => [24.67, 1.70], CD2 => [24.27, 1.75],
              N => [121.71, 4.17]},
        K => {H => [8.21, 0.65], HA => [4.26, 0.42], HB2 => [1.77, 0.26], HB3 => [1.79, 0.26],
              HG2 => [1.36, 0.27], HG3 => [1.37, 0.28], HD2 => [1.61, 0.24], HD3 => [1.61, 0.24],
              HE2 => [2.92, 0.20], HE3 => [2.92, 0.19], HZ => [7.52, 0.21],
              C => [176.65, 2.04], CA => [56.77, 2.19], CB => [32.78, 1.83], CG => [24.88, 1.02],
              CD => [28.92, 1.10], CE => [41.65, 0.92],
              N => [121.01, 3.94], NZ => [33.90, 0.28]},
        M => {H => [8.29, 0.59], HA => [4.41, 0.42], HB2 => [2.01, 0.40], HB3 => [2.01, 0.41],
              HG2 => [2.47, 0.34], HG3 => [2.46, 0.31], HE => [1.84, 0.56],
              C => [176.71, 2.09], CA => [56.08, 2.26], CB => [32.86, 2.42], CG => [32.13, 1.16],
              CE => [17.43, 4.61],
              N => [120.22, 4.02]},
        F => {H => [8.43, 0.73], HA => [4.63, 0.57], HB2 => [2.96, 0.35], HB3 => [3.01, 0.34],
              HD1 => [6.80, 1.02], HD2 => [6.80, 1.03], HE1 => [6.85, 1.02], HE2 => [6.85, 1.02],
              HZ => [6.84, 0.94],
              C => [175.59, 2.17], CA => [58.27, 2.69], CB => [39.78, 1.86], CG => [138.35, 1.12],
              CD1 => [131.35, 1.21], CD2 => [131.34, 1.26], CE1 => [130.50, 0.96],
              CE2 => [130.53, 0.92], CZ => [129.03, 1.55],
              N => [120.59, 4.21]},
        P => {HA => [4.41, 0.36], HB2 => [2.01, 0.40], HB3 => [2.09, 0.42],
              HG2 => [1.91, 0.39], HG3 => [1.94, 0.37], HD2 => [3.61, 0.38], HD3 => [3.65, 0.42],
              C => [176.71, 1.71], CA => [63.21, 1.60], CB => [31.79, 1.10], CG => [27.07, 1.15],
              CD => [50.20, 0.92],
              N => [130.53, 8.81]},
        S => {H => [8.30, 0.59], HA => [4.51, 0.43], HB2 => [3.84, 0.31], HB3 => [3.85, 0.32],
              HG => [5.19, 1.15],
              C => [174.62, 1.66], CA => [58.49, 2.20], CB => [63.77, 1.55],
              N => [116.31, 3.82]},
        T => {H => [8.25, 0.65], HA => [4.49, 0.51], HB => [4.18, 0.38],
              HG1 => [4.17, 1.75], HG2 => [1.16, 0.30],
              C => [174.69, 1.63], CA => [61.96, 2.83], CB => [69.52, 2.01], CG2 => [21.41, 1.13],
              N => [115.10, 5.35]},
        W => {H => [8.37, 0.83], HA => [4.72, 0.58], HB2 => [3.19, 0.35], HB3 => [3.23, 0.33],
              HD1 => [7.16, 0.36], HE1 => [10.11, 0.57], HE3 => [7.04, 1.28], HZ2 => [7.01, 1.10],
              HZ3 => [6.53, 1.24], HH2 => [6.70, 1.13],
              C => [176.19, 1.82], CA => [57.49, 2.82], CB => [30.05, 1.88], CG => [110.37, 1.55],
              CD1 => [125.51, 1.79], CD2 => [127.56, 0.96], CE2 => [137.76, 0.97],
              CE3 => [119.92, 2.00], CZ2 => [114.01, 1.39], CZ3 => [121.40, 1.55],
              CH2 => [123.18, 1.14],
              N => [120.25, 4.47], NE1 => [129.30, 2.13]},
        Y => {H => [8.38, 0.75], HA => [4.65, 0.54], HB2 => [2.89, 0.35], HB3 => [2.96, 0.33],
              HD1 => [6.82, 0.80], HD2 => [6.82, 0.81], HE1 => [6.58, 0.72], HE2 => [6.59, 0.73],
              HH => [9.08, 1.08],
              C => [175.35, 1.87], CA => [57.68, 2.55], CB => [39.12, 2.10], CG => [129.29, 1.21],
              CD1 => [132.38, 0.97], CD2 => [132.08, 2.08], CE1 => [117.29, 0.99],
              CE2 => [117.32, 0.89], CZ => [156.37, 2.49],
              N => [120.44, 4.90]},
        V => {H => [8.26, 0.70], HA => [4.15, 0.56], HB => [1.97, 0.34],
              HG1 => [0.82, 0.32], HG2 => [0.83, 0.34],
              C => [175.75, 1.98], CA => [62.26, 2.99], CB => [32.77, 1.83], CG1 => [21.11, 1.54],
              CG2 => [21.31, 1.58],
              N => [121.05, 5.42]},
    };

    # ---- membership tables; built from the complete table, before any filtering ----

    # Backbone atoms: H, N, C, CA and HA for every residue, except that Gly
    # has HA2/HA3 instead of HA and Pro has no H.
    my %backbone = map { $_ => _atom_set(qw(H N C CA HA)) } keys %$shift_type_hlist;
    $backbone{"G"} = _atom_set(qw(H N C CA HA2 HA3));
    $backbone{"P"} = _atom_set(qw(N C CA HA));

    # Side chain atoms: everything in the complete table that is not backbone.
    # The values are copies of the [shift, std_deviation] pairs; only the
    # presence of a key is tested by the main program.
    my %sidechain;
    foreach my $aa (keys %$shift_type_hlist) {
        foreach my $atom (keys %{$$shift_type_hlist{$aa}}) {
            next if exists $backbone{$aa}{$atom};
            $sidechain{$aa}{$atom} = [ @{$$shift_type_hlist{$aa}{$atom}} ];
        }
    }
    # NOTE(review): the hand-written side chain table this loop replaces had
    # no entry for Cys HG although Ser HG and Thr HG1 were listed.  The
    # omission is preserved here -- confirm whether it is intentional.
    delete $sidechain{"C"}{"HG"};

    my $shift_special_hlist = {
        completeness => {
            backbone => \%backbone,
            aromatic => {
                F => _atom_set(qw(HD1 HD2 HE1 HE2 HZ CG CD1 CD2 CE1 CE2 CZ)),
                W => _atom_set(qw(HE3 HZ2 HZ3 HH2 CD2 CE2 CE3 CZ2 CZ3 CH2)),
                Y => _atom_set(qw(HD1 HD2 HE1 HE2 HH CG CD1 CD2 CE1 CE2 CZ)),
            },
            sidechain => \%sidechain,
        },
        ambiguity => {
            CH2 => {
                R => _atom_set(qw(HB2 HB3 HG2 HG3 HD2 HD3)),
                N => _atom_set(qw(HB2 HB3)),
                D => _atom_set(qw(HB2 HB3)),
                C => _atom_set(qw(HB2 HB3)),
                Q => _atom_set(qw(HB2 HB3 HG2 HG3)),
                E => _atom_set(qw(HB2 HB3 HG2 HG3)),
                G => _atom_set(qw(HA2 HA3)),
                H => _atom_set(qw(HB2 HB3)),
                I => _atom_set(qw(HG12 HG13)),
                L => _atom_set(qw(HB2 HB3)),
                K => _atom_set(qw(HB2 HB3 HG2 HG3 HD2 HD3 HE2 HE3)),
                M => _atom_set(qw(HB2 HB3 HG2 HG3)),
                F => _atom_set(qw(HB2 HB3)),
                P => _atom_set(qw(HB2 HB3 HG2 HG3 HD2 HD3)),
                S => _atom_set(qw(HB2 HB3)),
                W => _atom_set(qw(HB2 HB3)),
                Y => _atom_set(qw(HB2 HB3)),
            },
            CH3 => {
                L => _atom_set(qw(HD1 HD2 CD1 CD2)),
                V => _atom_set(qw(HG1 HG2 CG1 CG2)),
            },
            "sidechain NH2" => {
                R => _atom_set(qw(HH11 HH12 HH21 HH22 NH1 NH2)),
                N => _atom_set(qw(HD21 HD22)),
                Q => _atom_set(qw(HE21 HE22)),
            },
        },
    };

    # ---- restrict the shift table according to the command line options ----

    # -noaromatics: for F, Y and W keep only the atom types listed in
    # @non_aromatic_shifts.
    if ($noaromatics) {
        my %is_aromatic_aa = map { $_ => 1 } @aromatic_aa;
        my %is_kept_atom   = map { $_ => 1 } @non_aromatic_shifts;
        _delete_atoms_matching($shift_type_hlist, sub {
            my ($aa, $atom) = @_;
            return $is_aromatic_aa{$aa} && !$is_kept_atom{$atom};
        });
    }

    # Atom types that are dropped unless -nonreleavant was given.
    my %non_relevant_hash = (
        "P" => [ "N" ],
        "C" => [ "HG" ],
        "D" => [ "CG" ],
        "E" => [ "CD" ],
        "K" => [ "HZ", "NZ" ],
        "H" => [ "CG" ],
        "F" => [ "CG" ],
        "Y" => [ "CG", "CZ", "HH" ],
        "R" => [ "NH1", "NH2", "HH11", "HH12", "HH21", "HH22" ],
        "S" => [ "HG" ],
        "T" => [ "HG1" ],
        "W" => [ "CG", "CD2", "CE2" ],
    );
    if ($relevant) {
        foreach my $aa (keys %non_relevant_hash) {
            delete @{$$shift_type_hlist{$aa}}{ @{$non_relevant_hash{$aa}} };
        }
    }

    # Element selection and average-shift thresholds.  An atom belongs to an
    # element when its name starts with that letter.  A threshold of 0 is
    # treated as "not set".
    foreach my $element (qw(C N H)) {
        my $shown = $show_element{$element};
        my $min   = $min_shift{$element};
        my $max   = $max_shift{$element};
        next if ($shown && !$min && !$max);    # nothing to remove for this element

        _delete_atoms_matching($shift_type_hlist, sub {
            my ($aa, $atom, $average) = @_;
            return 0 if (substr($atom, 0, 1) ne $element);
            return 1 if (!$shown);
            return 1 if ($min && ($average <= $min));
            return 1 if ($max && ($average >= $max));
            return 0;
        });
    }

    return ($shift_type_hlist, $shift_special_hlist);
}