#       Copyright Hunter Moseley, 2000. All rights reserved.
#       Written by Hunter Moseley 8/1/2000
#       "Mostly" ReWritten by Gurmukh Sahota 02/01/2001
#	Modified by Hunter Moseley 6/12/2001
#       Modified by Gurmukh Sahota 06/18/2001
#                fixed the cmap_conversion_hash (some wrong names)
#       Modified by Gurmukh Sahota 07/01/2001
#                added hack for name_array where inconsistent with current residue shifts (write_bmrb_file)
#                added hack for non-existent _Atom_type (using first letter of shift name) (write_bmrb_file)
#                modified the multiplicity hash for increased atomType inclusions and some typos.
#                modified deMultiplicate so that it now accepts -1 residues and demultiplicates accordingly.
#
#  BMRBParsing.pm
#	Contains subroutines for reading and writing BMRB files.
#
#	Subroutines:
#		read_bmrb_file - reads a bmrb file and returns a hash of records (Residue is a hash).
#		write_bmrb_file - prints a bmrb file out to $filename
#		convert_bmrb_shift_names - converts the bmrb shift names using the given shift_conversion_hash.
#		adjust_by_reference - Adjusts the carbon chemical shifts to DSS standard based on their current reference.
#		convert_residue_names - Converts residue names in the bmrb file structure from 3-letter to 1-letter and vice versa
#
package BMRBParsing;
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(clone read_bmrb_file write_bmrb_file adjust_by_reference convert_bmrb_shift_names convert_residue_names readBMRBasCMAP deMultiplicate convertBMRBtoCMap);
%EXPORT_TAGS = ( ALL => [@EXPORT_OK] );

use strict;
use Dumpvalue qw(:ALL);

my $dumper = new Dumpvalue;



# useful hashes

# %multiplicity
#   Keyed by uppercase one-letter amino acid code.  Each inner hash maps a shift
#   (atom) name to an integer count.  Both BMRB-style backbone names ("H", "C",
#   "N") and CMAP-style names ("HN", "CO", "N15") are present, as are both the
#   collapsed names (e.g. "HB" => 2) and the individual names ("HB2" => 1).
#   NOTE(review): presumably the count is the number of separate resonances a
#   name can stand for, as used by deMultiplicate (not visible here) -- confirm.
#   NOTE(review): the "N" (Asn) entry lists "CE" => 1 -- verify this is intended.
#   Original author's note: LEU HG => 1, MET CD => 0, LEU CE => 0
my %multiplicity = ("A" => { "H" => 1, "HN" => 1, "HA" => 1, "HB" => 1, "CA" => 1, "CB" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1},
		    "C" => { "H" => 1, "HN" => 1, "HA" => 1, "HB" => 2, "HB2" => 1, "HB3" => 1, "CA" => 1, "CB" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1, "SG" => 1, "HG" => 1},
		    "D" => { "H" => 1, "HN" => 1, "HA" => 1, "HB" => 2, "HB2" => 1, "HB3" => 1, "HD" => 1, "HD2" => 1, "CA" => 1, "CB" => 1, "CG" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1, "OD" => 2, "OD1" => 1, "OD2" => 1},
		    "E" => { "H" => 1, "HN" => 1, "HA" => 1, "HB" => 2, "HB2" => 1, "HB3" => 1, "HG" => 2, "HG2" => 1, "HG3" => 1, "HE" => 1, "HE2" => 1, "CA" => 1, "CB" => 1, "CG" => 1, "CD" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1, "OE" => 2, "OE1" => 1, "OE2" => 1},
		    "F" => { "H" => 1, "HN" => 1, "HA" => 1, "HB" => 2, "HB2" => 1, "HB3" => 1, "HD" => 2, "HD1" => 1, "HD2" => 1, "HE" => 2, "HE1" => 1, "HE2" => 1, "HZ" => 1, "CA" => 1, "CB" => 1, "CG" => 1, "CD" => 2, "CE" => 2, "CZ" => 1, "CD1" => 1, "CD2" => 1, "CE1" => 1, "CE2" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1},
		    "G" => { "H" => 1, "HN" => 1, "HA2" => 1, "HA3" => 1, "CA" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1},
		    "H" => { "H" => 1, "HN" => 1, "HA" => 1, "HB" => 2, "HB2" => 1, "HB3" => 1, "HD" => 2, "HD1" => 1, "HD2" => 1, "HE" => 2, "HE1" => 1, "HE2" => 1, "CA" => 1, "CB" => 1, "CG" => 1, "CD" => 1, "CE" => 1, "CD2" => 1, "CE1" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1, "ND" => 1, "ND1" => 1, "NE" => 1, "NE2" => 1},
		    "I" => {"H" => 1, "HN" => 1, "HA" => 1, "HB" => 1, "HG" => 3, "HG12" => 1, "HG13" => 1, "HG2" => 1, "HD" => 1, "HD1" => 1, "CA" => 1, "CB" => 1, "CG" => 2, "CD" => 1, "CG1" => 1, "CG2" => 1, "CD1" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1},
		    "K" => { "H" => 1, "HN" => 1, "HA" => 1, "HB" => 2, "HB2" => 1, "HB3" => 1, "HG" => 2, "HG2" => 1, "HG3" => 1, "HD" => 2, "HD2" => 1, "HD3" => 1, "HE" => 2, "HE2" => 1, "HE3" => 1, "HZ" => 1, "CA" => 1, "CB" => 1, "CG" => 1, "CD" => 1, "CE" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1, "NZ" => 1},
		    "L" => { "H" => 1, "HN" => 1, "HA" => 1, "HB" => 2, "HB2" => 1, "HB3" => 1, "HG" => 1, "HD" => 2, "HD1" => 1, "HD2" => 1, "CA" => 1, "CB" => 1, "CG" => 1, "CD" => 2, "CD1" => 1, "CD2" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1},
		    "M" => { "H" => 1, "HN" => 1, "HA" => 1, "HB" => 2, "HB2" => 1, "HB3" => 1, "HG" => 2, "HG2" => 1, "HG3" => 1, "HE" => 1, "CA" => 1, "CB" => 1, "CG" => 1, "CE" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1, "SD" => 1},
		    "N" => { "H" => 1, "HN" => 1, "HA" => 1, "HB" => 2, "HB2" => 1, "HB3" => 1, "HD" => 2, "HD2" => 2, "HD21" => 1, "HD22" => 1, "CA" => 1, "CB" => 1, "CG" => 1, "CE" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1, "ND" => 1, "ND2" => 1, "OD" => 1, "OD1" => 1},
		    "P" => { "HA" => 1, "HB" => 2, "HB2" => 1, "HB3" => 1, "HG" => 2, "HG2" => 1, "HG3" => 1, "HD" => 2, "HD2" => 1, "HD3" => 1, "CA" => 1, "CB" => 1, "CG" => 1, "CD" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1},
		    "Q" => { "H" => 1, "HN" => 1, "HA" => 1, "HB" => 2, "HB2" => 1, "HB3" => 1, "HG" => 2, "HG2" => 1, "HG3" => 1, "HE" => 2, "HE2" => 2, "HE21" => 1, "HE22" => 1, "CA" => 1, "CB" => 1, "CG" => 1, "CD" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1, "NE" =>1, "NE2" => 1, "OE" => 1, "OE1" => 1},
		    "R" => { "H" => 1, "HN" => 1, "HA" => 1, "HB" => 2, "HB2" => 1, "HB3" => 1, "HG" => 2, "HG2" => 1, "HG3" => 1, "HD" => 2, "HD2" => 1, "HD3" => 1, "HE" => 1, "HH" => 4, "HH1" => 2, "HH11" => 1, "HH12" => 1, "HH2" => 2, "HH21" => 1, "HH22" => 1, "CA" => 1, "CB" => 1, "CG" => 1, "CD" => 1, "CZ" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1, "NE" => 1, "NH" => 2, "NH1" => 1, "NH2" => 1},
		    "S" => { "H" => 1, "HN" => 1, "HA" => 1, "HB" => 2, "HB2" => 1, "HB3" => 1, "HG" => 1, "CA" => 1, "CB" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1, "OG" => 1},
		    "T" => { "H" => 1, "HN" => 1, "HA" => 1, "HB" => 1, "HG" =>2, "HG1" => 1, "HG2" => 1, "CA" => 1, "CB" => 1, "CG" => 1, "CG2" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1, "OG" => 1, "OG1" => 1},
		    "V" => { "H" => 1, "HN" => 1, "HA" => 1, "HB" => 1, "HG" => 2, "HG1" => 1, "HG2" => 1, "CA" => 1, "CB" => 1, "CG" => 2, "CG1" => 1, "CG2" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1},
		    "W" => { "H" => 1, "HN" => 1, "HA" => 1, "HB" => 2, "HB2" => 1, "HB3" => 1, "HD" => 1, "HD1" => 1, "HE" => 2, "HE1" => 1, "HE3" => 1, "HZ" => 2, "HZ2" => 1, "HZ3" => 1, "HH" => 1, "HH2" => 1, "CA" => 1, "CB" => 1, "CG" => 1, "CD" => 2, "CE" => 2, "CZ" => 2, "CH" => 1, "CD1" => 1, "CD2" => 1, "CE2" => 1, "CE3" => 1, "CZ2" => 1, "CZ3" => 1, "CH2" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1, "NE" => 1, "NE1" => 1},
		    "Y" => { "H" => 1, "HN" => 1, "HA" => 1,  "HB" => 2, "HB2" => 1, "HB3" => 1, "HD" => 2, "HD1" => 1, "HD2" => 1, "HE" => 2, "HE1" => 1, "HE2" => 1, "HH" => 1, "CA" => 1, "CB" => 1, "CG" => 1, "CD" => 2, "CE" => 2, "CZ" => 1, "CD1" => 1, "CD2" => 1, "CE1" => 1, "CE2" => 1, "C" => 1, "CO" => 1, "N" => 1, "N15" => 1, "OH" => 1}
		    );

# %aa_name_conversion
#   Maps a lowercased amino acid name -- either the three-letter code ("ala")
#   or the one-letter code ("a") -- to the uppercase one-letter code ("A").
#   Lookups in this file lowercase the key first (lc($aa)).
my %aa_name_conversion = do
  {
  # three-letter code => one-letter code; the one-letter keys are derived below.
  my %one_letter_for = qw( ala A   arg R   asn N   asp D
                           cys C   gln Q   glu E   gly G
                           his H   ile I   leu L   lys K
                           met M   phe F   pro P   ser S
                           thr T   trp W   tyr Y   val V );

  ( %one_letter_for, map { ( lc($_) => $_ ) } values %one_letter_for );
  };

# %aa_name_conversion2
#   Maps an uppercased amino acid name -- either the one-letter code ("A") or
#   the three-letter code ("ALA") -- to the capitalized three-letter code ("Ala").
#   Lookups in this file uppercase the key first (uc($aa)).
my %aa_name_conversion2 = do
  {
  # one-letter code => capitalized three-letter code; "ALA"-style keys are derived below.
  my %three_letter_for = qw( A Ala   R Arg   N Asn   D Asp
                             C Cys   Q Gln   E Glu   G Gly
                             H His   I Ile   L Leu   K Lys
                             M Met   F Phe   P Pro   S Ser
                             T Thr   W Trp   Y Tyr   V Val );

  ( %three_letter_for, map { ( uc($_) => $_ ) } values %three_letter_for );
  };


# %degeneracy_shift_conversion
#   Keyed by lowercase three-letter residue code.  Each inner hash maps a BMRB
#   atom name to the shift name it is stored under.  Many numbered names are
#   collapsed onto one name (e.g. "HB2" and "HB3" => "HB"), while others keep
#   their number (e.g. his "HD1", thr "HG1", all trp ring atoms).
#   This is the default shift conversion hash used by read_bmrb_file when the
#   caller does not supply $$read_options{"shift_conversion_hash"}.
#   NOTE(review): "phe" has no "HE"/"CE" keys and "ile" has no "HG"/"CG" keys,
#   unlike the otherwise similar "tyr"/"val" entries -- confirm this is intended.
my %degeneracy_shift_conversion = 
  ( "ala" => { "H" => "H", "HA" => "HA", "HB" => "HB", "C" => "C", "CA" => "CA", "CB" => "CB", "N" => "N" },
    "arg" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "HG2" => "HG",
	       "HG3" => "HG", "HD" => "HD", "HD2" => "HD", "HD3" => "HD", "HE" => "HE", "HH" => "HH", "HH11" => "HH",
	       "HH12" => "HH", "HH21" => "HH", "HH22" => "HH", "C" => "C", "CA" => "CA", "CB" => "CB", "CG" => "CG",
	       "CD" => "CD", "CZ" => "CZ", "N" => "N", "NE" => "NE", "NH" => "NH", "NH1" => "NH", "NH2" => "NH" },
    "asn" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HD2" => "HD2", "HD21" => "HD2",
	       "HD22" => "HD2", "C" => "C", "CA" => "CA", "CB" => "CB", "CG" => "CG", "N" => "N", "ND2" => "ND2" },
    "asp" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "C" => "C", "CA" => "CA",
	       "CB" => "CB", "CG" => "CG", "N" => "N" },
    "cys" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "C" => "C",
	       "CA" => "CA", "CB" => "CB", "N" => "N" },
    "gln" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "HG2" => "HG",
	       "HG3" => "HG", "HE2" => "HE2", "HE21" => "HE2", "HE22" => "HE2", "C" => "C", "CA" => "CA", "CB" => "CB",
	       "CG" => "CG", "CD" => "CD", "N" => "N", "NE2" => "NE2" },
    "glu" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "HG2" => "HG",
	       "HG3" => "HG", "C" => "C", "CA" => "CA", "CB" => "CB", "CG" => "CG", "CD" => "CD", "N" => "N" },
    "gly" => { "H" => "H", "HA" => "HA", "HA2" => "HA", "HA3" => "HA", "C" => "C", "CA" => "CA", "N" => "N" },
    "his" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HD1" => "HD1", "HD2" => "HD2",
	       "HE1" => "HE1", "HE2" => "HE2", "C" => "C", "CA" => "CA", "CB" => "CB", "CG" => "CG", "CD2" => "CD2",
	       "CE1" => "CE1", "N" => "N", "ND1" => "ND1", "NE2" => "NE2" },
    "ile" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HG1" => "HG", "HG12" => "HG", "HG13" => "HG", 
	       "HG2" => "HG", "HD"=>"HD", "HD1" => "HD", "C" => "C", "CA" => "CA", "CB" => "CB", "CG1" => "CG", "CG2" => "CG",
	       "CD1" => "CD", "CD"=>"CD", "N" => "N" },
    "leu" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "HD" => "HD",
	       "HD1" => "HD", "HD2" => "HD", "C" => "C", "CA" => "CA", "CB" => "CB", "CG" => "CG", "CD" => "CD",
	       "CD1" => "CD", "CD2" => "CD", "N" => "N" },
    "lys" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "HG2" => "HG",
	       "HG3" => "HG", "HD" => "HD", "HD2" => "HD", "HD3" => "HD", "HE" => "HE", "HE2" => "HE", "HE3" => "HE",
	       "HZ" => "HZ", "C" => "C", "CA" => "CA", "CB" => "CB", "CG" => "CG", "CD" => "CD", "CE" => "CE",
	       "N" => "N", "NZ" => "NZ" },
    "met" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "HG2" => "HG",
	       "HG3" => "HG", "HE" => "HE", "C" => "C", "CA" => "CA", "CB" => "CB", "CG" => "CG", "CE" => "CE",
	       "N" => "N" },
    "phe" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HD" => "HD", "HD1" => "HD",
	       "HD2" => "HD", "HE1" => "HE", "HE2" => "HE", "HZ" => "HZ", "C" => "C", "CA" => "CA", "CB" => "CB",
	       "CG" => "CG", "CD" => "CD", "CD1" => "CD", "CD2" => "CD", "CE1" => "CE", "CE2" => "CE", "CZ" => "CZ",
	       "N" => "N" },
    "pro" => { "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "HG2" => "HG", "HG3" => "HG",
	       "HD" => "HD", "HD2" => "HD", "HD3" => "HD", "C" => "C", "CA" => "CA", "CB" => "CB", "CG" => "CG",
	       "CD" => "CD", "N" => "N" },
    "ser" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "C" => "C",
	       "CA" => "CA", "CB" => "CB", "N" => "N" },
    "thr" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HG1" => "HG1", "HG2" => "HG2", "C" => "C", "CA" => "CA",
	       "CB" => "CB", "CG2" => "CG2", "N" => "N" },
    "trp" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HD1" => "HD1", "HE1" => "HE1",
	       "HE3" => "HE3", "HZ2" => "HZ2", "HZ3" => "HZ3", "HH2" => "HH2", "C" => "C", "CA" => "CA", "CB" => "CB",
	       "CG" => "CG", "CD1" => "CD1", "CD2" => "CD2", "CE2" => "CE2", "CE3" => "CE3", "CZ2" => "CZ2",
	       "CZ3" => "CZ3", "CH2" => "CH2", "N" => "N", "NE1" => "NE1" },
    "tyr" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HD" => "HD", "HD1" => "HD",
	       "HD2" => "HD", "HE" => "HE", "HE1" => "HE", "HE2" => "HE", "HH" => "HH", "C" => "C", "CA" => "CA",
	       "CB" => "CB", "CG" => "CG", "CD" => "CD", "CD1" => "CD", "CD2" => "CD", "CE" => "CE", "CE1" => "CE",
	       "CE2" => "CE", "CZ" => "CZ", "N" => "N" },
    "val" => { "H" => "H", "HA" => "HA", "HB" => "HB", "HG" => "HG", "HG1" => "HG", "HG2" => "HG", "C" => "C",
	       "CA" => "CA", "CB" => "CB", "CG" => "CG", "CG1" => "CG", "CG2" => "CG", "N" => "N" } );

# %cmap_shift_conversion
#   Keyed by lowercase three-letter residue code.  Each inner hash maps an atom
#   name to its CMAP-style shift name: the backbone names become "HN", "CO" and
#   "N15", and numbered side-chain names are collapsed onto their position
#   letter (e.g. "HB2"/"HB3" => "HB", "CD1"/"CD2" => "CD").  The CMAP-style
#   names are also present as identity entries ("HN" => "HN", ...), so names
#   that have already been converted map to themselves.
#
#   Fixed: "phe" used to map "HE1"/"HE2" to "HD", which merged the epsilon
#   protons into the delta group; they now map to "HE" as they do for "tyr".
#   The "HE" => "HE" and "CE" => "CE" identity entries were added to "phe" so
#   that already-converted names are recognized, matching "tyr".
my %cmap_shift_conversion = 
  ( "ala" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "C" => "CO", "CA" => "CA", "CB" => "CB", "N" => "N15", 
	       "HN" => "HN", "CO" => "CO", "N15" => "N15" },
    "arg" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "HG2" => "HG",
	       "HG3" => "HG", "HD" => "HD", "HD2" => "HD", "HD3" => "HD", "HE" => "HE", "HH" => "HH", "HH1" => "HH", 
	       "HH11" => "HH", "HH2" => "HH", "HH12" => "HH", "HH21" => "HH", "HH22" => "HH", "C" => "CO", 
	       "CA" => "CA", "CB" => "CB", "CG" => "CG", "CD" => "CD", "CZ" => "CZ", "N" => "N15", "NE" => "NE", 
	       "NH" => "NH", "NH1" => "NH", "NH2" => "NH", "HN" => "HN", "CO" => "CO", "N15" => "N15" },
    "asn" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HD2" => "HD", "HD21" => "HD",
	       "HD22" => "HD", "C" => "CO", "CA" => "CA", "CB" => "CB", "CG" => "CG", "N" => "N15", "ND2" => "ND", 
	       "HN" => "HN", "CO" => "CO", "N15" => "N15", "ND" => "ND", "HD" => "HD" },
    "asp" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "C" => "CO", "CA" => "CA",
	       "CB" => "CB", "CG" => "CG", "N" => "N15", "HN" => "HN", "CO" => "CO", "N15" => "N15", "OD1" => "OD",
	        "OD2" => "OD", "HD2" => "HD", "OD" => "OD", "HD" => "HD"},
    "cys" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "C" => "CO",
	       "CA" => "CA", "CB" => "CB", "N" => "N15", "HN" => "HN", "CO" => "CO", "N15" => "N15", "SG" => "SG" },
    "gln" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "HG2" => "HG",
	       "HG3" => "HG", "HE2" => "HE", "HE21" => "HE", "HE22" => "HE", "C" => "CO", "CA" => "CA", "CB" => "CB",
	       "CG" => "CG", "CD" => "CD", "N" => "N15", "NE2" => "NE", "HN" => "HN", "CO" => "CO", "N15" => "N15",
	       "NE" => "NE", "HE" => "HE", "OE1" => "OE", "OE" => "OE"},
    "glu" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "HG2" => "HG",
	       "HG3" => "HG", "C" => "CO", "CA" => "CA", "CB" => "CB", "CG" => "CG", "CD" => "CD", "N" => "N15", 
	       "HN" => "HN", "CO" => "CO", "N15" => "N15", "OE1" => "OE", "OE2" => "OE", "OE" => "OE", "HE2" => "HE",
	       "HE" => "HE"},
    "gly" => { "H" => "HN", "HA" => "HA", "HA2" => "HA", "HA3" => "HA", "C" => "CO", "CA" => "CA", "N" => "N15", 
	       "HN" => "HN", "CO" => "CO", "N15" => "N15" },
    "his" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HD1" => "HD", "HD2" => "HD",
	       "HE1" => "HE", "HE2" => "HE", "C" => "CO", "CA" => "CA", "CB" => "CB", "CG" => "CG", "CD2" => "CD",
	       "CE1" => "CE", "N" => "N15", "ND1" => "ND", "NE2" => "NE", "ND" => "ND", "NE" => "NE", "CD" => "CD", 
	       "HN" => "HN", "CO" => "CO", "N15" => "N15", "CE" => "CE", "HD" => "HD", "HE" => "HE" },
    "ile" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HG1" => "HG", "HG12" => "HG", "HG13" => "HG", 
	       "HG2" => "HG", "HD1" => "HD", "C" => "CO", "CA" => "CA", "CB" => "CB", "CG1" => "CG", "CG2" => "CG",
	       "CD1" => "CD", "N" => "N15", "HD" => "HD", "HN" => "HN", "CO" => "CO", "N15" => "N15", "HG" => "HG",
	       "CD" => "CD", "CG" => "CG" },
    "leu" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "HD" => "HD",
	       "HD1" => "HD", "HD2" => "HD", "C" => "CO", "CA" => "CA", "CB" => "CB", "CG" => "CG", "CD" => "CD",
	       "CD1" => "CD", "CD2" => "CD", "N" => "N15", "HN" => "HN", "CO" => "CO", "N15" => "N15"},
    "lys" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "HG2" => "HG",
	       "HG3" => "HG", "HD" => "HD", "HD2" => "HD", "HD3" => "HD", "HE" => "HE", "HE2" => "HE", "HE3" => "HE",
	       "HZ" => "HZ", "C" => "CO", "CA" => "CA", "CB" => "CB", "CG" => "CG", "CD" => "CD", "CE" => "CE",
	       "N" => "N15", "NZ" => "NZ", "HN" => "HN", "CO" => "CO", "N15" => "N15" },
    "met" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "HG2" => "HG",
	       "HG3" => "HG", "HE" => "HE", "C" => "CO", "CA" => "CA", "CB" => "CB", "CG" => "CG", "CE" => "CE",
	       "N" => "N15", "HN" => "HN", "CO" => "CO", "N15" => "N15" , "SD" => "SD"},
    "phe" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HD" => "HD", "HD1" => "HD",
	       "HD2" => "HD", "HE" => "HE", "HE1" => "HE", "HE2" => "HE", "HZ" => "HZ", "C" => "CO", "CA" => "CA",
	       "CB" => "CB", "CG" => "CG", "CD" => "CD", "CD1" => "CD", "CD2" => "CD", "CE" => "CE", "CE1" => "CE",
	       "CE2" => "CE", "CZ" => "CZ", "N" => "N15", "HN" => "HN", "CO" => "CO", "N15" => "N15" },
    "pro" => { "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "HG2" => "HG", "HG3" => "HG",
	       "HD" => "HD", "HD2" => "HD", "HD3" => "HD", "C" => "CO", "CA" => "CA", "CB" => "CB", "CG" => "CG",
	       "CD" => "CD", "N" => "N15", "HN" => "HN", "CO" => "CO", "N15" => "N15" },
    "ser" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HG" => "HG", "C" => "CO",
	       "CA" => "CA", "CB" => "CB", "N" => "N15", "HN" => "HN", "CO" => "CO", "N15" => "N15", "OG" => "OG" },
    "thr" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HG1" => "HG", "HG2" => "HG", "C" => "CO", "CA" => "CA",
	       "CB" => "CB", "CG2" => "CG", "N" => "N15", "HN" => "HN", "CO" => "CO", "N15" => "N15", "CG" => "CG",
	       "HG" => "HG" },
    "trp" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HD1" => "HD", "HE1" => "HE",
	       "HE3" => "HE", "HZ2" => "HZ", "HZ3" => "HZ", "HH2" => "HH", "C" => "CO", "CA" => "CA", "CB" => "CB",
	       "CG" => "CG", "CD1" => "CD", "CD2" => "CD", "CE2" => "CE", "CE3" => "CE", "CZ2" => "CZ",
	       "CZ3" => "CZ", "CH2" => "CH", "N" => "N15", "NE1" => "NE", "CZ" => "CZ", "HZ" => "HZ", "CD" => "CD",
	       "HN" => "HN", "CO" => "CO", "N15" => "N15", "HE" => "HE", "HH" => "HH", "NE" => "NE", "CE" => "CE",
	       "HD" => "HD", "CH" => "CH" },
    "tyr" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HB2" => "HB", "HB3" => "HB", "HD" => "HD", "HD1" => "HD",
	       "HD2" => "HD", "HE" => "HE", "HE1" => "HE", "HE2" => "HE", "HH" => "HH", "C" => "CO", "CA" => "CA",
	       "CB" => "CB", "CG" => "CG", "CD" => "CD", "CD1" => "CD", "CD2" => "CD", "CE" => "CE", "CE1" => "CE",
	       "CE2" => "CE", "CZ" => "CZ", "N" => "N15", "HN" => "HN", "CO" => "CO", "N15" => "N15", "OH" => "OH" },
    "val" => { "H" => "HN", "HA" => "HA", "HB" => "HB", "HG" => "HG", "HG1" => "HG", "HG2" => "HG", "C" => "CO",
	       "CA" => "CA", "CB" => "CB", "CG" => "CG", "CG1" => "CG", "CG2" => "CG", "N" => "N15", 
	       "HN" => "HN", "CO" => "CO", "N15" => "N15" } );
  
# @shift_name_order
#   Ordered list of CMAP-style shift names: first the names of the residue
#   itself, then the "-1" variants of every name except "HN" and "N15".
my @shift_name_order = do
  {
  my @amide_names = ("HN", "N15");
  my @other_names = qw(CO CA CB CG CD CE CZ CH HA HB HG HD HE HZ HH);

  ( @amide_names, @other_names, map { $_ . "-1" } @other_names );
  };


# clone
#   Copies the contents of one bmrb_hlist into another hash.
#
#   Only fields that exist in the original are created in the clone, so
#   "exists" tests (e.g. the atom_type test in write_bmrb_file) give the same
#   answer on the clone as on the original.  Array-valued fields are copied into
#   new arrays, so changing the clone does not change the original.  The original
#   is never modified (no keys are autovivified in it).
#
#   Parameters:
#        $original_hlist - reference to the original bmrb_hlist (read only).
#        $cloned_hlist   - reference to the hash that receives the copy.  Fields
#                          already present in it are overwritten field by field.
#
#   Returns nothing.
sub clone
{
    my $original_hlist = shift @_;
    my $cloned_hlist = shift @_;

    # Arrays are duplicated; anything else is copied as is.
    my $copy_value = sub
    {
	my $value = shift @_;
	return [ @$value ]    if (ref $value eq "ARRAY");
	return $value;
    };

    # top-level fields
    foreach my $field ("filename", "name_array", "reference_fields", "reference_shift_index", "sequence", "sequence_startpos")
    {
	next    if (! exists $$original_hlist{$field});
	$$cloned_hlist{$field} = $copy_value->($$original_hlist{$field});
    }

    # references: one array of fields per atom type
    if (exists $$original_hlist{"reference"})
    {
	foreach my $atom_type (keys %{$$original_hlist{"reference"}})
	{
	    $$cloned_hlist{"reference"}{$atom_type} = $copy_value->($$original_hlist{"reference"}{$atom_type});
	}
    }

    # residues
    if (exists $$original_hlist{"rlist"})
    {
	foreach my $residue_name (keys %{$$original_hlist{"rlist"}})
	{
	    my $original_residue = $$original_hlist{"rlist"}{$residue_name};
	    my $cloned_residue = ($$cloned_hlist{"rlist"}{$residue_name} ||= {});

	    foreach my $field ("name", "author_name", "aa", "author_index", "index", "next", "prev")
	    {
		next    if (! exists $$original_residue{$field});
		$$cloned_residue{$field} = $$original_residue{$field};
	    }

	    # Guarded so that a residue without shifts does not gain an empty "shifts" hash.
	    next    if (! exists $$original_residue{"shifts"});

	    foreach my $shift_name (keys %{$$original_residue{"shifts"}})
	    {
		my $original_shift = $$original_residue{"shifts"}{$shift_name};
		my $cloned_shift = ($$cloned_residue{"shifts"}{$shift_name} ||= {});

		# ambiguity_code is part of the documented shift record and is written by write_bmrb_file.
		foreach my $field ("atom_type", "list", "ambiguity_code")
		{
		    next    if (! exists $$original_shift{$field});
		    $$cloned_shift{$field} = $copy_value->($$original_shift{$field});
		}
	    }
	}
    }

    return;
}


# read_bmrb_file
#   Reads a bmrb file and returns a reference to a bmrb_hlist hash.
#
#       $$bmrb_hlist{"filename"} - name of bmrb file used to build hash of residue names.
#                                  (only set here when a new bmrb_hlist is created.)
#       $$bmrb_hlist{"name_array"} - list of residue names in their proper order.
#	$$bmrb_hlist{"reference"}{"atom_type"} - hash list of references (array of fields) by atom type.
#	$$bmrb_hlist{"reference_fields"} - list and order of reference fields.
#	$$bmrb_hlist{"reference_shift_index"} - index of chemical shift value in reference fields array.
#	$$bmrb_hlist{"sequence"} - array of amino acid sequence (single letter code).
#	$$bmrb_hlist{"sequence_startpos"} - starting author_index (or index if no author_index).
#       $$bmrb_hlist{"rlist"} - hash list of residue_names to residues.
#		$$residue{"name"} - name of residue (uses index in name)
#		$$residue{"author_name"} - name of residue (uses author_index in name)
#		$$residue{"aa"} - amino acid of residue
#		$$residue{"author_index"} - author designated residue number
#		$$residue{"index"} - residue number
#		$$residue{"shifts"} - hash of shifts
#		$$residue{"shifts"}{$shift_name}{"atom_type"} - atom_type of shift.
#		$$residue{"shifts"}{$shift_name}{"list"} - array of shift values.
#		$$residue{"shifts"}{$shift_name}{"ambiguity_code"} - ambiguity code for the shift_name.
#		$$residue{"next"} - name of the next residue.
#		$$residue{"prev"} - name of the previous residue.
#
#   Parameters:
#       $filename - bmrb file to read ("-" reads from STDIN).
#	$read_options - reference to hash of read options (optional).
#		$$read_options{"convert_aa_names"} - convert aa names from single letter to three letter.
#			Defaults to on; pass a false value to turn it off.
#		$$read_options{"convert_shift_names"} - convert shift names to degenerate naming scheme.
#			Defaults to off.
#		$$read_options{"shift_conversion_hash"} - ref to hash of shift name conversions.
#			Defaults to the degeneracy shift conversion hash.
#		$$read_options{"bmrb_hlist"} - reference to bmrb_hlist to fill.
#
#   Dies if $filename cannot be opened.
sub read_bmrb_file
  {
  my $filename = shift @_;
  my $read_options = {};
  if (@_ && ref $_[0] eq "HASH")
    { $read_options = shift @_; }

  my $convert_aa_names = 1;
  if (exists $$read_options{"convert_aa_names"} && ! $$read_options{"convert_aa_names"})
    { $convert_aa_names = 0; }

  my $convert_shift_names = 0;
  if (exists $$read_options{"convert_shift_names"} && $$read_options{"convert_shift_names"})
    { $convert_shift_names = 1; }

  my $shift_conversion_hash = \%degeneracy_shift_conversion;
  if (exists $$read_options{"shift_conversion_hash"} && ref $$read_options{"shift_conversion_hash"} eq "HASH")
    { $shift_conversion_hash = $$read_options{"shift_conversion_hash"}; }

  my $bmrb_hlist;
  if (exists $$read_options{"bmrb_hlist"} && ref $$read_options{"bmrb_hlist"} eq "HASH")
    { $bmrb_hlist = $$read_options{"bmrb_hlist"}; }
  else
    {
    $bmrb_hlist = {};
    $$bmrb_hlist{"filename"} = $filename;
    }

  # Three-argument open: the file name is never interpreted as a mode or a pipe.
  local *STARFILE;
  if ($filename eq "-")
    { *STARFILE = *STDIN; }
  else
    { open (STARFILE, "<", $filename) || die "unable to open $filename: $!"; }

  # skip junk until a loop_ is found, then dispatch on the columns it declares.
  while (my $line = <STARFILE>)
    {
    chomp $line;

    # hash of column names to their column positions (empty unless this line starts a loop_).
    my $columns = {};

    # find the column definition section
    if ($line =~ m/^\s*?loop_\s*?$/)
      {
      # initialize position counter.
      my $count = 0;

      # read the whole definition section.  End when is a null or space filled element.
      # keep track of position using count and columns hash
      while ($line = <STARFILE>)
	{
	chomp($line);
	last     if ($line =~ /^\s*$/);

	# HACK ADDED FOR AUTOASSIGN's BMRB WRITING
	last     if ($line !~ /^\s*?_/);

	# Remove flanking spaces
	$line =~ s/^\s*(.*?)\s*$/$1/;

	# uppercase letter after the _, to make life easier later.
	$line =~ s/^(\_.)/uc($1)/e;

	# store line's position and "new" content.   next line, so next position
	$$columns{$line} = $count++;
	}
      }

    # Check to see whether the columns elements contain certain key elements and then parse accordingly.
    # As of 01-02-2001, _Residue_author_seq_code adds additional optional info if it is there.
    # The shift table is tested before the residue list because it also has the residue list's columns.
    if (    contains($columns, '_Residue_label', '_Residue_seq_code', '_Chem_shift_value', '_Atom_type', '_Atom_name', '_Chem_shift_ambiguity_code')
	 || contains($columns, '_Residue_label', '_Residue_author_seq_code', '_Chem_shift_value', '_Atom_type', '_Atom_name', '_Chem_shift_ambiguity_code') )
      { read_bmrb_shift_lines(\*STARFILE, $columns, $convert_aa_names, $convert_shift_names, $shift_conversion_hash, $bmrb_hlist); }
    elsif (    contains($columns, '_Residue_label', '_Residue_seq_code')
	    || contains($columns, '_Residue_label', '_Residue_author_seq_code') )
      { read_bmrb_residue_list_format(\*STARFILE, $columns, $convert_aa_names, $bmrb_hlist); }
    # NOTE(review): $columns only ever receives lines that start with "_", so it is
    # unclear how this "save_..." pattern can match a column name -- confirm.
    elsif ( contains($columns, 'save_[Cc]hem(ical)?_shift_reference') )
      { read_bmrb_chemical_shift_reference(\*STARFILE, $columns, $bmrb_hlist); }
    }

  # ADDED BY GSS 10/29/2000 for bmr1133.str, but applies to a lot more ...
  # if at this point, you do not have a name array, then kinda fake one:
  # residue names sorted by the number that follows their letters.
  if (ref $$bmrb_hlist{"name_array"} ne "ARRAY")
    {
    $$bmrb_hlist{"name_array"} = [ sort { ($a =~ /^[a-zA-Z]+(\-?\d+)$/)[0] <=> ($b =~ /^[A-Za-z]+(\-?\d+)$/)[0]; } (keys %{ $$bmrb_hlist{"rlist"} || {} }) ];
    }

  my @residue_names = @{$$bmrb_hlist{"name_array"}};

  # create sequence field and link each residue to its neighbours
  $$bmrb_hlist{"sequence"} = [];
  foreach my $x (0 .. $#residue_names)
    {
    my $resname = $residue_names[$x];

    my $aa = $$bmrb_hlist{"rlist"}{$resname}{"aa"};
    if (exists $aa_name_conversion{lc($aa)})
      { $aa = $aa_name_conversion{lc($aa)}; }
    push @{$$bmrb_hlist{"sequence"}}, $aa;

    if ($x > 0)
      { $$bmrb_hlist{"rlist"}{$resname}{"prev"} = $residue_names[$x-1]; }
    if ($x < $#residue_names)
      { $$bmrb_hlist{"rlist"}{$resname}{"next"} = $residue_names[$x+1]; }
    }

  # create sequence_startpos field.  With no residues there is no start position;
  # looking one up anyway would create a bogus residue named "" in rlist.
  if (@residue_names)
    {
    my $first_residue = $$bmrb_hlist{"rlist"}{$residue_names[0]};
    if (exists $$first_residue{"author_index"})
      { $$bmrb_hlist{"sequence_startpos"} = $$first_residue{"author_index"}; }
    else
      { $$bmrb_hlist{"sequence_startpos"} = $$first_residue{"index"}; }
    }
  else
    { $$bmrb_hlist{"sequence_startpos"} = undef; }

  # STDIN is left open for the caller.
  close STARFILE if ($filename ne "-");

  return $bmrb_hlist;
  }



# contains
#   Tests whether every given pattern matches at least one key of %$columns.
#   Each element is used as an unanchored regular expression, so it may match
#   anywhere inside a column name.  Returns 1 if all patterns matched (also when
#   no patterns are given), else 0.
#
#   Parameters:
#	$columns - reference to the hash of the column entries to their order.
#	@elements - patterns that must each match some key of $columns
sub contains
{
    my ($columns, @patterns) = @_;

  PATTERN:
    foreach my $pattern (@patterns)
    {
	foreach my $column_name (keys %$columns)
	{
	    # one matching column is enough for this pattern
	    next PATTERN     if ($column_name =~ /$pattern/);
	}

	# no column matched this pattern
	return 0;
    }

    return 1;
}




# write_bmrb_file
#   Prints a bmrb file out to $filename.  Three sections are written: the
#   sequence as one-letter codes (20 per line), a residue loop_
#   (_Residue_seq_code, _Residue_author_seq_code, _Residue_label) and a shift
#   loop_ with one row per shift value.
#
#   Residues are written in name_array order, or sorted by the number in the
#   residue name when there is no name_array.  The shifts of a residue are
#   written sorted by shift name so that the output is reproducible.
#
#   Parameters:
#	$bmrb_hlist - reference to bmrb hash structure.
#	$filename - name of output filename ("-" writes to STDOUT, which is left open).
#
#   Dies if $filename cannot be opened or closed.
sub write_bmrb_file
  {
      my $bmrb_hlist = shift @_;
      my $filename = shift @_;

      my $to_stdout = ($filename eq "-");

      # Three-argument open: the file name is never interpreted as a mode or a pipe.
      local *STARFILE;
      if ($to_stdout)
      { *STARFILE = *STDOUT; }
      else
      { open (STARFILE, ">", $filename) || die "unable to open $filename: $!"; }

      my @sorted_residue_names;
      if (exists $$bmrb_hlist{"name_array"})
      { @sorted_residue_names = @{$$bmrb_hlist{"name_array"}}; }
      else
      { @sorted_residue_names = sort { ($a =~ /^[a-zA-Z]+(\-?\d+)$/)[0] <=> ($b =~ /^[A-Za-z]+(\-?\d+)$/)[0]; } (keys %{$$bmrb_hlist{"rlist"}}); }

      # print residues as single letters, 20 to a line.
      print STARFILE "_Mol_residue_sequence\n;\n";

      {
	  my $letters_on_line = 0;
	  foreach my $residue_name (@sorted_residue_names)
	  {
	      my $aa = $$bmrb_hlist{"rlist"}{$residue_name}{"aa"};
	      print STARFILE $aa_name_conversion{lc($aa)};
	      $letters_on_line = ($letters_on_line + 1) % 20;
	      print STARFILE "\n" if (! $letters_on_line);
	  }

	  # finish a partly filled last line
	  print STARFILE "\n" if ($letters_on_line);
      }

      # print sequence with sequence number.
      print STARFILE ";\n\n";
      print STARFILE "  loop_\n    _Residue_seq_code\n    _Residue_author_seq_code\n    _Residue_label\n\n";

      # Sequence code given to each residue, keyed by residue name.  Keying by
      # name (rather than by author_index or index) guarantees that the shift
      # rows below find the code of their own residue even when author_index
      # and index differ.
      my %residue_seq_code_hlist;
      my $seq_code = 1;
      foreach my $residue_name (@sorted_residue_names)
      {
	  my $residue = $$bmrb_hlist{"rlist"}{$residue_name};
	  my $author_seq_code = (exists $$residue{"author_index"}) ? $$residue{"author_index"} : $$residue{"index"};

	  print STARFILE "$seq_code\t", $author_seq_code, "\t", $aa_name_conversion2{uc($$residue{"aa"})}, "\n";
	  $residue_seq_code_hlist{$residue_name} = $seq_code;
	  $seq_code++;
      }

      print STARFILE "  stop_\n\n\n";

      # print shifts
      print STARFILE "  loop_\n    _Atom_shift_assign_ID\n    _Residue_seq_code\n    _Residue_label\n";
      print STARFILE "    _Atom_name\n    _Atom_type\n    _Chem_shift_value\n    _Chem_shift_value_error\n";
      print STARFILE "    _Chem_shift_ambiguity_code\n\n";

      my $shift_id = 1;
      foreach my $residue_name (@sorted_residue_names)
      {
	  my $residue = $$bmrb_hlist{"rlist"}{$residue_name};
	  foreach my $shift_name (sort keys %{$$residue{"shifts"}})
	  {
	      my $shift = $$residue{"shifts"}{$shift_name};

	      # ADDED BY GSS 07/01/2001.  If you do not have an atomType existing, make it from the first letter of the shift name.
	      my $atom_type = (exists $$shift{"atom_type"}) ? $$shift{"atom_type"} : ($shift_name =~ /^(.)/)[0];

	      # A missing or false ambiguity code is written as 1.
	      my $ambiguity_code = (exists $$shift{"ambiguity_code"} && $$shift{"ambiguity_code"}) ? $$shift{"ambiguity_code"} : "1";

	      foreach my $shift_value (@{$$shift{"list"}})
	      {
		  # ADDED BY GSS 07/01/2001.  The residue_seq_code_hlist hash makes sure that the next time you read in the data, the name array is consistent with the chemical shift data.
		  print STARFILE "$shift_id\t", $residue_seq_code_hlist{$residue_name}, "\t", $aa_name_conversion2{uc($$residue{"aa"})}, "\t",
		  $shift_name, "\t", $atom_type, "\t", $shift_value, "\t.\t", $ambiguity_code, "\n";
		  $shift_id++;
	      }
	  }
      }

      print STARFILE "  stop_\n";

      # Buffered write errors only show up at close.  STDOUT belongs to the caller and stays open.
      if (! $to_stdout)
      { close (STARFILE) || die "unable to close $filename: $!"; }

      return;
  }


# convert_bmrb_shift_names
#   Converts the bmrb shift names using the given shift_conversion_hash.
#   The conversion hash is looked up as {residue type}{shift name}, where the
#   residue type is the lowercased "aa" of the residue (first mapped through
#   %aa_name_conversion2 when uc(aa) is a key of that table).
#
#   For every residue in "name_array" the "shifts" hash is replaced by a new
#   hash holding only the shifts that have a conversion.  A name ending in "-1"
#   is converted using the type of the residue named by "prev" and keeps its
#   "-1" suffix.  When two names convert to the same new name their value lists
#   are concatenated into the first entry seen.
#
#   Shifts are dropped when the name does not parse, when a "-1" name has no
#   usable "prev" residue, or when the conversion hash has no entry for them.
#   Neither the rlist nor the conversion hash gains new keys as a side effect
#   of these lookups.
#
#   Parameters:
#	$bmrb_hlist - reference to bmrb hash structure.
#	$shift_conversion_hash - reference to a shift conversion hash.
sub convert_bmrb_shift_names
  {
  my $bmrb_hlist = shift @_;
  my $shift_conversion_hash = shift @_;

  foreach my $residue_name (@{$$bmrb_hlist{"name_array"}})
    {
    my $residue = $$bmrb_hlist{"rlist"}{$residue_name};
    my $new_shift_list = {};

    foreach my $shift_name_real (keys %{$$residue{"shifts"}})
      {
      # split "CA-1" into ("CA", "-1"); a plain "CA" has no suffix.
      my ($shift_name, $suffix) = ($shift_name_real =~ /^([a-zA-Z]+\d*)(\-1)?$/);
      next if (! defined $shift_name);
      $suffix = ""   if (! defined $suffix);

      # "-1" shifts belong to the previous residue, so use its type.
      my $res_name = $$residue{"aa"};
      if ($suffix ne "")
        {
        my $prev_name = $$residue{"prev"};
        # test with exists first: a plain nested lookup would create the entry.
        next if (! defined $prev_name || ! exists $$bmrb_hlist{"rlist"}{$prev_name});
        $res_name = $$bmrb_hlist{"rlist"}{$prev_name}{"aa"};
        }
      next if (! defined $res_name);

      if (exists $aa_name_conversion2{uc($res_name)})
        { $res_name = $aa_name_conversion2{uc($res_name)}; }
      $res_name = lc($res_name);

      # two-step exists so that unknown residue types are not added to the
      # caller's conversion hash.
      next if (! exists $$shift_conversion_hash{$res_name});
      next if (! exists $$shift_conversion_hash{$res_name}{$shift_name});

      my $new_name = $$shift_conversion_hash{$res_name}{$shift_name} . $suffix;
      my $old_entry = $$residue{"shifts"}{$shift_name_real};

      if (exists $$new_shift_list{$new_name})
        { push @{$$new_shift_list{$new_name}{"list"}}, @{$$old_entry{"list"}}; }
      else
        { $$new_shift_list{$new_name} = $old_entry; }
      }

    $$residue{"shifts"} = $new_shift_list;
    }

  }

# adjust_by_reference
#   Adjusts the carbon chemical shifts to DSS standard based on their current reference.
#
#   The offset is chosen from the first token of the carbon reference entry
#   ($$bmrb_hlist{"reference"}{"C"}[0]):
#	"TMS"     => +1.7
#	"TSP"     => -0.15
#	"dioxane" => +69.3, only when "reference_shift_index" exists and the
#	             reference token at that index is numerically 0
#   Any other reference, or no carbon reference at all, leaves the data untouched.
#
#   The offset is added in place to every value in the "list" of every shift
#   whose "atom_type" is "C".  A value list reachable through more than one
#   shift entry (the "X" / "X-1" entries made by readBMRBasCMAP and
#   convertBMRBtoCMap share one hash) is adjusted only once.
#
#   Parameters:
#	$bmrb_hlist - bmrb hash structure.
#
sub adjust_by_reference
  {
  my $bmrb_hlist = shift @_;

  my $carbon_shifting = 0.0;
  # check the outer key first so that a structure without references is not
  # given an empty "reference" hash by the lookup itself.
  if (exists $$bmrb_hlist{"reference"} && exists $$bmrb_hlist{"reference"}{"C"})
    {
    my $carbon_reference = $$bmrb_hlist{"reference"}{"C"};
    my $compound = $$carbon_reference[0];

    if ($compound eq "TMS")
      { $carbon_shifting = 1.7; }
    elsif ($compound eq "TSP")
      { $carbon_shifting = -0.15; }
    elsif ($compound eq "dioxane" && exists $$bmrb_hlist{"reference_shift_index"} &&
	   ($$carbon_reference[$$bmrb_hlist{"reference_shift_index"}] == 0))
      { $carbon_shifting = 69.3; }
    }

  if ($carbon_shifting != 0.0)
    {
    # value lists already shifted, keyed by the stringified array reference.
    my %already_adjusted;

    foreach my $residue (values %{$$bmrb_hlist{"rlist"}})
      {
      foreach my $shift (values %{$$residue{"shifts"}})
	{
	next if (! defined $$shift{"atom_type"} || $$shift{"atom_type"} ne "C");
	next if (! defined $$shift{"list"});
	next if ($already_adjusted{$$shift{"list"}}++);

	# foreach aliases the array elements, so this edits the list in place.
	foreach my $value (@{$$shift{"list"}})
	  { $value += $carbon_shifting; }
	}
      }
    }

  }

# convert_residue_names
#   Converts residue names in the bmrb file structure from 3-letter to 1-letter and vice versa.
#
#   For every residue named in "name_array" whose "aa" can be converted, the
#   residue's "aa", "name" (aa . index) and, when "author_index" exists,
#   "author_name" are rewritten.  The rlist is rebuilt under the new names, the
#   "name_array" entries are renamed in place, and the "prev"/"next" links of
#   every carried-over residue are re-pointed at the new names.
#
#   $conversion == 1 looks "aa" up in %aa_name_conversion (key: lc(aa));
#   any other value looks it up in %aa_name_conversion2 (key: uc(aa)).
#   Residues with no entry in the table keep their current name.
#
#   Only residues listed in "name_array" are carried over into the new rlist.
#
#   Parameters:
#	$bmrb_hlist - bmrb hash structure.
#	$conversion - 1 or 3 for conversion to 1-letter or 3-letter
#
sub convert_residue_names
  {
  my $bmrb_hlist = shift @_;
  my $conversion = shift @_;

  my $to_one_letter = ($conversion == 1);

  my $rlist = {};
  my %renamed;    # old residue name => residue name after conversion

  foreach my $residue_name (@{$$bmrb_hlist{"name_array"}})
    {
    my $old_name = $residue_name;
    my $residue = $$bmrb_hlist{"rlist"}{$residue_name};

    # pick the table and key case that match the requested direction.
    my ($convertible, $new_aa);
    if ($to_one_letter)
      {
      my $key = lc($$residue{"aa"});
      $convertible = exists $aa_name_conversion{$key};
      $new_aa = $aa_name_conversion{$key}   if ($convertible);
      }
    else
      {
      my $key = uc($$residue{"aa"});
      $convertible = exists $aa_name_conversion2{$key};
      $new_aa = $aa_name_conversion2{$key}   if ($convertible);
      }

    if ($convertible)
      {
      $$residue{"aa"} = $new_aa;
      $$residue{"name"} = $$residue{"aa"} . $$residue{"index"};
      if (exists $$residue{"author_index"})
	{ $$residue{"author_name"} = $$residue{"aa"} . $$residue{"author_index"}; }

      # the foreach variable aliases the array element, so this assignment
      # also renames the entry inside "name_array".
      $residue_name = $$residue{"name"};
      }

    $renamed{$old_name} = $residue_name;
    $$rlist{$residue_name} = $residue;
    }

  # "prev"/"next" hold residue names; without this step they would still
  # carry the names from before the conversion.
  foreach my $residue (values %$rlist)
    {
    next if (ref($residue) ne "HASH");
    foreach my $link ("prev", "next")
      {
      next if (! exists $$residue{$link} || ! defined $$residue{$link});
      next if (! exists $renamed{$$residue{$link}});
      $$residue{$link} = $renamed{$$residue{$link}};
      }
    }

  $$bmrb_hlist{"rlist"} = $rlist;
  }

#
#  Routines for internal use only.
#



# read_bmrb_shift_lines
#   Reads the shift lines from a bmrb file and puts them in the bmrb hash structure.
#
#   Lines are read until one containing "stop_".  Blank lines, lines starting
#   with "#", lines with fewer tokens than there are columns and lines whose
#   residue label is in neither amino acid conversion table are skipped.
#   A residue that is not yet in the rlist is created from the line ("index",
#   "aa", "name" and, when the column exists, "author_index"/"author_name").
#   Each accepted line appends its _Chem_shift_value to
#   rlist{residue}{"shifts"}{atom name}{"list"}.
#
#   With $convert_shift_names, atom names are translated through
#   $shift_conversion_hash{lowercased residue type}{atom name}; a trailing "-1"
#   selects the type of the residue named by "prev" and is kept on the new name.
#   Lines whose atom name cannot be translated are skipped without leaving an
#   empty or suffix-only shift entry behind.
#
#   "atom_type" and "ambiguity_code" are stored only when the corresponding
#   column is present in $column_elements.
#
#   Parameters:
#       $file - file descriptor to read from.
#       $column_elements - reference to a hash mapping column tag names
#                          (e.g. "_Residue_label") to token positions in a line.
#	$convert_aa_names - if true, a residue label that is a key of
#                          %aa_name_conversion (lowercased) is replaced by the mapped value.
#                          NOTE(review): the previous header called this a 1 -> 3 letter
#                          conversion, but convert_residue_names uses the same table for
#                          conversion to 1-letter names - confirm.
#       $convert_shift_names - convert shift names to degenerate naming scheme.
#       $shift_conversion_hash - hash to use for converting shift names.
#	$bmrb_hlist - bmrb hash structure to fill.
#
sub read_bmrb_shift_lines
  {
  my $file = shift @_;
  my $column_elements = shift @_;
  my $convert_aa_names = shift @_;
  my $convert_shift_names = shift @_;
  my $shift_conversion_hash = shift @_;
  my $bmrb_hlist = shift @_;

  # the column layout does not change while reading, so resolve it once.
  my $column_count = scalar(keys %$column_elements);
  my $label_column = $$column_elements{"_Residue_label"};
  my $has_author_code = exists $$column_elements{"_Residue_author_seq_code"};
  # the residue index comes from the sequence code, or the author's code if that is all there is.
  my $code_column = (exists $$column_elements{"_Residue_seq_code"}) ? $$column_elements{"_Residue_seq_code"} : $$column_elements{"_Residue_author_seq_code"};

  # foreach line in the file until stop_
  while (my $line = <$file>)
    {
    chomp $line;

    # skip blank lines and comments
    next if ($line =~ /^\s*$/);
    next if ($line =~ /^#/);

    # last time if stop_
    last if ($line =~ /stop_/);

    my @tokens = split(/\s+/, $line);

    # get rid of the null element in the beginning caused by leading spaces
    while (@tokens && ($tokens[0] eq ""))
      { shift @tokens; }

    # need enough tokens to map to the column elements and a legitimate residue label.
    next if (@tokens < $column_count);
    my $label = $tokens[$label_column];
    next if ((! exists $aa_name_conversion{lc($label)}) && (! exists $aa_name_conversion2{uc($label)}));

    # if you are supposed to convert the name and you can, then do so ...
    if ($convert_aa_names && exists $aa_name_conversion{lc($label)})
      { $label = $aa_name_conversion{lc($label)}; }

    my $seq_code = $tokens[$code_column];
    my $residue_name = $label . $seq_code;

    my $residue;
    if (! exists $$bmrb_hlist{"rlist"}{$residue_name})
      {
      # first line seen for this residue: create its record.
      $residue = {};
      $$residue{"index"} = $seq_code;
      $$residue{"author_index"} = $tokens[$$column_elements{"_Residue_author_seq_code"}]    if ($has_author_code);
      $$residue{"aa"} = $label;
      $$residue{"name"} = $$residue{"aa"} . $$residue{"index"};
      $$residue{"author_name"} = $$residue{"aa"} . $$residue{"author_index"}    if (exists $$residue{"author_index"});
      $$bmrb_hlist{"rlist"}{$residue_name} = $residue;
      }
    else
      { $residue = $$bmrb_hlist{"rlist"}{$residue_name}; }

    my $atom_name = $tokens[$$column_elements{"_Atom_name"}];

    # if you are using a degenerate naming scheme then degenerate it ...
    if ($convert_shift_names)
      {
      my ($base_name, $suffix) = ($atom_name =~ /^([a-zA-Z]+\d*)(\-1)?$/);
      next if (! defined $base_name);
      $suffix = ""   if (! defined $suffix);

      # "-1" atoms belong to the previous residue, so use its type.
      my $res_name = $$residue{"aa"};
      if ($suffix ne "")
	{
	my $prev_name = $$residue{"prev"};
	# test with exists first: a plain nested lookup would create the entry.
	next if (! defined $prev_name || ! exists $$bmrb_hlist{"rlist"}{$prev_name});
	$res_name = $$bmrb_hlist{"rlist"}{$prev_name}{"aa"};
	}
      next if (! defined $res_name);

      if (exists $aa_name_conversion2{uc($res_name)})
	{ $res_name = $aa_name_conversion2{uc($res_name)}; }
      my $conversion_key = lc($res_name);

      # two-step exists so unknown residue types are not added to the conversion hash.
      next if (! exists $$shift_conversion_hash{$conversion_key});
      my $converted = $$shift_conversion_hash{$conversion_key}{$base_name};
      next if (! defined $converted || $converted eq "");

      $atom_name = $converted . $suffix;
      }

    next if (! defined $atom_name || $atom_name eq "");

    # first value for this shift: set up its record.
    if (! exists $$residue{"shifts"}{$atom_name})
      {
      my $shift = { "list" => [] };
      # an absent column has no token position; indexing with it would pick up token 0.
      $$shift{"atom_type"} = $tokens[$$column_elements{"_Atom_type"}]
	  if (exists $$column_elements{"_Atom_type"});
      $$shift{"ambiguity_code"} = $tokens[$$column_elements{"_Chem_shift_ambiguity_code"}]
	  if (exists $$column_elements{"_Chem_shift_ambiguity_code"});
      $$residue{"shifts"}{$atom_name} = $shift;
      }

    push @{$$residue{"shifts"}{$atom_name}{"list"}}, $tokens[$$column_elements{"_Chem_shift_value"}];
    }
  }


# read_bmrb_residue_list_format
#   Reads the residue list from a bmrb file and puts them in the bmrb hash.
#
#   Lines are read until one containing "stop_".  A line may hold several
#   residues back to back (one group of column values per residue); only
#   complete groups are used.  A line is skipped when it is blank, has fewer
#   tokens than there are columns, or when the label of its first residue is
#   in neither amino acid conversion table.
#
#   Every residue not already in "name_array" is appended to it, and is added
#   to the rlist ("aa", "index", "name", plus "author_index"/"author_name" when
#   that column exists) unless an rlist entry of that name is already there.
#
#   Afterwards "sequence" is rebuilt from "name_array" (each "aa" mapped
#   through %aa_name_conversion when possible) and every listed residue gets
#   "prev"/"next" set to the names of its neighbours in "name_array".
#
#   Parameters:
#       $file - file descriptor to read from.
#       $column_elements - reference to a hash mapping column tag names
#                          (e.g. "_Residue_label") to positions within one group.
#	$convert_aa_names - if true, a residue label that is a key of
#                          %aa_name_conversion (lowercased) is replaced by the mapped value.
#	$bmrb_hlist - bmrb hash structure to fill.
#
sub read_bmrb_residue_list_format
  {
  my $file = shift @_;
  my $column_elements = shift @_;
  my $convert_aa_names = shift @_;
  my $bmrb_hlist = shift @_;

  # if you do not have a name array then make it a reference to an array.
  if (! exists $$bmrb_hlist{"name_array"})
    { $$bmrb_hlist{"name_array"} = []; }
  my $name_array = $$bmrb_hlist{"name_array"};

  # names already listed, so that the duplicate test does not rescan the array.
  my %listed = map { ($_ => 1) } @$name_array;

  # the column layout does not change while reading, so resolve it once.
  my $column_count = scalar(keys %$column_elements);
  my $label_column = $$column_elements{"_Residue_label"};
  my $has_author_code = exists $$column_elements{"_Residue_author_seq_code"};
  # sequence code, or author sequence code if the former is not available.
  my $code_column = (exists $$column_elements{"_Residue_seq_code"}) ? $$column_elements{"_Residue_seq_code"} : $$column_elements{"_Residue_author_seq_code"};

  while (my $line = <$file>)
    {
    chomp $line;

    # skip blank lines
    next if ($line =~ /^\s*$/);

    last if ($line =~ /stop_/);

    my @tokens = split(/\s+/, $line);

    # get rid of the null element in the beginning caused by leading spaces
    while (@tokens && ($tokens[0] eq ""))
      { shift @tokens; }

    # with no columns the group loop below could never advance.
    next if ($column_count == 0 || @tokens < $column_count);
    # the label of the first residue on the line must be legitimate.
    next if ((! exists $aa_name_conversion{lc($tokens[$label_column])}) && (! exists $aa_name_conversion2{uc($tokens[$label_column])}));

    # step from one group to the next (multi-residue lines); a trailing
    # incomplete group is ignored instead of being read as a residue.
    for (my $x = 0; $x + $column_count <= @tokens; $x += $column_count)
      {
      my $label = $tokens[$x + $label_column];
      if ($convert_aa_names && (exists $aa_name_conversion{lc($label)}))
	{ $label = $aa_name_conversion{lc($label)}; }

      my $seq_code = $tokens[$x + $code_column];
      my $residue_name = $label . $seq_code;

      # if this has already been listed, then skip
      next if ($listed{$residue_name});

      if (! exists $$bmrb_hlist{"rlist"}{$residue_name})
	{
	my $residue = {};
	$$residue{"aa"} = $label;
	$$residue{"index"} = $seq_code;

	# if you have the author's definition, then save it.
	if ($has_author_code)
	  {
	  $$residue{"author_index"} = $tokens[$x + $$column_elements{"_Residue_author_seq_code"}];
	  $$residue{"author_name"}  = $$residue{"aa"} . $$residue{"author_index"};
	  }

	$$residue{"name"} = $residue_name;
	$$bmrb_hlist{"rlist"}{$residue_name} = $residue;
	}

      push @$name_array, $residue_name;
      $listed{$residue_name} = 1;
      }
    }

  # create sequence field and link each residue to its neighbours
  $$bmrb_hlist{"sequence"} = [];
  my $last_position = $#{$name_array};
  foreach my $x (0 .. $last_position)
    {
    my $resname = $$name_array[$x];

    my $aa = $$bmrb_hlist{"rlist"}{$resname}{"aa"};
    if (exists $aa_name_conversion{lc($aa)})
      { $aa = $aa_name_conversion{lc($aa)}; }
    push @{$$bmrb_hlist{"sequence"}}, $aa;

    if ($x > 0)
      { $$bmrb_hlist{"rlist"}{$resname}{"prev"} = $$name_array[$x-1]; }
    if ($x < $last_position)
      { $$bmrb_hlist{"rlist"}{$resname}{"next"} = $$name_array[$x+1]; }
    }
  }


# read_bmrb_chemical_shift_reference
#   Reads the chemical shift reference from a bmrb file and puts them in the bmrb hash structure.
#
#   Reading proceeds in three stages:
#     1. lines are discarded up to and including the first one containing "loop_";
#     2. the following lines, up to the first blank line, are stored as
#        "reference_fields" (one entry per line; a line that is not a single
#        word stores an undefined entry so that positions are kept).
#        "reference_shift_index" is set to the position of the last field
#        matching _Chem_shift_value (case of C/S/V ignored), if there is one;
#     3. data lines are read up to a line containing "stop_".  A line with at
#        least 3 tokens whose second token is H, C or N is stored as
#        $$bmrb_hlist{"reference"}{second token} = [tokens].
#
#   A value in single quotes ('methyl protons') is one token, quotes included,
#   so token positions line up with the positions in "reference_fields".
#
#   Parameters:
#       $file - file descriptor to read from.
#       $column_elements - not used by this routine.
#	$bmrb_hlist - bmrb hash structure to fill.
#
sub read_bmrb_chemical_shift_reference
  {
  my $file = shift @_;
  my $column_elements = shift @_;
  my $bmrb_hlist = shift @_;

  # skip ahead to the start of the loop
  while (my $line = <$file>)
    { last if ($line =~ /loop_/); }

  # field names, one per line, ended by a blank line
  $$bmrb_hlist{"reference_fields"} = [];
  while (my $line = <$file>)
    {
    last if ($line =~ /^\s*$/);
    my ($token) = ($line =~ /^\s*(\S+)\s*$/);
    push @{$$bmrb_hlist{"reference_fields"}}, $token;
    }

  my $fields = $$bmrb_hlist{"reference_fields"};
  my @value_columns = grep { defined $$fields[$_] && $$fields[$_] =~ /_[Cc]hem_[Ss]hift_[Vv]alue/; } (0..$#{$fields});

  if (@value_columns)
    { $$bmrb_hlist{"reference_shift_index"} = $value_columns[-1]; }

  # data lines
  while (my $line = <$file>)
    {
    chomp $line;
    last if ($line =~ /stop_/);

    # A token is either a single-quoted value (closing quote followed by
    # whitespace or end of line) or a run of non-blank characters.  Matching
    # tokens directly, rather than splitting on separators, yields no empty
    # tokens around quoted values.
    my @tokens = ($line =~ /('.*?'(?=\s|$)|\S+)/g);

    next if ((@tokens < 3) || ($tokens[1] !~ /^[HCN]$/));

    $$bmrb_hlist{"reference"}{$tokens[1]} = [@tokens];
    }
  }

####################################### deMultiplicate ########################################
# Input   : a bmrb_hlist type                                                                 #
# Output  : none; the bmrb_hlist is trimmed in place                                          #
# Purpose : To keep only the proper multiplicity of values for each shift in the bmrb_hlist   #
#                                                                                             #
# For every shift of every residue in "name_array" the value list is cut down to the count    #
# given by %multiplicity{residue type}{shift name}, where the residue type is                 #
# $aa_name_conversion{lc(aa)}.  A shift name ending in "-1" is first looked up, without the   #
# suffix, under the type of the residue named by "prev"; if that gives nothing the full name  #
# is looked up under the residue's own type.  Shifts with no multiplicity entry are left      #
# alone.  The lookups add no keys to the rlist or to %multiplicity.                           #
###############################################################################################
sub deMultiplicate
   {
       my $bmrb_hlist = shift @_;

       # Multiplicity for ($aa, $shift_name), or undef when the residue type or
       # the shift is not in the table.  Uses exists at each level so that
       # unknown types are not added to %multiplicity.
       my $multiplicity_of = sub
       {
	   my ($aa, $shift_name) = @_;
	   return undef   if (! defined $aa);
	   my $residue_type = $aa_name_conversion{lc($aa)};
	   return undef   if (! defined $residue_type || ! exists $multiplicity{$residue_type});
	   return undef   if (! exists $multiplicity{$residue_type}{$shift_name});
	   return $multiplicity{$residue_type}{$shift_name};
       };

       foreach my $residue_name (@{$$bmrb_hlist{"name_array"}})
       {
	   next if (! exists $$bmrb_hlist{"rlist"}{$residue_name});
	   my $residue = $$bmrb_hlist{"rlist"}{$residue_name};
	   my $shifts = $$residue{"shifts"};
	   next if (ref($shifts) ne "HASH");

	   foreach my $shift_name_real (keys %$shifts)
	   {
	       # strip a trailing "-1"; the name changes only if the suffix was there.
	       my $shift_name = ($shift_name_real =~ /^(.*?)(\-1)?$/)[0];
	       my $is_previous = ($shift_name ne $shift_name_real);

	       my $keep;
	       if ($is_previous)
	       {
		   my $prev_name = $$residue{"prev"};
		   # test with exists first: a plain nested lookup would create the entry.
		   if (defined $prev_name && exists $$bmrb_hlist{"rlist"}{$prev_name})
		   { $keep = $multiplicity_of->($$bmrb_hlist{"rlist"}{$prev_name}{"aa"}, $shift_name); }
	       }
	       if (! defined $keep)
	       { $keep = $multiplicity_of->($$residue{"aa"}, $shift_name_real); }
	       next if (! defined $keep);

	       # drop everything after the first $keep values.
	       my $values = $$shifts{$shift_name_real}{"list"};
	       next if (ref($values) ne "ARRAY");
	       splice(@$values, $keep)   if (@$values > $keep);
	   }
       }
   }

# readBMRBasCMAP
#   Reads a bmrb file in a CMap naming scheme and returns a BMRB/CMap hash of records.
#
#   The file is read with read_bmrb_file (amino acid and shift name conversion
#   switched on, using %cmap_shift_conversion) and then passed through
#   convert_residue_names(..., 1).  When the restriction list names "X-1"
#   shifts, each residue's "X" shift entry is also stored as "X-1" on the
#   residue named by its "next" link (the same hash is shared, not copied).
#   "parsedtokens" is set to "AA" followed by the names from @shift_name_order
#   that occur on some residue in "name_array" and, if a restriction list was
#   given, are also in that list.
#
#   Parameters:
#       $file - bmrb filename to read
#       $restriction_list - ref to list of atom names to restrict to (optional).
sub readBMRBasCMAP
  {
  my $input_bmrb_filename = shift @_;

  # restriction list as a lookup hash; stays 0 when no (or an empty) list is given
  my $user_shift_name_list = 0;
  if (@_ && @{$_[0]})
    {
    my $shift_list = shift @_;
    $user_shift_name_list = { map { ($_ => 1) } @$shift_list };
    }

  # read the bmrb file
  my %read_options = ( "shift_conversion_hash" => \%cmap_shift_conversion, "convert_aa_names" => 1, "convert_shift_names" => 1 );
  my $bmrb_hlist1 = read_bmrb_file($input_bmrb_filename, \%read_options);

  # convert to single letter amino acid names
  convert_residue_names($bmrb_hlist1, 1);

  # update "-1" shifts
  if (ref $user_shift_name_list)
    {
    # base names ("CA") of the requested previous-residue shifts ("CA-1")
    my %carried_forward;
    foreach my $requested (keys %$user_shift_name_list)
      {
      next if ($requested !~ /-1/);
      my ($base_name) = ($requested =~ /^(.+)-1/);
      $carried_forward{$base_name} = 1;
      }

    if (%carried_forward)
      {
      foreach my $residue (values %{$$bmrb_hlist1{"rlist"}})
	{
	foreach my $shift_name (keys %{$$residue{"shifts"}})
	  {
	  next unless ($carried_forward{$shift_name} && exists $$residue{"next"});
	  $$bmrb_hlist1{"rlist"}{$$residue{"next"}}{"shifts"}{$shift_name . "-1"} = $$residue{"shifts"}{$shift_name};
	  }
	}
      }
    }

  # determine the list of shift names to use
  my %name_test_hash;
  foreach my $residue_name (@{$$bmrb_hlist1{"name_array"}})
    {
    $name_test_hash{$_} = 1   foreach (keys %{$$bmrb_hlist1{"rlist"}{$residue_name}{"shifts"}});
    }

  my @parsed_tokens = ("AA");
  foreach my $candidate (@shift_name_order)
    {
    next if (! exists $name_test_hash{$candidate});
    next if ($user_shift_name_list && ! exists $$user_shift_name_list{$candidate});
    push @parsed_tokens, $candidate;
    }
  $$bmrb_hlist1{"parsedtokens"} = \@parsed_tokens;

  return $bmrb_hlist1;
  }

# convertBMRBtoCMap
#   Converts an already loaded bmrb hash structure, in place, to the CMap
#   naming scheme and returns it.
#
#   Shift names are converted with convert_bmrb_shift_names using
#   %cmap_shift_conversion, then residue names with convert_residue_names(..., 1).
#   When the restriction list names "X-1" shifts, each residue's "X" shift entry
#   is also stored as "X-1" on the residue named by its "next" link (the same
#   hash is shared, not copied).  "parsedtokens" is set to "AA" followed by the
#   names from @shift_name_order that occur on some residue in "name_array" and,
#   if a restriction list was given, are also in that list.
#
#   Parameters:
#       $bmrb_hlist1 - reference to bmrb hash structure.
#       $restriction_list - ref to list of atom names to restrict to (optional).
sub convertBMRBtoCMap
{
    my $bmrb_hlist1 = shift @_;

    # restriction list as a lookup hash; stays 0 when no (or an empty) list is given
    my $user_shift_name_list = 0;
    if (@_ && @{$_[0]})
    {
	my $shift_list = shift @_;
	my %requested;
	@requested{@$shift_list} = (1) x scalar(@$shift_list);
	$user_shift_name_list = \%requested;
    }

    convert_bmrb_shift_names($bmrb_hlist1, \%cmap_shift_conversion);
    convert_residue_names($bmrb_hlist1, 1);

    # update "-1" shifts
    if (ref $user_shift_name_list)
    {
	# base names ("CA") of the requested previous-residue shifts ("CA-1")
	my %base_name_wanted;
	foreach my $requested_name (keys %$user_shift_name_list)
	{
	    if ($requested_name =~ /-1/)
	    {
		my ($base_name) = ($requested_name =~ /^(.+)-1/);
		$base_name_wanted{$base_name} = 1;
	    }
	}

	if (%base_name_wanted)
	{
	    foreach my $residue (values %{$$bmrb_hlist1{"rlist"}})
	    {
		foreach my $shift_name (keys %{$$residue{"shifts"}})
		{
		    if ($base_name_wanted{$shift_name} && exists $$residue{"next"})
		    {
			$$bmrb_hlist1{"rlist"}{$$residue{"next"}}{"shifts"}{$shift_name . "-1"} = $$residue{"shifts"}{$shift_name};
		    }
		}
	    }
	}
    }

    # determine the list of shift names to use
    my %name_test_hash;
    foreach my $residue_name (@{$$bmrb_hlist1{"name_array"}})
    {
	foreach my $shift_name (keys %{$$bmrb_hlist1{"rlist"}{$residue_name}{"shifts"}})
	{ $name_test_hash{$shift_name} = 1; }
    }

    # "AA" first, then the usable shift names in @shift_name_order order
    my @usable_names;
    foreach my $ordered_name (@shift_name_order)
    {
	my $present = exists $name_test_hash{$ordered_name};
	my $allowed = (!$user_shift_name_list || exists $$user_shift_name_list{$ordered_name});
	push @usable_names, $ordered_name   if ($present && $allowed);
    }
    $$bmrb_hlist1{"parsedtokens"} = [ "AA", @usable_names ];

    return $bmrb_hlist1;

}

# name_array_consistency
#   Tells whether "name_array" and the keys of "rlist" largely name the same residues.
#
#   Counts the distinct names found in the two lists together and returns 1 when
#   that count is below (size of name_array + 25% of the number of rlist keys),
#   i.e. when, for a name_array without repeats, more than 75% of the rlist keys
#   also appear in name_array.  Returns 0 otherwise, including when the
#   structure has no "name_array" at all.
#
#   Parameters:
#	$bmrb_hlist - reference to bmrb hash structure.
sub name_array_consistency
{
    my $bmrb_hlist = shift @_;

    # nothing to be consistent with; dereferencing an absent array would die under strict refs.
    return 0   if (! defined $$bmrb_hlist{"name_array"});

    my @listed_names = @{$$bmrb_hlist{"name_array"}};
    # only the counts matter below, so the keys are taken in whatever order they come.
    my @residue_names = (defined $$bmrb_hlist{"rlist"}) ? (keys %{$$bmrb_hlist{"rlist"}}) : ();

    # grep in scalar context yields the number of first occurrences.
    my %seen;
    my $distinct_count = grep { ! $seen{$_}++ } (@listed_names, @residue_names);

    # If 75% of the rlist names are duplicates of name_array entries then true.
    return 1   if ($distinct_count < (scalar(@listed_names) + (0.25 * scalar(@residue_names))));
    return 0;
}


# module must return true: a file loaded with require/use has to end by
# yielding a true value, which this top-level return supplies.
return 1;
