Remove cycle suppression
[gromacs.git] / scripts / xplor2gmx.pl
blob647459d1dec1cb9c88ee5f496c01626ee5289053
1 #!/usr/bin/perl -w
3 use strict;
5 # This script reads an XPLOR input file with distance restraint data
6 # as sometimes is found in the pdb database (http://www.pdb.org).
7 # From this input file dihedral restrints should be removed, such that
8 # only distance restraints are left. The script can handle ambiguous restraints.
9 # It converts the distance restraints to GROMACS format.
10 # A typical command line would be
11 # ./xplor2gmx.pl 33 conf.pdb < restraints.dat > disre.itp
12 # You can turn off debugging info, but please do verify your output is correct!
14 # David van der Spoel (spoel@gromacs.org), July 2002
17 # Turn debugging off (0), or on ( > 0).
18 my $debug = 1;
19 # Turn atom name translation on and off
20 my $trans = 1;
22 my $pdb = shift || die "I need the name of the pdb file with correct atom numbers\n";
23 my $core = shift || "core.ndx";
24 my $tbl = "$ENV{GMXDATA}/top/atom_nom.tbl";
26 printf "[ distance_restraints ]\n";
27 printf "; Read an xplor distance restraint file, and output GROMACS distance restraints.\n";
28 printf "; This also needs a pdb file with correct GROMACS atom numbering.\n";
29 printf "; I used $pdb for the atom numbers\n";
30 printf "; This also needs the offset in residues.\n";
32 # Read the pdb file
33 # If things go wrong, check whether your pdb file is read correctly.
34 my $natom = 0;
35 my $nres = 0;
36 my @resname;
37 my @aname;
38 my @resnr;
39 open(PDB,$pdb) || die "Can not open file $pdb\n";
40 while (my $line = <PDB>) {
41 if (index($line,"ATOM") >= 0) {
42 my @tmp = split(' ',$line);
43 $aname[$natom] = $tmp[2];
44 $resnr[$natom] = $tmp[4];
45 if (!defined $resname[$tmp[4]]) {
46 $resname[$tmp[4]] = $tmp[3];
47 $nres++;
49 $natom++;
52 close PDB;
53 printf "; I found $natom atoms in the pdb file $pdb\n";
54 printf "; I found $nres residues in the pdb file $pdb\n";
55 if ($debug > 1) {
56 for (my $i = 0; ($i < $natom); $i ++) {
57 printf("; %5d %5s %5s %5d\n",$i+1,$aname[$i],
58 $resname[$resnr[$i]],$resnr[$i]);
63 # Read the name translation table.
64 # Source for this comes from: http://www.bmrb.wisc.edu/ref_info/atom_nom.tbl
65 # It was adapted slightly for GROMACS use, but only as regards formatting,
66 # not for content
68 open(TBL,$tbl) || die "Can not open atom-name translation table $tbl\n";
69 my $ntbl=0;
70 my @tblxplor;
71 my @tblgmx;
72 my @tblres;
73 while (my $line = <TBL>) {
74 my @ttt = split('#',$line);
75 my @tmp = split(' ',$ttt[0]);
76 if ($#tmp == 3) {
77 # New table entry
78 $tblres[$ntbl] = $tmp[0];
79 $tblxplor[$ntbl] = $tmp[1];
80 $tblgmx[$ntbl] = $tmp[3];
81 $ntbl++;
84 close TBL;
85 printf "; Read $ntbl entries from $tbl\n";
87 my $default = "XXX";
89 my @templates = (
90 [ $default, "HA#", "HA1", "HA2" ],
91 [ $default, "HA*", "HA1", "HA2" ],
92 [ $default, "HB#", "HB", "HB1", "HB2" ],
93 [ $default, "HB*", "HB", "HB1", "HB2" ],
94 [ $default, "HG#", "HG", "HG1", "HG2", "HG11", "HG12", "HG13", "HG21", "HG22", "HG23" ],
95 [ $default, "HG*", "HG", "HG1", "HG2", "HG11", "HG12", "HG13", "HG21", "HG22", "HG23" ],
96 [ $default, "HG1#", "HG11", "HG12", "HG13" ],
97 [ $default, "HG1*", "HG11", "HG12", "HG13" ],
98 [ $default, "HG2#", "HG21", "HG22", "HG23" ],
99 [ $default, "HG2*", "HG21", "HG22", "HG23" ],
100 [ $default, "HD#", "HD1", "HD2", "HD11", "HD12", "HD13", "HD21", "HD22", "HD23" ],
101 [ $default, "HD*", "HD1", "HD2", "HD11", "HD12", "HD13", "HD21", "HD22", "HD23" ],
102 [ $default, "HD1#", "HD11", "HD12" ],
103 [ "ILE", "HD1*", "HD1", "HD2", "HD3" ],
104 [ $default, "HD1*", "HD11", "HD12" ],
105 [ $default, "HD2#", "HD21", "HD22" ],
106 [ $default, "HD2*", "HD21", "HD22" ],
107 [ $default, "HE#", "HE", "HE1", "HE2" ],
108 [ "GLN", "HE*", "HE21", "HE22" ],
109 [ $default, "HE*", "HE", "HE1", "HE2" ],
110 [ $default, "HE2*", "HE2", "HE21", "HE22" ],
111 [ $default, "HH#", "HH11", "HH12", "HH21", "HH22" ],
112 [ $default, "HH*", "HH11", "HH12", "HH21", "HH22" ],
113 [ $default, "HZ#", "HZ", "HZ1", "HZ2", "HZ3" ],
114 [ $default, "HZ*", "HZ", "HZ1", "HZ2", "HZ3" ],
115 [ $default, "HN", "H" ]
118 my $ntranslated = 0;
119 my $nuntransltd = 0;
120 sub transl_aname {
121 my $resnm = shift;
122 my $atom = shift;
124 if ( $trans == 1 ) {
125 for(my $i = 0; ($i < $ntbl); $i ++) {
126 if ($tblres[$i] eq $resnm) {
127 if ($tblxplor[$i] eq $atom) {
128 $ntranslated++;
129 return $tblgmx[$i];
134 $nuntransltd++;
135 if ($debug > 1) {
136 printf "; No name change for $resnm $atom\n";
138 return $atom;
141 sub expand_template {
142 my $atom = shift(@_);
143 my $rnum = shift(@_);
144 my $bdone = 0;
145 my @atoms;
146 my $jj = 0;
148 die("No residue name for residue $rnum") if (!defined ($resname[$rnum]));
149 for (my $tt=0; (($tt <= $#templates) && ($bdone == 0)); $tt++) {
150 my $templ = $templates[$tt];
151 if (($resname[$rnum] eq $templ->[0] ||
152 $default eq $templ->[0]) &&
153 ($atom eq $templ->[1])) {
154 for ($jj = 2; ($jj <= $#{$templ}); $jj++) {
155 push @atoms, transl_aname($resname[$rnum],$templ->[$jj]);
157 $bdone = 1;
160 if ($bdone == 0) {
161 push @atoms, transl_aname($resname[$rnum],$atom);
163 if ($debug > 0) {
164 my $natom = $#atoms+1;
165 printf("; Found $natom elements for atom $resname[$rnum] %d $atom:",
166 $rnum);
167 for my $aa ( @atoms ) {
168 printf " $aa";
170 printf "\n";
172 return @atoms;
175 if ($debug > 1) {
176 printf "; There are $#templates template entries\n";
177 for (my $tt=0; ($tt <= $#templates); $tt++) {
178 my $templ = $templates[$tt];
179 my $ntempl = $#{$templ};
180 printf "; Item $tt ($templates[$tt][0] $templates[$tt][1]) has $ntempl entries\n";
184 # This index file holds numbers of restraints involving core atoms
185 my @protcore = ( "H", "HN", "HA", "HA1", "HA2", "HB", "HB1", "HB2", "HB3", "HG", "HG1", "HG2", "HG3", "N", "O" );
186 open(CORE,">$core") || die "Can not open $core\n";
187 print CORE "[ core-restraints ]\n";
188 my $ncore = 0;
190 my $myindex = 0;
191 my $linec = 0;
192 my $npair = 0;
193 my $nunresolved = 0;
194 while (my $line = <STDIN>) {
195 my @ttt = split('!',$line);
196 if (index($ttt[0], "dihedral") >= 0) {
197 last;
199 elsif ((index($ttt[0],"assign") >= 0) && (index($ttt[0],"!assign") < 0)) {
200 my @tmp = split('\(',$ttt[0]);
201 # Find first argument
202 my $rhaak = undef;
203 if (($rhaak = index($tmp[1],')')) < 0) {
204 printf "No ) in '$tmp[1]'\n";
206 my $atres1 = substr($tmp[1],0,$rhaak);
207 my @at1 = split('or',$atres1);
209 # Find second argument
210 if (($rhaak = index($tmp[2],')')) < 0) {
211 printf "No ) in '$tmp[2]'\n";
213 my $atres2 = substr($tmp[2],0,$rhaak);
214 my @at2 = split('or',$atres2);
216 my @expdata = split('\)',$tmp[2]);
217 my @dist = split(' ',$expdata[1]);
219 my $bOK = 0;
220 my $bCore = 1;
221 foreach my $a1 ( @at1 ) {
222 my @info1 = split(' ',$a1);
223 my $r1 = $info1[1];
224 my @atoms1 = expand_template($info1[4],$r1);
226 foreach my $a2 ( @at2 ) {
227 my @info2 = split(' ',$a2);
228 my $r2 = $info2[1];
229 my @atoms2 = expand_template($info2[4],$r2);
231 my $i = undef;
232 for ($i = 0; ($i < $natom) && ($resnr[$i] < $r1); $i++) { ; }
233 for ( ; ($i < $natom) && ($resnr[$i] == $r1); $i++) {
234 foreach my $ii ( @atoms1 ) {
235 if ($ii eq $aname[$i]) {
236 my $bCoreI = 0;
237 for my $pp ( @protcore ) {
238 if ($ii eq $pp) {
239 $bCoreI = 1;
242 my $j = undef;
243 for ($j = 0; ($j < $natom) && ($resnr[$j] < $r2); $j++) { ; }
244 for ( ; ($j < $natom) && ($resnr[$j] == $r2); $j++) {
245 foreach my $jj ( @atoms2 ) {
246 if ($jj eq $aname[$j]) {
247 my $dd = 0.1*$dist[0];
248 my $dminus = 0.1*$dist[1];
249 my $dplus = 0.1*$dist[2];
250 my $low = $dd-$dminus;
251 my $up1 = $dd+$dplus;
252 my $up2 = $up1+1;
253 printf("%5d %5d %5d %5d %5d %7.3f %7.3f %7.3f 1.0; res $r1 $ii - res $r2 $jj\n",$i+1,$j+1,1,$myindex,1,$low,$up1,$up2);
254 # Do some checks
255 $bOK = 1;
256 my $bCoreJ = 0;
257 for my $pp ( @protcore ) {
258 if ($jj eq $pp) {
259 $bCoreJ = 1;
262 if (($bCoreI == 0) || ($bCoreJ == 0)) {
263 if ($debug > 0) {
264 printf "; No core $ii ($bCoreI) $jj ($bCoreJ)\n";
266 $bCore = 0;
268 $npair++;
277 if ($bCore == 1) {
278 printf CORE "$myindex\n";
279 $ncore++;
281 if ($bOK == 0) {
282 printf "; UNRESOLVED: $ttt[0]\n";
283 $nunresolved++;
285 else {
286 $myindex++;
289 $linec++;
291 close CORE;
293 printf "; A total of $myindex lines with distance restraints were read from $linec input lines\n";
294 printf "; From this, $npair actual restraints were generated.\n";
295 printf "; A total of $nunresolved lines could not be interpreted\n";
296 printf "; In the process $ntranslated atoms had a name change\n";
297 printf "; However for $nuntransltd no name change could be found\n";
298 printf "; usually these are either HN->H renamings or not-existing atoms\n";
299 printf "; generated by the template expansion (e.g. HG#)\n";
300 printf "; A total of $ncore restraints were classified as core restraints\n";