Bumped patch number to prepare for future 5.1.3 release
[gromacs.git] / src / contrib / do_multiprot.c
blob80474afb71aa50254b82b340ac4b8a854b94cec7
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 * This source code is NOT REALLY part of
4 *
5 * G R O M A C S
6 *
7 * GROningen MAchine for Chemical Simulations
8 *
9 * VERSION 4.2.5
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
32 * Author: do_multiprot was written by Ran Friedman <r.friedman@bioc.uzh.ch>
34 * And Hey:
35 * Green Red Orange Magenta Azure Cyan Skyblue
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
42 #include "typedefs.h"
43 #include "macros.h"
44 #include "gromacs/utility/smalloc.h"
45 #include "gromacs/commandline/pargs.h"
46 #include "copyrite.h"
47 #include "gromacs/fileio/pdbio.h"
48 #include "gromacs/utility/fatalerror.h"
49 #include "gromacs/fileio/xvgr.h"
50 #include "gromacs/topology/index.h"
51 #include "gstat.h"
52 #include "gromacs/fileio/tpxio.h"
53 #include "viewit.h"
54 #include "gbutil.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/fileio/confio.h"
57 #include "gromacs/fileio/gmxfio.h"
59 typedef struct {
60 int resnr;
61 int count;
62 } t_countres;
64 static void process_multiprot_output(const char *fn, real *rmsd, int *nres, rvec rotangles,
65 rvec translation, bool bCountres, t_countres *countres)
67 FILE *mpoutput;
68 char line[256];
69 char *string;
70 int i=0,j=0,res;
72 (*rmsd)=-1;
73 (*nres)=0;
74 mpoutput=gmx_ffopen (fn,"r");
76 if (bCountres) {
77 do {
78 fgets(line, 256, mpoutput);
79 } while (strstr(line,"Match List") == NULL);
80 fgets(line, 256, mpoutput);
81 do {
82 string = strtok (line,".");
83 while (i<2 && string != NULL) {
84 string = strtok (NULL,". ");
85 i++;
87 i=0;
88 res=atoi(string);
89 if (res > 0) {
90 while (countres[j].resnr!=res)
91 j++;
92 countres[j].count++;
94 fgets(line, 256, mpoutput);
95 } while (strstr(line,"End of Match List") == NULL);
96 rewind(mpoutput);
98 do {
99 fgets(line, 256, mpoutput);
100 } while (strstr(line,"Trans : ") == NULL);
102 string = strtok (line," :");
103 string = strtok (NULL," ");
104 while (i<3 && string != NULL) {
105 string = strtok (NULL," ");
106 rotangles[i]=atof(string);
107 i++;
109 i=0;
110 while (i<3 && string != NULL) {
111 string = strtok (NULL," ");
112 translation[i]=atof(string)/10;
113 i++;
115 if (i!=3) {
116 gmx_warning("Not enough values for rotation and translation vectors in the output of multiprot");
119 rotangles[YY]=rotangles[YY]*(-1);
121 while ((*rmsd) <0) {
122 fgets(line, 256, mpoutput);
123 if (strstr(line,"RMSD : ") != NULL) {
124 string = strtok (line,":");
125 string = strtok (NULL,":");
126 (*rmsd)=atof(string)/10;
129 while (!(*nres)) {
130 fgets(line,256, mpoutput);
131 if (strstr(line,"Match List") != NULL) {
132 string = strtok (line,":");
133 string = strtok (NULL,":");
134 (*nres) = atoi(string);
137 gmx_ffclose(mpoutput);
140 int main(int argc,char *argv[])
142 const char *desc[] = {
143 "[TT]do_multiprot[tt] ",
144 "reads a trajectory file and aligns it to a reference structure ",
145 "each time frame",
146 "calling the multiprot program. This allows you to use a reference",
147 "structure whose sequence is different than that of the protein in the ",
148 "trajectory, since the alignment is based on the geometry, not sequence.",
149 "The output of [TT]do_multiprot[tt] includes the rmsd and the number of residues",
150 "on which it was calculated.",
151 "[PAR]",
152 "An aligned trajectory file is generated with the [TT]-ox[tt] option.[PAR]",
153 "With the [TT]-cr[tt] option, the number of hits in the alignment is given",
154 "per residue. This number can be between 0 and the number of frames, and",
155 "indicates the structural conservation of this residue.[PAR]",
156 "If you do not have the [TT]multiprot[tt] program, get it. [TT]do_multiprot[tt] assumes",
157 "that the [TT]multiprot[tt] executable is [TT]/usr/local/bin/multiprot[tt]. If this is ",
158 "not the case, then you should set an environment variable [BB]MULTIPROT[bb]",
159 "pointing to the [TT]multiprot[tt] executable, e.g.: [PAR]",
160 "[TT]setenv MULTIPROT /usr/MultiProtInstall/multiprot.Linux[tt][PAR]",
161 "Note that at the current implementation only binary alignment (your",
162 "molecule to a reference) is supported. In addition, note that the ",
163 "by default [TT]multiprot[tt] aligns the two proteins on their C-alpha carbons.",
164 "and that this depends on the [TT]multiprot[tt] parameters which are not dealt ",
165 "with here. Thus, the C-alpha carbons is expected to give the same "
166 "results as choosing the whole protein and will be slightly faster.[PAR]",
167 "For information about [TT]multiprot[tt], see:",
168 "http://bioinfo3d.cs.tau.ac.il/MultiProt/.[PAR]"
170 static bool bVerbose;
171 t_pargs pa[] = {
172 { "-v", FALSE, etBOOL, {&bVerbose},
173 "HIDDENGenerate miles of useless information" }
176 const char *bugs[] = {
177 "The program is very slow, since multiprot is run externally"
180 t_trxstatus *status;
181 t_trxstatus *trxout=NULL;
182 FILE *tapein,*fo,*frc,*tmpf,*out=NULL,*fres=NULL;
183 const char *fnRef;
184 const char *fn="2_sol.res";
185 t_topology top;
186 int ePBC;
187 t_atoms *atoms,ratoms,useatoms;
188 t_trxframe fr;
189 t_pdbinfo p;
190 int nres,nres2,nr0;
191 real t;
192 int i,j,natoms,nratoms,nframe=0,model_nr=-1;
193 int cur_res,prev_res;
194 int nout;
195 t_countres *countres=NULL;
196 matrix box,rbox;
197 int gnx;
198 char *grpnm,*ss_str;
199 atom_id *index;
200 rvec *xp,*x,*xr;
201 char pdbfile[32],refpdb[256],title[256],rtitle[256],filemode[5];
202 char out_title[256];
203 char multiprot[256],*mptr;
204 int ftp;
205 int outftp=-1;
206 real rmsd;
207 bool bTrjout,bCountres;
208 const char *TrjoutFile=NULL;
209 output_env_t oenv;
210 static rvec translation={0,0,0},rotangles={0,0,0};
211 gmx_rmpbc_t gpbc=NULL;
213 t_filenm fnm[] = {
214 { efTRX, "-f", NULL, ffREAD },
215 { efTPS, NULL, NULL, ffREAD },
216 { efNDX, NULL, NULL, ffOPTRD },
217 { efSTX, "-r", NULL , ffREAD },
218 { efXVG, "-o", "rmss", ffWRITE },
219 { efXVG, "-rc", "rescount", ffWRITE},
220 { efXVG, "-cr", "countres", ffOPTWR},
221 { efTRX, "-ox", "aligned", ffOPTWR }
223 #define NFILE asize(fnm)
225 CopyRight(stderr,argv[0]);
226 parse_common_args(&argc,argv,PCA_CAN_TIME | PCA_CAN_VIEW | PCA_TIME_UNIT,
227 NFILE,fnm, asize(pa),pa, asize(desc),desc,
228 asize(bugs),bugs,&oenv
230 fnRef=opt2fn("-r",NFILE,fnm);
231 bTrjout = opt2bSet("-ox",NFILE,fnm);
232 bCountres= opt2bSet("-cr",NFILE,fnm);
234 if (bTrjout) {
235 TrjoutFile = opt2fn_null("-ox",NFILE,fnm);
238 read_tps_conf(ftp2fn(efTPS,NFILE,fnm),title,&top,&ePBC,&xp,NULL,box,FALSE);
239 gpbc = gmx_rmpbc_init(&top.idef,ePBC,top.atoms.nr,box);
240 atoms=&(top.atoms);
242 ftp=fn2ftp(fnRef);
244 get_stx_coordnum(fnRef,&nratoms);
245 init_t_atoms(&ratoms,nratoms,TRUE);
246 snew(xr,nratoms);
247 read_stx_conf(fnRef,rtitle,&ratoms,xr,NULL,&ePBC,rbox);
249 if (bVerbose) {
250 fprintf(stderr,"Read %d atoms\n",atoms->nr);
251 fprintf(stderr,"Read %d reference atoms\n",ratoms.nr);
253 if (bCountres) {
254 snew(countres,ratoms.nres);
255 j=0;
256 cur_res=0;
257 for (i=0;i<ratoms.nr;i++) {
258 prev_res=cur_res;
259 cur_res=ratoms.atom[i].resind;
260 if (cur_res != prev_res) {
261 countres[j].resnr=cur_res;
262 countres[j].count=0;
263 j++;
267 get_index(atoms,ftp2fn_null(efNDX,NFILE,fnm),1,&gnx,&index,&grpnm);
268 nres=0;
269 nr0=-1;
270 for(i=0; (i<gnx); i++) {
271 if (atoms->atom[index[i]].resind != nr0) {
272 nr0=atoms->atom[index[i]].resind;
273 nres++;
276 fprintf(stderr,"There are %d residues in your selected group\n",nres);
278 strcpy(pdbfile,"ddXXXXXX");
279 gmx_tmpnam(pdbfile);
280 if ((tmpf = fopen(pdbfile,"w")) == NULL) {
281 sprintf(pdbfile,"%ctmp%cfilterXXXXXX",DIR_SEPARATOR,DIR_SEPARATOR);
282 gmx_tmpnam(pdbfile);
283 if ((tmpf = fopen(pdbfile,"w")) == NULL) {
284 gmx_fatal(FARGS,"Can not open tmp file %s",pdbfile);
287 else {
288 gmx_ffclose(tmpf);
291 if (ftp != efPDB) {
292 strcpy(refpdb,"ddXXXXXX");
293 gmx_tmpnam(refpdb);
294 strcat(refpdb,".pdb");
295 write_sto_conf(refpdb,rtitle,&ratoms,xr,NULL,ePBC,rbox);
297 else {
298 strcpy(refpdb,fnRef);
301 if ((mptr=getenv("MULTIPROT")) == NULL) {
302 mptr="/usr/local/bin/multiprot";
304 if (!gmx_fexist(mptr)) {
305 gmx_fatal(FARGS,"MULTIPROT executable (%s) does not exist (use setenv MULTIPROT)",
306 mptr);
308 sprintf (multiprot,"%s %s %s > /dev/null %s",
309 mptr, refpdb, pdbfile, "2> /dev/null");
311 if (bVerbose)
312 fprintf(stderr,"multiprot cmd='%s'\n",multiprot);
314 if (!read_first_frame(oenv,&status,ftp2fn(efTRX,NFILE,fnm),&fr,TRX_READ_X))
315 gmx_fatal(FARGS,"Could not read a frame from %s",ftp2fn(efTRX,NFILE,fnm));
316 natoms = fr.natoms;
318 if (bTrjout) {
319 nout=natoms;
320 /* open file now */
321 outftp=fn2ftp(TrjoutFile);
322 if (bVerbose)
323 fprintf(stderr,"Will write %s: %s\n",ftp2ext(ftp),ftp2desc(outftp));
324 strcpy(filemode,"w");
325 switch (outftp) {
326 case efXTC:
327 case efG87:
328 case efTRR:
329 case efTRJ:
330 out=NULL;
331 trxout = open_trx(TrjoutFile,filemode);
332 break;
333 case efGRO:
334 case efG96:
335 case efPDB:
336 /* Make atoms struct for output in GRO or PDB files */
337 /* get memory for stuff to go in pdb file */
338 init_t_atoms(&useatoms,nout,FALSE);
339 sfree(useatoms.resinfo);
340 useatoms.resinfo=atoms->resinfo;
341 for(i=0;(i<nout);i++) {
342 useatoms.atomname[i]=atoms->atomname[i];
343 useatoms.atom[i]=atoms->atom[i];
344 useatoms.nres=max(useatoms.nres,useatoms.atom[i].resind+1);
346 useatoms.nr=nout;
347 out=gmx_ffopen(TrjoutFile,filemode);
348 break;
352 if (natoms > atoms->nr) {
353 gmx_fatal(FARGS,"\nTrajectory does not match topology!");
355 if (gnx > natoms) {
356 gmx_fatal(FARGS,"\nTrajectory does not match selected group!");
359 fo = xvgropen(opt2fn("-o",NFILE,fnm),"RMSD","Time (ps)","RMSD (nm)",oenv);
360 frc = xvgropen(opt2fn("-rc",NFILE,fnm),"Number of Residues in the alignment","Time (ps)","Residues",oenv);
362 do {
363 t = output_env_conv_time(oenv,fr.time);
364 gmx_rmpbc(gpbc,natoms,fr.box,fr.x);
365 tapein=gmx_ffopen(pdbfile,"w");
366 write_pdbfile_indexed(tapein,NULL,atoms,fr.x,ePBC,fr.box,' ',-1,gnx,index,NULL,TRUE);
367 gmx_ffclose(tapein);
368 system(multiprot);
369 remove(pdbfile);
370 process_multiprot_output(fn, &rmsd, &nres2,rotangles,translation,bCountres,countres);
371 fprintf(fo,"%12.7f",t);
372 fprintf(fo," %12.7f\n",rmsd);
373 fprintf(frc,"%12.7f",t);
374 fprintf(frc,"%12d\n",nres2);
375 if (bTrjout) {
376 rotate_conf(natoms,fr.x,NULL,rotangles[XX],rotangles[YY],rotangles[ZZ]);
377 for(i=0; i<natoms; i++) {
378 rvec_inc(fr.x[i],translation);
380 switch(outftp) {
381 case efTRJ:
382 case efTRR:
383 case efG87:
384 case efXTC:
385 write_trxframe(trxout,&fr,NULL);
386 break;
387 case efGRO:
388 case efG96:
389 case efPDB:
390 sprintf(out_title,"Generated by do_multiprot : %s t= %g %s",
391 title,output_env_conv_time(oenv,fr.time),output_env_get_time_unit(oenv));
392 switch(outftp) {
393 case efGRO:
394 write_hconf_p(out,out_title,&useatoms,prec2ndec(fr.prec),
395 fr.x,NULL,fr.box);
396 break;
397 case efPDB:
398 fprintf(out,"REMARK GENERATED BY DO_MULTIPROT\n");
399 sprintf(out_title,"%s t= %g %s",title,output_env_conv_time(oenv,fr.time),output_env_get_time_unit(oenv));
400 /* if reading from pdb, we want to keep the original
401 model numbering else we write the output frame
402 number plus one, because model 0 is not allowed in pdb */
403 if (ftp==efPDB && fr.step > model_nr) {
404 model_nr = fr.step;
406 else {
407 model_nr++;
409 write_pdbfile(out,out_title,&useatoms,fr.x,ePBC,fr.box,' ',model_nr,NULL,TRUE);
410 break;
411 case efG96:
412 fr.title = out_title;
413 fr.bTitle = (nframe == 0);
414 fr.bAtoms = FALSE;
415 fr.bStep = TRUE;
416 fr.bTime = TRUE;
417 write_g96_conf(out,&fr,-1,NULL);
419 break;
422 nframe++;
423 } while(read_next_frame(oenv,status,&fr));
424 if (bCountres) {
425 fres= xvgropen(opt2fn("-cr",NFILE,fnm),"Number of frames in which the residues are aligned to","Residue","Number",oenv);
426 for (i=0;i<ratoms.nres;i++) {
427 fprintf(fres,"%10d %12d\n",countres[i].resnr,countres[i].count);
429 gmx_ffclose(fres);
431 gmx_ffclose(fo);
432 gmx_ffclose(frc);
433 fprintf(stderr,"\n");
434 close_trj(status);
435 if (trxout != NULL) {
436 close_trx(trxout);
438 else if (out != NULL) {
439 gmx_ffclose(out);
441 view_all(oenv,NFILE, fnm);
442 sfree(xr);
443 if (bCountres) {
444 sfree(countres);
446 free_t_atoms(&ratoms,TRUE);
447 if (bTrjout) {
448 if (outftp==efPDB || outftp==efGRO || outftp==efG96) {
449 free_t_atoms(&useatoms,TRUE);
452 gmx_thanx(stderr);
453 return 0;