/*********************************************************************************** * * Given a pdb file, a file that assigns secondary structure boundaries * of the protein structure and an external program that is used to calculate * free energy of folded structures, SSFold predicts the macroscopic folding * pathway of the protein. * * Predicting macroscopic protein folding pathways based on native * interactions between secondary structure elements * * Qingwu Yang and Sing-Hoi Sze * * Implemented by Qingwu Yang * April, 2007 * ************************************************************************************/ #include #include #include #include #include #include double NATIVE_CONTACT_DIST_CUTOFF = 7.0; //angstrom int NATIVE_CONTACTS_TOT = 0; typedef struct atom_t { char * s; //keep the pdb line that contains the atom int n; //id (#) of the aa that contains the atom } atom_t; typedef struct aa_t { int na; //# of atoms struct atom_t ** at; //addresses of the atoms int ca; //if ca exists 1; otherwise 0. double x_ca; double y_ca; double z_ca; } aa_t; typedef struct ss_t { char *s; //name of the ss char t; //type of the ss int n; //# of aa's int sp; //starting point of the ss int ep; //end point of the ss double e; //energy of the ss } ss_t; typedef struct ssunit { int id; //id of the ss //char t; //type of the ss struct ssunit * next; } ssunit; typedef struct sslist { struct ssunit * ss; struct sslist * next; } sslist; typedef struct mnode { double e; struct sslist * ablist; //helix & sheet struct ssunit * turn; //turn } mnode; //parse the command-line input void cmdlin_parser(int argc, char **argv,char *pdbf, char *ssbf, char *outf); //read in atoms (of the first model, if many) from a pdb file void read_pdb_atoms(atom_t ** ats, int * n, char * f); //set up aa void set_aa(aa_t ** aa, int * n, atom_t * a, int n_a); //read in ss boundaries void read_ss_boundaries(ss_t ** ss, int * n_ss, char * f); //check if two ss boundaries overlap void check_ss_overlap(ss_t * ss, int n); //given a pdb file, call rosetta to calculate energy and return the score. Extension .pdb must be in the name. double cal_tot_energy(char *f ); //given a pdb file, call rosetta to calculate energy and return the score. Extension .pdb must be in the name. //rosetta.gcc -score -s -scorefile double cal_tot_energy_2(char *f ); //calculate the energy of each ss void cal_ss_energy(ss_t * ss, int n, aa_t * as); //calculate the total energy of a list of ss's double cal_ssset_energy(ss_t * ss, ssunit * su, aa_t * as); //given an mnode calculate energy void cal_mnode_energy(mnode * o, ss_t * ss, aa_t * as); //generate the original status void create_mnode(ss_t *ss, int n, mnode **oo); //display an mnode void display_mnode(mnode * o); //output an mnode to a temporary file void output_mnode(mnode * o); //calculate # of native contacts in an mnode int cal_num_native_contacts_mnode(mnode * o, ss_t * ss, aa_t * as); //calculate # of native contacts in an ssset int cal_tot_num_native_contacts(int n_aas, aa_t * as); //calculate # of native contacts in an ssset int cal_tot_num_native_contacts_1(int n_ss, ss_t * ss, int n_aas, aa_t * as); //greedy void greedy_path(mnode *o, ss_t *ss, int n_ss, aa_t *as, char *f); int main(int argc, char ** argv){ //FILE *outf; char pdbf[50], ssbf[50], ofname[50], logf[50]; atom_t * atoms; //atoms aa_t * aas; //aa's ss_t * sss; //ss's int n_atoms, n_aas, n_sss; double e3dtot, etmp; mnode * orinode; int i; time_t time0, time1; time0 = time(NULL); //parse the command-line input cmdlin_parser(argc, argv, pdbf, ssbf, ofname); strcpy(logf, pdbf); strcat(logf, ".log"); //if( !(outf = fopen(logf, "w")) ) // fprintf(stderr, "File %s cannot be opened for writing!\n", logf), fflush(NULL), exit(0); //read in atoms (of the first model, if many) from a pdb file read_pdb_atoms(&atoms, &n_atoms, pdbf); //set up aa set_aa(&aas, &n_aas, atoms, n_atoms); //read in ss boundaries read_ss_boundaries(&sss, &n_sss, ssbf); //check if two ss boundaries overlap check_ss_overlap(sss, n_sss); //calculate the energy of each ss cal_ss_energy(sss, n_sss, aas); etmp = 0; for(i=1; i<=n_sss; i++){ //fprintf(outf, "%2d [ %5s ](%3d, %3d)[%2d] %7.2f\n", i, sss[i].s, sss[i].sp, sss[i].ep, sss[i].ep - sss[i].sp +1, sss[i].e); etmp += sss[i].e; } //fprintf(outf, "Sum of energy: %7.2f\n", etmp); //fflush(NULL); //calculate the energy of 3d structure. //e3dtot = cal_tot_energy(pdbf); e3dtot = cal_tot_energy_2(pdbf); //fprintf(outf, "Energy of native protein structure: %7.2f\n", e3dtot); fflush(NULL); //calculate # of native contacts in an ssset NATIVE_CONTACTS_TOT = cal_tot_num_native_contacts_1(n_sss, sss, n_aas, aas); if( NATIVE_CONTACTS_TOT <= 0 ) fprintf(stderr, "\nNATIVE_CONTACTS_TOT <= 0 !\n\n"), fflush(NULL), exit(0); //generate the original status create_mnode(sss, n_sss, &orinode); //display_mnode(orinode); //printf("loosetot = %7.2f\n", orinode->e); //calculate energy of a given mnode //cal_mnode_energy(orinode, sss, aas); //printf("loosetot = %7.2f\n", orinode->e); //greedy greedy_path(orinode, sss, n_sss, aas, ofname); time1 = time(NULL); //fprintf(outf, "\nTotal time: %7.2f mins\n", (double)difftime(time1, time0)/60), fflush(NULL); //fclose(outf); //printf("\n===== NATIVE_CONTACTS_TOT = %d == %d \n\n", NATIVE_CONTACTS_TOT, cal_tot_num_native_contacts(n_aas, aas)); return 0; } //check if two ss boundaries overlap void check_ss_overlap(ss_t * ss, int n){ int i,j, k=0; for(i=1; i<=n; i++){ for(j=i+1; j<=n; j++){ if( ss[i].sp <= ss[j].ep && ss[j].sp <= ss[i].ep ){ fprintf(stderr, "Boundaries of following secondary structures overlap: \n"); fprintf(stderr, "%s (%d -- %d); %s (%d -- %d)\n", ss[i].s, ss[i].sp, ss[i].ep, ss[j].s, ss[j].sp, ss[j].ep); k=1; } } } if( k ) fflush(NULL), exit(0); return; } //combine 2 sslists and create a new mnode mnode * combine2mnode(mnode *o, sslist * s, sslist * t){ mnode * m; ssunit * ssutail, * newssu, *cssu, *dssu; sslist * ssltail, * newssl, *clist, *spos; int i; if( !(m = calloc(1, sizeof(mnode))) ) fprintf(stderr, "out of memory!"), fflush(NULL), exit(0); m->ablist = NULL; m->turn = NULL; clist = o->ablist; while( clist ){ if( clist != t ){ if( !( newssl = calloc(1, sizeof(sslist))) ) fprintf(stderr, "out of memory!"), fflush(NULL), exit(0); newssl->next = NULL; newssl->ss = NULL; cssu = clist->ss; while( cssu ){ if( !( newssu = calloc(1, sizeof(ssunit))) ) fprintf(stderr, "out of memory!"), fflush(NULL), exit(0); newssu->id = cssu->id; newssu->next = NULL; if( ! newssl->ss ){ ssutail = newssl->ss = newssu; } else { ssutail->next = newssu; ssutail = ssutail->next; } cssu = cssu->next; } if( ! m->ablist ){ ssltail = m->ablist = newssl; } else { ssltail->next = newssl; ssltail = ssltail->next; } if( clist == s ) spos = newssl; } clist = clist->next; } cssu = o->turn; while( cssu ){ if( !( newssu = calloc(1, sizeof(ssunit))) ) fprintf(stderr, "out of memory!"), fflush(NULL), exit(0); newssu->id = cssu->id; newssu->next = NULL; if( ! m->turn ){ ssutail = m->turn = newssu; } else { ssutail->next = newssu; ssutail = ssutail->next; } cssu = cssu->next; } //add t to new s cssu = t->ss; while( cssu ){ dssu = spos->ss; while( dssu->next && dssu->next->id < cssu->id ){ dssu = dssu->next; } if( !( newssu = calloc(1, sizeof(ssunit))) ) fprintf(stderr, "out of memory!"), fflush(NULL), exit(0); newssu->id = cssu->id; newssu->next = NULL; if( dssu->next ){ newssu->next = dssu->next; dssu->next = newssu; } else { dssu->next = newssu; } cssu = cssu->next; } //add turns to new s if applicable dssu = spos->ss; while( dssu->next ){ if( dssu->next->id - dssu->id == 2 ){ newssu = dssu->next; i = dssu->id + 1; cssu = m->turn; if( cssu->id == i ){ m->turn = cssu->next; } else { ssutail = m->turn; cssu = ssutail->next; while( cssu && cssu->id != i ){ ssutail = ssutail->next; cssu = cssu->next; } if( cssu ){ ssutail->next = cssu->next; } else { display_mnode(o); display_mnode(m); fprintf(stderr, "\n\nWrong here!\n\n"), fflush(NULL), exit(0); } } cssu->next = dssu->next; dssu->next = cssu; dssu = cssu->next; } else { dssu = dssu->next; } } //combine together if only 1 cclist if( m->ablist->next == NULL && m->turn ){ cssu = m->turn; while( cssu ){ m->turn = m->turn->next; dssu = m->ablist->ss; if( dssu->id > cssu->id ){ cssu->next = dssu; m->ablist->ss = cssu; } else { while( dssu->next && dssu->next->id < cssu->id ){ dssu = dssu->next; } if( ! dssu->next ){ dssu->next = cssu; cssu->next = NULL; } else { cssu->next = dssu->next; dssu->next = cssu; } } cssu = m->turn; } } return m; } //greedy recursive void greedy_recurr(mnode *o, ss_t *ss, int n_ss, int n, aa_t *as, mnode ** p){ sslist * clist, * dlist; mnode ** m; int k, i, j; double min; if( o->ablist == NULL ) return; k = n * (n-1) / 2; if( !(m = calloc(k, sizeof(mnode *))) ) fprintf(stderr, "out of memory!"), fflush(NULL), exit(0); clist = o->ablist; i = 0; while( clist->next ){ dlist = clist->next; while( dlist ){ m[i] = combine2mnode(o, clist, dlist); cal_mnode_energy(m[i], ss, as); //output_mnode(m[i]); i++; dlist = dlist->next; } clist = clist->next; } min = 1e300; for(i=0; ie - o->e < min ){ j = i; min = m[i]->e - o->e; } } p[n_ss - n] = m[j]; //for debugging only //output_mnode(m[j]); if( n > 2 ){ greedy_recurr(m[j], ss, n_ss, n-1, as, p); } return; } //greedy void greedy_path(mnode *o, ss_t *ss, int n_ss, aa_t *as, char *f){ FILE * outf; mnode ** path; mnode * r; ss_t * ts; sslist * clist; ssunit * cunit; int i, x; clist = o->ablist; x = 0; while( clist ){ x++; clist = clist->next; } if( !(path = calloc(x-1, sizeof(mnode *))) ) fprintf(stderr, "out of memory!"), fflush(NULL), exit(0); greedy_recurr(o, ss, x, x, as, path); if( !(outf = fopen(f, "w")) ) fprintf(stderr, "File %s cannot be opened for writing!\n", f), fflush(NULL), exit(0); //output ID's of SS /* clist = o->ablist; while( clist ){ cunit = clist->ss; fprintf(outf, "( "); while( cunit ){ fprintf(outf, "%d ", cunit->id); cunit = cunit->next; } fprintf(outf, ")"); clist = clist->next; } cunit = o->turn; while( cunit ){ fprintf(outf, "(%d)", cunit->id); cunit = cunit->next; } fprintf(outf, " %7.2f\n", o->e); for(i=0; iablist; while( clist ){ cunit = clist->ss; fprintf(outf, "( "); while( cunit ){ fprintf(outf, "%d ", cunit->id); cunit = cunit->next; } fprintf(outf, ")"); clist = clist->next; } cunit = r->turn; while( cunit ){ fprintf(outf, "(%d)", cunit->id); cunit = cunit->next; } fprintf(outf, " %7.2f\n", r->e); } */ //output names of ss clist = o->ablist; while( clist ){ cunit = clist->ss; fprintf(outf, "( "); while( cunit ){ ts = ss+ cunit->id; if( ts->t == 'H' || ts->t == 'E' || ts->t == 'S' ) //helices and sheets fprintf(outf, "%1s ", ts->s); cunit = cunit->next; } fprintf(outf, ")"); clist = clist->next; } /* cunit = o->turn; while( cunit ){ fprintf(outf, "(%d)", cunit->id); cunit = cunit->next; } */ fprintf(outf, " %7.2f ", o->e); fprintf(outf, " %7.2f \n", (double)cal_num_native_contacts_mnode(o, ss, as)/NATIVE_CONTACTS_TOT); for(i=0; iablist; while( clist ){ cunit = clist->ss; fprintf(outf, "( "); while( cunit ){ ts = ss + cunit->id; if( ts->t == 'H' || ts->t == 'E' || ts->t == 'S' ) //helices and sheets fprintf(outf, "%1s ", ts->s); //fprintf(outf, "%1s ", ss[cunit->id].s); cunit = cunit->next; } fprintf(outf, ")"); clist = clist->next; } /* cunit = r->turn; while( cunit ){ fprintf(outf, "(%d)", cunit->id); cunit = cunit->next; } */ fprintf(outf, " %7.2f ", r->e); fprintf(outf, " %7.2f \n", (double)cal_num_native_contacts_mnode(r, ss, as)/NATIVE_CONTACTS_TOT); } fclose(outf); return; } //calculate energy of a given mnode void cal_mnode_energy(mnode * o, ss_t * ss, aa_t * as){ sslist * clist; ssunit * cunit; double e=0; clist = o->ablist; while( clist ){ cunit = clist->ss; if( cunit->next == NULL){ e += ss[ cunit->id ].e ; } else { e += cal_ssset_energy(ss, cunit, as); } clist = clist->next; } cunit = o->turn; while( cunit ){ e += ss[ cunit->id ].e ; cunit = cunit->next; } o->e = e; return; } //calculate the total energy of a list of ss's double cal_ssset_energy(ss_t * ss, ssunit * su, aa_t * as){ FILE * f; char name[] = "xxtmptmp.pdb"; char name1[] = "xxtmptmp_0001.pdb"; int j, k; double e; if( !(f = fopen(name, "w")) ) fprintf(stderr, "File %s cannot be opened for writing!\n", name), fflush(NULL), exit(0); while ( su ){ for(j=ss[ su->id ].sp; j<=ss[ su->id ].ep; j++){ for(k=1; k<=as[j].na; k++) fprintf(f, "%s", (as[j].at)[k]->s); } su = su->next; } fclose(f); //e = cal_tot_energy(name); e = cal_tot_energy_2(name); remove(name); remove(name1); return e; } //calculate the energy of each ss void cal_ss_energy(ss_t * ss, int n, aa_t * as){ FILE * f; char name[] = "xxtmptmp.pdb"; char name1[] = "xxtmptmp_0001.pdb"; int i, j, k; for(i=1; i<=n; i++){ if( !(f = fopen(name, "w")) ) fprintf(stderr, "File %s cannot be opened for writing!\n", name), fflush(NULL), exit(0); for(j=ss[i].sp; j<=ss[i].ep; j++){ for(k=1; k<=as[j].na; k++) fprintf(f, "%s", (as[j].at)[k]->s); } fclose(f); //ss[i].e = cal_tot_energy(name); ss[i].e = cal_tot_energy_2(name); remove(name); remove(name1); } return; } //output an mnode to a temporary file void output_mnode(mnode * o){ FILE * outf; sslist * clist; ssunit * cunit; if( !(outf = fopen("tempmnode", "a")) ) fprintf(stderr, "File tempmnode can't be opened for writing!\n"), fflush(NULL), exit(0); clist = o->ablist; fprintf(outf, "\n\nHelix and sheets:\n"); while( clist ){ cunit = clist->ss; fprintf(outf, "\n"); while( cunit ){ fprintf(outf, "%d ", cunit->id); cunit = cunit->next; } clist = clist->next; } cunit = o->turn; fprintf(outf, "\nTurns:\n"); while( cunit ){ fprintf(outf, "%d ", cunit->id); cunit = cunit->next; } fprintf(outf, "\n%7.2f \n", o->e); fflush(NULL); return; } //display an mnode void display_mnode(mnode * o){ sslist * clist; ssunit * cunit; clist = o->ablist; fprintf(stdout, "\nHelix and sheets:\n"); while( clist ){ cunit = clist->ss; while( cunit ){ fprintf(stdout, "%d \n", cunit->id); cunit = cunit->next; } clist = clist->next; } cunit = o->turn; fprintf(stdout, "\nTurns:\n"); while( cunit ){ fprintf(stdout, "%d \n", cunit->id); cunit = cunit->next; } return; } //generate the original status void create_mnode(ss_t *ss, int n, mnode **oo){ ssunit * ssutail, * newssu; sslist * ssltail, * newssl; mnode * o; int i; if( !(*oo = o = calloc(1, sizeof(mnode))) ) fprintf(stderr, "out of memory!"), fflush(NULL), exit(0); o->ablist = NULL; //no ss o->turn = NULL; //no turns o->e = 0; //energy for(i=1; i<=n; i++){ o->e += ss[i].e; if( ss[i].t == 'H' || ss[i].t == 'E' || ss[i].t == 'S' ){ //helices and sheets if( ! o->ablist ){ if( !( newssl = calloc(1, sizeof(sslist))) ) fprintf(stderr, "out of memory!"), fflush(NULL), exit(0); newssl->next = NULL; newssl->ss = NULL; if( !( newssu = calloc(1, sizeof(ssunit))) ) fprintf(stderr, "out of memory!"), fflush(NULL), exit(0); newssu->id = i; //newssu->t = ss[i].t; newssu->next = NULL; newssl->ss = newssu; o->ablist = newssl; ssltail = o->ablist; } else { if( !( newssl = calloc(1, sizeof(sslist))) ) fprintf(stderr, "out of memory!"), fflush(NULL), exit(0); newssl->next = NULL; newssl->ss = NULL; if( !( newssu = calloc(1, sizeof(ssunit))) ) fprintf(stderr, "out of memory!"), fflush(NULL), exit(0); newssu->id = i; //newssu->t = ss[i].t; newssu->next = NULL; newssl->ss = newssu; ssltail->next = newssl; ssltail = ssltail->next; } } else { //turns if( ! o->turn ){ if( !( newssu = calloc(1, sizeof(ssunit))) ) fprintf(stderr, "out of memory!"), fflush(NULL), exit(0); newssu->id = i; //newssu->t = ss[i].t; newssu->next = NULL; o->turn = newssu; ssutail = o->turn; } else { if( !( newssu = calloc(1, sizeof(ssunit))) ) fprintf(stderr, "out of memory!"), fflush(NULL), exit(0); newssu->id = i; //newssu->t = ss[i].t; newssu->next = NULL; ssutail->next = newssu; ssutail = ssutail->next; } } } return; } //given a pdb file, call rosetta to calculate energy and return the score. Extension .pdb must be in the name. //rosetta.gcc -score -s -scorefile double cal_tot_energy_2(char *f ){ FILE * inf; char cmdline[300]; char fname[200], buff[4096]; double d; //, e //int i; strcpy(fname, "en_en_en_tmp.sc"); sprintf(cmdline, "rosetta.gcc -score -s %s -scorefile en_en_en_tmp", f); system( cmdline ); if( !(inf = fopen(fname, "r")) ) fprintf(stderr, "File %s cannot be opened for reading!\n", fname), fflush(NULL), exit(0); d = 0; while( fgets(buff, 4096, inf) ){ //filename score env pair vdw hs ss sheet cb rsigma hb_srbb hb_lrbb rg co rama description //1BDD.pdb -17.26 -22.42 0.82 0.33 0.24 0.00 3.78 24.77 0.00 -27.83 0.00 12.70 14.22 164.01 input if( strstr(buff, "filename") ){ if( fgets(buff, 4096, inf) ){ if( sscanf(buff, " %*s %lf %*s ", &d) != 1 ) fprintf(stderr, "Wrong energy dataline in file en_en_en_tmp.sc\n\n"), fflush(NULL), exit(0); fclose(inf); remove(fname); return d; } else { fprintf(stderr, "No energy dataline in file en_en_en_tmp.sc\n\n"), fflush(NULL), exit(0); } } } fprintf(stderr, "\n\n\n*******\nNO energy file en_en_en_tmp.sc\n\n\n\n"), fflush(NULL), exit(0); return d; } //given a pdb file, call rosetta to calculate energy and return the score. Extension .pdb must be in the name. //rosetta.gcc -s %s -design -natrot double cal_tot_energy(char *f ){ FILE * inf; char cmdline[200]; char fname[200], buff[4096]; double d, e; int i; strcpy(fname, f); for(i=(int)strlen(fname)-4; i>0; i--){ if( fname[i] == '.' && fname[i+1] == 'p' && fname[i+2] == 'd' && fname[i+3] == 'b' ){ fname[i] = '\0'; break; } } if( i == 0 ) fprintf(stderr, "File extension name must be .pdb ( %s )!\n", f), fflush(NULL), exit(0); strcat(fname, "_0001.pdb"); sprintf(cmdline, "rosetta.gcc -s %s -design -natrot", f); system( cmdline ); if( !(inf = fopen(fname, "r")) ) fprintf(stderr, "File %s cannot be opened for reading!\n", fname), fflush(NULL), exit(0); d = 0; while( fgets(buff, 4096, inf) ){ /* //use score as energy if( strstr(buff, "score:") ){ fclose(inf); if( sscanf(buff, " score: %lf ", &d) != 1 ) fprintf(stderr, "Wrong dataline in file %s \n%s\n", fname, buff), fflush(NULL), exit(0); return d; } */ /* unused scores bk_tot: the total score using the design energy function (lower is better) fa_intrares: intra-residue clashes fa_prob: P(aa|phi,psi), ramachandran preferences */ //fa_atr: attractive portion of the lennard-jones potential (rewards close contacts) if( strstr(buff, "fa_atr:") ){ if( sscanf(buff, " fa_atr: %lf ", &e) != 1 ) fprintf(stderr, "Wrong dataline in file %s \n%s\n", fname, buff), fflush(NULL), exit(0); d += (e * 0.77); } //fa_rep: lennard-jones repulsive (penalizes overlaps) if( strstr(buff, "fa_rep:") ){ if( sscanf(buff, " fa_rep: %lf ", &e) != 1 ) fprintf(stderr, "Wrong dataline in file %s \n%s\n", fname, buff), fflush(NULL), exit(0); d += (e * 0.54); } //fa_sol: lazaridis-karplus solvation model (penalizes buried polars) //* if( strstr(buff, "fa_sol:") ){ if( sscanf(buff, " fa_sol: %lf ", &e) != 1 ) fprintf(stderr, "Wrong dataline in file %s \n%s\n", fname, buff), fflush(NULL), exit(0); d += (e * 0.61); } //*/ //fa_pair: statistics based pair term, favors salt bridges //* if( strstr(buff, "fa_pair:") ){ if( sscanf(buff, " fa_pair: %lf ", &e) != 1 ) fprintf(stderr, "Wrong dataline in file %s \n%s\n", fname, buff), fflush(NULL), exit(0); d += (e * 0.23); } //*/ //hb_srbb: backbone-backbone hbonds close in primary sequence if( strstr(buff, "hb_srbb:") ){ if( sscanf(buff, " hb_srbb: %lf %*s ", &e) != 1 ) fprintf(stderr, "Wrong dataline in file %s \n%s\n", fname, buff), fflush(NULL), exit(0); d += (e * 2.59); } //hb_lrbb: backbone-backbone hbonds distant in primary sequence if( strstr(buff, "hb_lrbb:") ){ if( sscanf(buff, " hb_lrbb: %lf %*s ", &e) != 1 ) fprintf(stderr, "Wrong dataline in file %s \n%s\n", fname, buff), fflush(NULL), exit(0); d += (e * 2.59); } //hb_sc: sidechain-sidechain and sidechain-backbone hydrogen bond energy if( strstr(buff, "hb_sc:") ){ if( sscanf(buff, " hb_sc: %lf ", &e) != 1 ) fprintf(stderr, "Wrong dataline in file %s \n%s\n", fname, buff), fflush(NULL), exit(0); d += (e * 2.59); } //fa_dun: internal energy of side chain rotamers as derived from dunbrack's statistics //* if( strstr(buff, "fa_dun:") ){ if( sscanf(buff, " fa_dun: %lf ", &e) != 1 ) fprintf(stderr, "Wrong dataline in file %s \n%s\n", fname, buff), fflush(NULL), exit(0); d += (e * 0.75); } //*/ //fa_ref if( strstr(buff, "fa_ref:") ){ if( sscanf(buff, " fa_ref: %lf ", &e) != 1 ) fprintf(stderr, "Wrong dataline in file %s \n%s\n", fname, buff), fflush(NULL), exit(0); d += (- e); } } fclose(inf); return d; //fprintf(stderr, "No score line in file %s\n", fname), fflush(NULL), exit(0); //return 1e200; } //read in ss boundaries void read_ss_boundaries(ss_t ** ss, int * n_ss, char * f){ FILE * inf; ss_t *s; int n; char buff[4096], p[100]; if( !(inf = fopen(f, "r")) ) fprintf(stderr, "The secondary structure file %s cannot be opened for reading!\n", f), fflush(NULL), exit(0); n=0; while( fgets(buff, 4096, inf) ){ if(strstr(buff, ">") ) n++; } *n_ss = n; if( !(*ss = s = calloc(n +1, sizeof(ss_t))) ) fprintf(stderr, "out of memory!\n"), fflush(NULL), exit(0); rewind(inf); n=1; while( fgets(buff, 4096, inf) ){ if(strstr(buff, ">") ){ if( sscanf(buff, " >%s ", p) != 1) fprintf(stderr, "Wrong data line:\n%s\n", buff), fflush(NULL) ,exit(0); if( !( s[n].s = calloc(strlen(p)+1, sizeof(char))) ) fprintf(stderr, "out of memory!\n"), fflush(NULL), exit(0); strcpy(s[n].s, p); while( fgets(buff, 4096, inf) && strlen(buff) > 4 ){ if( sscanf(buff, " %d, %d, %c", &(s[n].sp), &(s[n].ep), &(s[n].t)) == 3 ){ //}else if( sscanf(buff, " %d, %d ", &(s[n].sp), &(s[n].ep)) == 2 ){ } else fprintf(stderr, "Wrong data line:\n%s\n", buff), fflush(NULL) ,exit(0); s[n].n = s[n].ep - s[n].sp + 1; s[n].t = toupper( s[n].t ); break; } n++; } } fclose(inf); return; } //set up aa void set_aa(aa_t ** aa, int * n, atom_t * a, int n_a){ aa_t * t; int i, m; char str[10]; m = 0; for(i=1; i<=n_a; i++){ if( a[i].n > m ) m = a[i].n; } *n = m; if( !(*aa = t = calloc(m+1, sizeof(aa_t))) ) fprintf(stderr, "out of memory!\n"), fflush(NULL), exit(0); for(i=0; i<=m; i++){ t[i].na = 0; t[i].at = NULL; } for(i=1; i<=n_a; i++){ t[ a[i].n ].na ++; } for(i=0; i<=m; i++){ if( !( t[i].at = calloc(t[i].na +1, sizeof(atom_t *))) ) fprintf(stderr, "out of memory!\n"), fflush(NULL), exit(0); t[i].na = 1; t[i].ca = 0; } for(i=1; i<=n_a; i++){ if( a[i].s == NULL || a[i].n < 0 ) continue; (t[ a[i].n ].at)[ t[ a[i].n ].na ] = a + i; t[ a[i].n ].na ++; } for(i=0; i<=m; i++){ t[i].na --; } //get coordinates of c_alpha for(i=1; i<=n_a; i++){ if( a[i].s == NULL || a[i].n < 0 ) continue; if( ! strncmp(a[i].s +12, " CA ", 4) ){ t[ a[i].n ].ca = 1; strncpy(str, a[i].s +30, 8); str[8] = '\0'; t[ a[i].n ].x_ca = atof(str); strncpy(str, a[i].s +38, 8); str[8] = '\0'; t[ a[i].n ].y_ca = atof(str); strncpy(str, a[i].s +46, 8); str[8] = '\0'; t[ a[i].n ].z_ca = atof(str); } } return; } //read in atoms (of the first model, if many) from a pdb file void read_pdb_atoms(atom_t ** ats, int * n, char * f){ FILE * inf; atom_t * a; char buff[4096]; int m,k,t,i, cond; if( !(inf = fopen(f, "r")) ) fprintf(stderr, "The pdb file %s cannot be opened for reading!\n", f), fflush(NULL), exit(0); cond=0; m = 0; while( fgets(buff, 4096, inf) ){ if( buff[0] == 'A' && buff[1] == 'T' && buff[2] == 'O'&& buff[3] == 'M'){ if( sscanf(buff, "ATOM %d %*s", &k) == 1 ) { if ( k > m ) m = k; } else { fprintf(stderr, "Data error in line %s\n", buff), fflush(NULL), exit(0); } if( !cond ) cond = 1; } else { if( cond == 1 ) break; } } *n = m; if( !(*ats = a = calloc(m+1, sizeof(atom_t))) ) fprintf(stderr, "out of memory!\n"), fflush(NULL), exit(0); for(i=0; i>>>>>>>>>>>>>>>>> NATIVE CONTACTS >>>>>>>>>>>>>>>>>>>>>>>>>>>>> double square(double x){ return (x*x); } //calculate # of native contacts in an ssset int cal_tot_num_native_contacts(int n_aas, aa_t * as) { int i, j, n; n = 0; for(i=1; i<=n_aas; ++i){ for(j=i+1; j<=n_aas; ++j){ if( sqrt( square(as[i].x_ca - as[j].x_ca) + square(as[i].y_ca - as[j].y_ca) + square(as[i].z_ca - as[j].z_ca) ) <= NATIVE_CONTACT_DIST_CUTOFF ){ ++ n; } } } return n; } //calculate # of native contacts in an ssset int cal_tot_num_native_contacts_1(int n_ss, ss_t * ss, int n_aas, aa_t * as) { int i, j, n; int n_aaids = 0; int * aaids; for(i=1; i<=n_ss; ++i) n_aaids += (ss[i].ep - ss[i].sp + 1); if( !(aaids = calloc(n_aaids, sizeof(int))) ) fprintf(stderr, "\nout of memory\n"), fflush(NULL), exit(0); n = 0; for(i=1; i<=n_ss; ++i){ for(j=ss[i].sp; j<=ss[i].ep; j++){ aaids[ n ] = j; ++ n; } } n = 0; for(i=0; iid ].sp; j<=ss[ tmpsu->id ].ep; j++){ ++ n_aaids; } tmpsu = tmpsu->next; } if( !(aaids = calloc(n_aaids, sizeof(int))) ) fprintf(stderr, "\nout of memory\n"), fflush(NULL), exit(0); tmpsu = su; n = 0; while ( tmpsu ){ //printf("\nssunit:%s (%1d -> %1d): ", ss[tmpsu->id].s,ss[ tmpsu->id ].sp, ss[ tmpsu->id ].ep); for(j=ss[ tmpsu->id ].sp; j<=ss[ tmpsu->id ].ep; j++){ aaids[ n ] = j; ++ n; } tmpsu = tmpsu->next; } //printf("\nn_aaids = %d", n_aaids); n = 0; for(i=0; i %1d): ", ss[su->id].s,ss[su->id].sp, ss[su->id].ep); for(i=ss[su->id].sp; i<=ss[su->id].ep; ++i){ for(j=i+1; j<=ss[su->id].ep; ++j){ if( sqrt( square(as[i].x_ca - as[j].x_ca) + square(as[i].y_ca - as[j].y_ca) + square(as[i].z_ca - as[j].z_ca) ) <= NATIVE_CONTACT_DIST_CUTOFF ){ //printf("\n %d -- %d ", i, j); ++ n; } } } return n; } //calculate # of native contacts in an mnode int cal_num_native_contacts_mnode(mnode * o, ss_t * ss, aa_t * as) { int n = 0, m; sslist * clist; ssunit * cunit; //printf("\n================================= mnode =================================\n"); clist = o->ablist; while( clist ){ m = cal_num_native_contacts_ssset(clist->ss, ss, as); //printf("%d ", m); n += m; clist = clist->next; } //printf(" >< "); cunit = o->turn; while( cunit ){ //calculate # of native contacts in an ssunit m = cal_num_native_contacts_ssunit(cunit, ss, as); //printf("%d ", m); n += m; cunit = cunit->next; } return n; } //<<<<<<<<<<<<<<<<<<<<<<<<<<<< NATIVE CONTACTS <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<