#include <stdlib.h> #include <stdio.h> #include <string.h> #include <thread.h> #include "dt_smiles.h" #include "dt_finger.h" #include "du_utils.h" #include "mp_utils.c" #define MAX_THREADS 20 #define TK_FINGER 1 /*** This program will use either mutexes or the 'flockfile()' utility to protect the Input and Output streams. They are controled with the following switch ***/ #define MUTEX 1 /*** Globals. ***/ int max; int min; float dens; /*** per-thread counts ***/ int count[MAX_THREADS]; int fpcount[MAX_THREADS]; int errcount[MAX_THREADS]; #ifdef MUTEX /*** 'imp' protects the input stream. ***/ pthread_mutex_t imp = PTHREAD_MUTEX_INITIALIZER; /*** 'omp' protects the output stream. ***/ pthread_mutex_t omp = PTHREAD_MUTEX_INITIALIZER; #define LOCKI() pthread_mutex_lock(&imp); #define UNLOCKI() pthread_mutex_unlock(&imp); #define LOCKO() pthread_mutex_lock(&omp); #define UNLOCKO() pthread_mutex_unlock(&omp); #else #define LOCKI() flockfile(stdin); #define UNLOCKI() funlockfile(stdin); #define LOCKO() flockfile(stdout); #define UNLOCKO() funlockfile(stdout); #endif /****************************************************** * FUNCTION: do_fp * * DESCRIPTION: * This is entry point for the threads which * actually do the work. * * RETURNS: (void *) *******************************************************/ void *do_fp(void *arg) { dt_Handle mol, fp; char *tdtbuf, *smi, *p, *di, *diend, *tdtend, *fps; int tdtbufsize, len, tdtlen, lens, ok, dofold; int no, rc; fprintf(stderr, "."); no = (int)arg; count[no] = errcount[no] = fpcount[no] = 0; tdtbuf = NULL; /* initialize for du_readtdt() */ tdtbufsize = 0; while (1) { LOCKI(); if (feof(stdin)) rc = 0; else rc = du_readtdt(stdin, &tdtlen, &tdtbuf, &tdtbufsize); UNLOCKI(); /*** Read failed, exit out of loop ***/ if (rc != 1) break; count[no]++; /**** If its a TDT that isn't SMILES rooted, skip it ****/ if (0 != strncmp(tdtbuf, "$SMI<", 5)) { LOCKO(); du_writetdt(stdout, tdtlen, tdtbuf, 1); UNLOCKO(); continue; } /**** Get the SMILES and its length ****/ tdtend = tdtbuf + tdtlen; /* ptr to TDT's end */ smi = tdtbuf + 5; if (NULL == (p = du_find_character(tdtend - smi, smi, ">"))) { fprintf(stderr, "Aaack! This shouldn't happen!\n"); exit(1); } lens = p - smi; /**** Make a molecule object ****/ if (*smi == '"') mol = dt_smilin(lens - 2, smi + 1); else mol = dt_smilin(lens, smi); if (NULL_OB == mol) { fprintf(stderr, "WARNING: couldn't parse SMILES\n"); errcount[no]++; LOCKO(); du_writetdt(stdout, tdtlen, tdtbuf, 1); UNLOCKO(); continue; } fpcount[no]++; /* count number FPs created */ /**** Make a fingerprint object. ****/ fp = dt_fp_generatefp(mol, 0, 7, max); dt_fp_foldfp(fp, min, dens); if (NULL_OB == fp) { fprintf(stderr, "WARNING: can't create fingerprint\n"); errcount[no]++; LOCKO(); du_writetdt(stdout, tdtlen, tdtbuf, 1); UNLOCKO(); continue; } /**** Convert fingerprint to a string ****/ fps = du_fp2str(fp); /* Print the result. This is complicated by the potential existance of old fingerprints in the TDT, which we have to eliminate. */ /**** print $SMI<smiles>FP<fp> ****/ di = tdtbuf; diend = du_find_character(tdtend - di, di, ">"); LOCKO(); printf("%.*s\n", (diend - di) + 1, di); /* print SMILES */ printf("%s\n", fps); /* print FP */ /**** print the rest of the TDT ****/ while (diend < tdtend) { /* loop over rest of the dataitems */ di = diend + 1; diend = du_find_character(tdtend - di, di, ">"); /* no more '>', must be '|' */ if (NULL == diend) diend = tdtend; /* skip old FPs */ if (0 != strncmp(di, "FP<", 3)) printf("%.*s\n", (diend - di) + 1, di); } UNLOCKO(); /**** free objects and memory ****/ dt_dealloc(fp); dt_dealloc(mol); free(fps); } if (tdtbuf) free(tdtbuf); return ((void *)1); } /*************************************************** * FUNCTION: main * * RETURNS: (int) ***************************************************/ main(int argc, char **argv) { pthread_t thr[MAX_THREADS]; void *status; int ct, fpct, errct; int ok, i; int thr_count = 4; /**** interpret command line arguments ****/ max = 2048; min = 64; dens = 0.3; ok = 1; switch (argc) { default: case 5: if (1 != sscanf(argv[4],"%f",&dens)) ok = 0; case 4: if (1 != sscanf(argv[3],"%d",&min)) ok = 0; case 3: if (1 != sscanf(argv[2],"%d",&max)) ok = 0; case 2: if (1 != sscanf(argv[1],"%d",&thr_count)) ok = 0; break; case 1: ok = 1; } if (!ok) { fprintf(stderr, "usage: %s [thr [max [min [dens]]]] < file.tdt\n", argv[0]); fprintf(stderr, "\tthr - Computation threads to use\n"); fprintf(stderr, "\tmax - FP maximum nbits (creation size)\n"); fprintf(stderr, "\tmin - FP minimum nbits (after folding)\n"); fprintf(stderr, "\tdens - FP minimum density (after folding)\n"); exit(1); } /**** check thread counts. Force to be in the valid range. ****/ if (thr_count > MAX_THREADS) thr_count = MAX_THREADS; if (thr_count < 0) thr_count = 1; /**** check fingerprinting options. Make sure they are in the valid range of 32 - 2^30, and that min is smaller than max, and that the density is from 0.0 - 1.0. Note that if they are not set by command- line options, the default values remain the same as in previous versions of the toolkit (pre-4.42). ****/ if (min > max) min = max; if (min < 32) min = 32; if (max > 1073741824) max = 1073741824; if (dens < 0.0) dens = 0.0; if (dens > 1.0) dens = 1.0; dt_mp_initialize(); ct = fpct = errct = 0; for (i = 0; i < thr_count; i++) pthread_create(&thr[i], NULL, do_fp, (void *)i); for (i = 0; i < thr_count; i++) pthread_join(thr[i], &status); /*================================================= * summary statistics. ================================================== */ fprintf(stderr, "\n"); for (i = 0; i < thr_count; i++) { fprintf(stderr, "#%d did %d\n", i, count[i]); ct += count[i]; errct += errcount[i]; fpct += fpcount[i]; } fprintf(stderr, "\n%d TDTs, %d %s added, %d %s\n", ct, fpct, fpct == 1 ? "fingerprint" : "fingerprints", errct, errct == 1 ? "error" : "errors"); fprintf(stderr, "Done.\n"); thr_exit(NULL); return(0); }