#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <thread.h>
#include "dt_smiles.h"
#include "dt_finger.h"
#include "du_utils.h"
#include "mp_utils.c"
/*** Upper limit on worker threads; also the size of the
per-thread count arrays below. ***/
#define MAX_THREADS 20
/*** NOTE(review): TK_FINGER is not referenced anywhere in the
code visible in this file -- confirm whether it is still needed. ***/
#define TK_FINGER 1
/*** This program uses either mutexes or the 'flockfile()'
utility to protect the input and output streams. The choice
is controlled with the following switch. It is tested with
'#ifdef', so remove (or comment out) the definition to select
'flockfile()'; defining it as 0 still selects the mutexes. ***/
#define MUTEX 1
/*** Globals: fingerprinting options. They are set in main()
before any thread is started and only read by the worker
threads. ***/
int max;
int min;
float dens;
/*** per-thread counts, indexed by the thread number handed
to do_fp(). Each worker writes only its own slot. ***/
int count[MAX_THREADS];
int fpcount[MAX_THREADS];
int errcount[MAX_THREADS];
/*** Stream locking macros. They deliberately expand to a
single expression WITHOUT a trailing semicolon, so that
'LOCKI();' is exactly one statement and the macros remain
safe inside an unbraced if/else. ***/
#ifdef MUTEX
/*** 'imp' protects the input stream. ***/
pthread_mutex_t imp = PTHREAD_MUTEX_INITIALIZER;
/*** 'omp' protects the output stream. ***/
pthread_mutex_t omp = PTHREAD_MUTEX_INITIALIZER;
#define LOCKI() pthread_mutex_lock(&imp)
#define UNLOCKI() pthread_mutex_unlock(&imp)
#define LOCKO() pthread_mutex_lock(&omp)
#define UNLOCKO() pthread_mutex_unlock(&omp)
#else
#define LOCKI() flockfile(stdin)
#define UNLOCKI() funlockfile(stdin)
#define LOCKO() flockfile(stdout)
#define UNLOCKO() funlockfile(stdout)
#endif
/******************************************************
* FUNCTION: echo_tdt (file-private helper)
*
* DESCRIPTION:
* Writes one TDT to stdout unchanged, holding the
* output lock for the duration of the write.
*
* RETURNS: (void)
*******************************************************/
static void echo_tdt(int tdtlen, char *tdtbuf)
{
    LOCKO();
    du_writetdt(stdout, tdtlen, tdtbuf, 1);
    UNLOCKO();
}
/******************************************************
* FUNCTION: do_fp
*
* DESCRIPTION:
* This is the entry point for the threads which
* actually do the work. Each thread repeatedly reads
* one TDT from stdin (under the input lock), and:
*   - passes it through unchanged if it does not start
*     with "$SMI<", if the SMILES cannot be parsed, or
*     if no fingerprint can be produced;
*   - otherwise prints the $SMI dataitem, the new
*     fingerprint string, and the remaining dataitems
*     except those starting with "FP<" (all under the
*     output lock).
* Per-thread statistics go to count[], fpcount[] and
* errcount[] at the index passed in 'arg'.
*
* PARAMETERS:
* arg - the thread number (0 .. MAX_THREADS-1), passed
*       as an integer converted to (void *).
*
* RETURNS: (void *) always (void *)1
*******************************************************/
void *do_fp(void *arg)
{
    dt_Handle mol, fp;
    char *tdtbuf, *smi, *p, *di, *diend, *tdtend, *fps;
    int tdtbufsize, tdtlen, lens;
    int no, rc;

    fprintf(stderr, ".");
    /* go through intptr_t so the pointer is not truncated
       by a direct pointer-to-int cast on 64-bit targets */
    no = (int)(intptr_t)arg;
    count[no] = errcount[no] = fpcount[no] = 0;
    tdtbuf = NULL; /* initialize for du_readtdt() */
    tdtbufsize = 0;
    while (1)
    {
        LOCKI();
        if (feof(stdin))
            rc = 0;
        else
            rc = du_readtdt(stdin, &tdtlen, &tdtbuf, &tdtbufsize);
        UNLOCKI();
        /*** Read failed, exit out of loop ***/
        if (rc != 1) break;
        count[no]++;
        /**** If it's a TDT that isn't SMILES rooted, skip it.
        The length guard keeps strncmp() from looking at bytes
        beyond a TDT shorter than the "$SMI<" tag. ****/
        if (tdtlen < 5 || 0 != strncmp(tdtbuf, "$SMI<", 5))
        {
            echo_tdt(tdtlen, tdtbuf);
            continue;
        }
        /**** Get the SMILES and its length ****/
        tdtend = tdtbuf + tdtlen; /* ptr to TDT's end */
        smi = tdtbuf + 5;
        if (NULL == (p = du_find_character(tdtend - smi, smi, ">")))
        {
            fprintf(stderr, "Aaack! This shouldn't happen!\n");
            exit(1);
        }
        lens = p - smi;
        /**** Make a molecule object. A quoted SMILES is passed
        without its surrounding quotes; the 'lens >= 2' guard
        avoids a negative length for a lone quote character. ****/
        if (*smi == '"' && lens >= 2)
            mol = dt_smilin(lens - 2, smi + 1);
        else
            mol = dt_smilin(lens, smi);
        if (NULL_OB == mol)
        {
            fprintf(stderr, "WARNING: couldn't parse SMILES\n");
            errcount[no]++;
            echo_tdt(tdtlen, tdtbuf);
            continue;
        }
        /**** Make a fingerprint object. NOTE(review): the
        meaning of the constant arguments 0 and 7 is defined
        by dt_fp_generatefp(), not visible in this file. ****/
        fp = dt_fp_generatefp(mol, 0, 7, max);
        if (NULL_OB == fp)
        {
            fprintf(stderr, "WARNING: can't create fingerprint\n");
            errcount[no]++;
            dt_dealloc(mol); /* don't leak the molecule */
            echo_tdt(tdtlen, tdtbuf);
            continue;
        }
        /* fold only once we know the fingerprint exists */
        dt_fp_foldfp(fp, min, dens);
        /**** Convert fingerprint to a string ****/
        fps = du_fp2str(fp);
        if (NULL == fps)
        {
            /* passing NULL to printf("%s") is undefined */
            fprintf(stderr, "WARNING: can't convert fingerprint to string\n");
            errcount[no]++;
            dt_dealloc(fp);
            dt_dealloc(mol);
            echo_tdt(tdtlen, tdtbuf);
            continue;
        }
        fpcount[no]++; /* count number of FPs actually added */
        /* Print the result. This is complicated by the
        potential existence of old fingerprints in the
        TDT, which we have to eliminate. */
        /**** print $SMI<smiles>FP<fp> ****/
        di = tdtbuf;
        diend = p; /* first '>' of the TDT, found above */
        LOCKO();
        /* a "%.*s" precision must be an int, not a ptrdiff_t */
        printf("%.*s\n", (int)(diend - di) + 1, di); /* print SMILES */
        printf("%s\n", fps); /* print FP */
        /**** print the rest of the TDT ****/
        while (diend < tdtend)
        { /* loop over rest of the dataitems */
            di = diend + 1;
            diend = du_find_character(tdtend - di, di, ">");
            /* no more '>', must be '|' */
            if (NULL == diend)
                diend = tdtend;
            /* skip old FPs.
            NOTE(review): when diend was clamped to tdtend, the
            strncmp() and the "+ 1" below may look at the byte at
            tdtend itself; this appears to rely on du_readtdt()
            leaving a terminator there -- confirm. */
            if (0 != strncmp(di, "FP<", 3))
                printf("%.*s\n", (int)(diend - di) + 1, di);
        }
        UNLOCKO();
        /**** free objects and memory ****/
        dt_dealloc(fp);
        dt_dealloc(mol);
        free(fps);
    }
    free(tdtbuf); /* free(NULL) is a no-op */
    return ((void *)1);
}
/***************************************************
* FUNCTION: main
*
* DESCRIPTION:
* Parses the optional command line arguments
* [thr [max [min [dens]]]], forces them into their
* valid ranges, starts the worker threads running
* do_fp(), waits for all of them, and prints summary
* statistics to stderr.
*
* RETURNS: (int) 0 on success; exits with status 1 on
* a malformed argument or if no thread can be started.
***************************************************/
int main(int argc, char **argv)
{
    pthread_t thr[MAX_THREADS];
    int ct, fpct, errct;
    int ok, i, rc;
    int thr_count = 4;
    int started;

    /**** interpret command line arguments. Every argument
    that is present is parsed; arguments beyond the fourth
    are ignored. ****/
    max = 2048;
    min = 64;
    dens = 0.3f;
    ok = 1;
    if (argc >= 5 && 1 != sscanf(argv[4], "%f", &dens)) ok = 0;
    if (argc >= 4 && 1 != sscanf(argv[3], "%d", &min)) ok = 0;
    if (argc >= 3 && 1 != sscanf(argv[2], "%d", &max)) ok = 0;
    if (argc >= 2 && 1 != sscanf(argv[1], "%d", &thr_count)) ok = 0;
    if (!ok)
    {
        fprintf(stderr, "usage: %s [thr [max [min [dens]]]] < file.tdt\n",
                argv[0]);
        fprintf(stderr, "\tthr - Computation threads to use\n");
        fprintf(stderr, "\tmax - FP maximum nbits (creation size)\n");
        fprintf(stderr, "\tmin - FP minimum nbits (after folding)\n");
        fprintf(stderr, "\tdens - FP minimum density (after folding)\n");
        exit(1);
    }
    /**** check thread counts. Force to be in
    the valid range 1 .. MAX_THREADS (zero threads
    would silently process nothing). ****/
    if (thr_count > MAX_THREADS) thr_count = MAX_THREADS;
    if (thr_count < 1) thr_count = 1;
    /**** check fingerprinting options. Make sure they
    are in the valid range of 32 - 2^30, and that min
    is not larger than max, and that the density is from
    0.0 - 1.0. max is clamped first so that the min/max
    comparison sees its final value. Note that if they are
    not set by command-line options, the default values
    remain the same as in previous versions of the toolkit
    (pre-4.42). ****/
    if (max > 1073741824) max = 1073741824;
    if (max < 32) max = 32;
    if (min > max) min = max;
    if (min < 32) min = 32;
    if (dens < 0.0) dens = 0.0;
    if (dens > 1.0) dens = 1.0;
    dt_mp_initialize();
    ct = fpct = errct = 0;
    /**** start the workers; stop at the first failure so
    that only threads which really exist are joined. The
    thread number travels through intptr_t to avoid an
    int-to-pointer cast of mismatched size. ****/
    started = 0;
    for (i = 0; i < thr_count; i++)
    {
        rc = pthread_create(&thr[i], NULL, do_fp, (void *)(intptr_t)i);
        if (rc != 0)
        {
            fprintf(stderr, "WARNING: can't create thread %d (error %d)\n",
                    i, rc);
            break;
        }
        started++;
    }
    if (started == 0)
    {
        fprintf(stderr, "ERROR: no worker threads could be started\n");
        exit(1);
    }
    thr_count = started;
    for (i = 0; i < thr_count; i++)
        pthread_join(thr[i], NULL);
    /*=================================================
    * summary statistics.
    ================================================== */
    fprintf(stderr, "\n");
    for (i = 0; i < thr_count; i++)
    {
        fprintf(stderr, "#%d did %d\n", i, count[i]);
        ct += count[i];
        errct += errcount[i];
        fpct += fpcount[i];
    }
    fprintf(stderr, "\n%d TDTs, %d %s added, %d %s\n",
            ct,
            fpct, fpct == 1 ? "fingerprint" : "fingerprints",
            errct, errct == 1 ? "error" : "errors");
    fprintf(stderr, "Done.\n");
    /* Ends the initial thread the same way the former
    thr_exit(NULL) did, but through the pthread API that the
    rest of this file uses. */
    pthread_exit(NULL);
    return (0); /* not reached */
}