/* -*-C-*-*/ /********************************************************************** * This source code is public domain, and may be freely distributed, * * modified, and used for any purpose. IT COMES WITH ABSOLUTELY NO * * WARRANTY OF ANY KIND. * **********************************************************************/ /*+====================================================================== | FILE: findrxn.c | DESCRIPTION: | Given a reactant and product, find all reaction trees. This | finds reactions which may have more components than just the | reactant and product. For example, given the | reactant: CC(=O)O and product: CC(=O)OCC, this finds all of | the following reaction SMILES: | | CC(=O)O>>CC(=O)OCC | CC(=O)O>>CC(=O)OCC.O | CC(=O)O.OCC>>CC(=O)OCC.O | | If we just looked for the reaction by reaction SMILES, we'd | only find the first one. +====================================================================== */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include "dt_smiles.h" #include "dt_thor.h" #include "du_utils.h" #define MX_SMILES 8000 /*============================================================================ * crash_and_burn() -- write errors to stderr and exit program in error */ static void crash_and_burn(dt_String s) { dt_errorsave("findpath", DX_ERR_ERROR, s); du_printerrors(stderr, 0); exit(1); } /*============================================================================ * */ main(int argc, char *argv[]) { dt_Handle server, db; dt_Handle smidt, rmoldt, pmoldt, rsos, psos, sa, sb, seq; char *vala, *valb, rdbname[100], host[100]; int oops, lena, lenb, more; char rmol[MX_SMILES], pmol[MX_SMILES]; /**** Set host and database names from arguments or set defaults. ****/ gethostname(host, 100); strcpy(rdbname, "chemreact"); if (argc > 1) strcpy(host, argv[1]); if (argc > 2) strcpy(rdbname, argv[2]); /**** Connect to server and open database readonly. ****/ server = dt_thor_connect(host, "5555", "thor", ""); printf("Connected to %s, opening database...\n", host); db = dt_thor_open(server, rdbname, "r", ""); if (server == NULL_OB || db == NULL_OB) crash_and_burn("Can't open reaction database"); printf("Reaction database %s opened.\n", rdbname); /**** Get the datatype objects ****/ smidt = dt_thor_getdatatype(db, 4, "$SMI"); if (NULL_OB == smidt) crash_and_burn("Can't find SMILES datatype!"); rmoldt = dt_thor_getdatatype(db, 5, "$RMOL"); if (NULL_OB == rmoldt) crash_and_burn("Can't find Reactant molecule datatype!"); pmoldt = dt_thor_getdatatype(db, 5, "$PMOL"); if (NULL_OB == pmoldt) crash_and_burn("Can't find Product molecule datatype!"); /**** Read from input, print TDTs to output ****/ while (1) { /*** Get the cross reference sequences of strings for the reactant and product molecule ***/ printf("Enter a reactant molecule:\n"); if (NULL == gets(rmol)) break; rsos = dt_thor_xrefget(db, rmoldt, strlen(rmol), rmol, &oops); if (NULL_OB == rsos) { printf("Reactant not found. Try again.\n"); if (dt_errorworst() > 0) du_printerrors(stderr, 0); continue; } printf("Enter a product molecule:\n"); if (NULL == gets(pmol)) break; psos = dt_thor_xrefget(db, pmoldt, strlen(pmol), pmol, &oops); if (NULL_OB == psos) { printf("Product not found. Try again.\n"); if (dt_errorworst() > 0) du_printerrors(stderr, 0); continue; } /*** zap the first in each (its the molecule itself) ***/ sa = dt_next(rsos); sb = dt_next(psos); dt_delete(rsos); dt_delete(psos); dt_dealloc(sa); dt_dealloc(sb); /*** Sort'em ***/ dt_seqsort(rsos); dt_seqsort(psos); dt_reset(rsos); dt_reset(psos); printf("Reactant found in %d reactions.\n", dt_count(rsos, TYP_ANY)); printf("Product found in %d reactions.\n", dt_count(psos, TYP_ANY)); seq = dt_alloc_seq(); more = TRUE; sa = dt_next(rsos); sb = dt_next(psos); if ((sa == NULL_OB) && (sb == NULL_OB)) more = FALSE; else more = TRUE; /*** Compare them until we run out. Deallocate them as we go. ***/ while (more) { vala = dt_stringvalue(&lena, sa); valb = dt_stringvalue(&lenb, sb); /*** SMILES are same. ***/ if ((lena == lenb) && (strncmp(vala, valb, lena) == 0)) { dt_append(seq, dt_copy(sa)); dt_dealloc(sa); dt_dealloc(sb); if (NULL_OB == (sa = dt_next(rsos))) more = FALSE; if (NULL_OB == (sb = dt_next(psos))) more = FALSE; } /*** SMILES are not the same. Throw away the 'lowest' one of the two sequences, and compare again. ***/ else if (strncmp(vala, valb, DU_MIN(lena, lenb)) == 0) { if (lena < lenb) { dt_dealloc(sa); if (NULL_OB == (sa = dt_next(rsos))) more = FALSE; } else { dt_dealloc(sb); if (NULL_OB == (sb = dt_next(psos))) more = FALSE; } } else if (strncmp(vala, valb, DU_MIN(lena, lenb)) < 0) { dt_dealloc(sa); if (NULL_OB == (sa = dt_next(rsos))) more = FALSE; } else { dt_dealloc(sb); if (NULL_OB == (sb = dt_next(psos))) more = FALSE; } } /*** Deallocate any left ***/ if (sa != NULL_OB) { dt_dealloc(sa); while (NULL_OB != (sa = dt_next(rsos))) dt_dealloc(sa); } if (sb != NULL_OB) { dt_dealloc(sb); while (NULL_OB != (sb = dt_next(psos))) dt_dealloc(sb); } /*** If any were found, print them out. ***/ if (dt_count(seq, TYP_ANY) != 0) { dt_reset(seq); while (NULL_OB != (sa = dt_next(seq))) { vala = dt_stringvalue(&lena, sa); printf("Reaction Found: %.*s\n", lena, vala); } } else printf("No reactions found.\n"); dt_dealloc(seq); } /*** Be polite to thor server; close connection before exiting. ***/ dt_dealloc(server); return 0; }