MUG '03 -- 25 - 28 Feb, 2003

Daycart Databases

Dave Wilbur
Daylight CIS, Inc.

base design for daylight daycart databases

show some examples using ACD


What was considered for the base design

smilesC/C=C\1/CN2CCc3c([nH]c4ccccc34)[C@@H]2C[C@@H]1Cc5nccc6c7cc(O)ccc7[nH]c56
substructureALKALOID; CARBOLINE; OLEFIN; QUINOLIZINE; PHENOL; COND.RING
smilesCOc1cc(ccc1O)C2Oc3c(OC)cc(cc3O[C@H]2CO)c4cc(=O)c5c(O)cc(O)cc5o4
substructureALCOHOL; ARYLKETONE; BH-LINKED-CC; PHENOL-ETHER; POLYPHENOL; DIOXANE; KETONE,CYCLIC; FLAVONOID; COND.RING
smilesCOc1cc(ccc1O)[C@H]2Oc3cc(cc(OC)c3O[C@@H]2CO)c4cc(=O)c5c(O)cc(O)cc5o4
substructureALCOHOL; ARYLKETONE; COND.RING; POLYPHENOL; PHENOL-ETHER; KETONE,CYCLIC; FLAVONOID; DIOXANE; BH-LINKED-CC
smilesBrc1ccc2c(c[nH]c2c1)c3cnc([nH]3)C(=O)c4c[nH]c5ccccc45
substructureALKALOID; ARYLBROMIDE; IMIDAZOLE; KETONE; HH-LINKED-CC; BENZIMIDAZOLE

lets say that we were looking for structures that were marked in the substructure field of type PHENOL:

select 
	smiles
from
	a_wdi_table
where
	substructure like '%PHENOL%'
;

SMILES
--------------------------------------------------------------------------------
COc1cc(ccc1O)C2Oc3c(OC)cc(cc3O[C@H]2CO)c4cc(=O)c5c(O)cc(O)cc5o4
COc1cc(ccc1O)[C@H]2Oc3cc(cc(OC)c3O[C@@H]2CO)c4cc(=O)c5c(O)cc(O)cc5o4
C/C=C\1/CN2CCc3c([nH]c4ccccc34)[C@@H]2C[C@@H]1Cc5nccc6c7cc(O)ccc7[nH]c56

not only does this query take a long time, it gives us the wrong answer because it also would say PHENOL-ETHER and POLYPHENOL were correct answers too. to get the correct answer we would have to type:

select 
	smiles
from
	a_wdi_table
where
	substructure like '%; PHENOL;%'
;

SMILES
--------------------------------------------------------------------------------
C/C=C\1/CN2CCc3c([nH]c4ccccc34)[C@@H]2C[C@@H]1Cc5nccc6c7cc(O)ccc7[nH]c56

and this gives us the correct answer... assuming that there were no typo's like '; PHENOL;' or '; PHENOL ;'

Using a Master Detail

select * from compound;

SID SMILES
- ------------------------------------------------------------------------
1 C/C=C\1/CN2CCc3c([nH]c4ccccc34)[C@@H]2C[C@@H]1Cc5nccc6c7cc(O)ccc7[nH]c56
2 COc1cc(ccc1O)C2Oc3c(OC)cc(cc3O[C@H]2CO)c4cc(=O)c5c(O)cc(O)cc5o4
3 COc1cc(ccc1O)[C@H]2Oc3cc(cc(OC)c3O[C@@H]2CO)c4cc(=O)c5c(O)cc(O)cc5o4
4 Brc1ccc2c(c[nH]c2c1)c3cnc([nH]3)C(=O)c4c[nH]c5ccccc45

select * from compound_has_substructure;

COMPOUND_SID SUBSTRUCTURE
------------ ------------------------------
           1 ALKALOID
           1 CARBOLINE
           1 OLEFIN
           1 QUINOLIZINE
           1 PHENOL
           1 COND.RING
           2 ALCOHOL
           2 ARYLKETONE
           2 BH-LINKED-CC
           2 PHENOL-ETHER
           2 POLYPHENOL
           2 DIOXANE
           2 KETONE,CYCLIC
           2 FLAVONOID
           2 COND.RING
           3 ALCOHOL
           3 ARYLKETONE
           3 COND.RING
           3 POLYPHENOL
           3 PHENOL-ETHER
           3 KETONE,CYCLIC
           3 FLAVONOID
           3 DIOXANE
           3 BH-LINKED-CC
           4 ALKALOID
           4 ARYLBROMIDE
           4 IMIDAZOLE
           4 KETONE
           4 HH-LINKED-CC
           4 BENZIMIDAZOLE
select 
	c.smiles 
from 
	compound c, 
	compound_has_substructure cs
where
	c.substructure = 'PHENOL'
and c.sid = cs.compound_sid;

SMILES
--------------------------------------------------------------------------------
C/C=C\1/CN2CCc3c([nH]c4ccccc34)[C@@H]2C[C@@H]1Cc5nccc6c7cc(O)ccc7[nH]c56

Using Many to Many

select * from compound;

SID SMILES
- ------------------------------------------------------------------------
1 C/C=C\1/CN2CCc3c([nH]c4ccccc34)[C@@H]2C[C@@H]1Cc5nccc6c7cc(O)ccc7[nH]c56
2 COc1cc(ccc1O)C2Oc3c(OC)cc(cc3O[C@H]2CO)c4cc(=O)c5c(O)cc(O)cc5o4
3 COc1cc(ccc1O)[C@H]2Oc3cc(cc(OC)c3O[C@@H]2CO)c4cc(=O)c5c(O)cc(O)cc5o4
4 Brc1ccc2c(c[nH]c2c1)c3cnc([nH]3)C(=O)c4c[nH]c5ccccc45

select * from substructure;

       SID SUBSTRUCTURE
---------- ------------------------------
         1 ALCOHOL
         2 ALKALOID
         3 ARYLBROMIDE
         4 ARYLKETONE
         5 BENZIMIDAZOLE
         6 BH-LINKED-CC
         7 CARBOLINE
         8 COND.RING
         9 DIOXANE
        10 FLAVONOID
        11 HH-LINKED-CC
        12 IMIDAZOLE
        13 KETONE
        14 KETONE,CYCLIC
        15 OLEFIN
        16 PHENOL
        17 PHENOL-ETHER
        18 POLYPHENOL
        19 QUINOLIZINE

select * from compound_has_substructure;

COMPOUND_SID SUBSTRUCTURE_SID
------------ ----------------
           1                2
           1                7
           1                8
           1               16
           1               19
           1               15
           2                1
           2                9
           2               18
           2               17
           2               14
           2               10
           2                8
           2                4
           2                6
           3                1
           3               18
           3               17
           3               14
           3               10
           3                8
           3                6
           3                4
           3                9
           4                2
           4                5
           4               11
           4               13
           4               12
           4                3
select 
	c.smiles 
from 
	compound c, 
	compound_has_substructure cs,
	substructure s
where
	s.substructure = 'PHENOL'
and c.sid = cs.compound_sid
and s.sid = cs.substructure_sid;

SMILES
--------------------------------------------------------------------------------
C/C=C\1/CN2CCc3c([nH]c4ccccc34)[C@@H]2C[C@@H]1Cc5nccc6c7cc(O)ccc7[nH]c56

Base Design

describe COMPOUND;
 Name                     Null?    Type
 ------------------------ -------- --------------
 SID                      NOT NULL NUMBER(7)
 SMILES                   NOT NULL VARCHAR2(866)

describe COMPOUND_NAME;
 Name                     Null?    Type
 ------------------------ -------- --------------
 COMPOUND_SID             NOT NULL NUMBER(7)
 NAME                     NOT NULL VARCHAR2(189)
 COMPOUND_NAME_TYPE                NUMBER(1)

describe COMPOUND_DEPICTION;
 Name                     Null?    Type
 ------------------------ -------- --------------
 COMPOUND_SID             NOT NULL NUMBER(7)
 COMPOUND_DEPICTION_TYPE  NOT NULL NUMBER(1)
 DEPICTION                NOT NULL CLOB

describe COMPOUND_NAME_TYPE;
 Name                     Null?    Type
 ------------------------ -------- --------------
 TYPE                     NOT NULL NUMBER(1)
 DESCRIPTION              NOT NULL VARCHAR2(100)

describe COMPOUND_DEPICTION_TYPE;
 Name                     Null?    Type
 ------------------------ -------- --------------
 TYPE                     NOT NULL NUMBER(1)
 DESCRIPTION              NOT NULL VARCHAR2(100)

Using the Base Design for ACD

describe COMPOUND;
 Name                     Null?    Type
 ------------------------ -------- --------------
 SID                      NOT NULL NUMBER(7)
 AVG_MOLWT                NOT NULL NUMBER(8,4)
 WARNING                           VARCHAR2(51)
 SMILES                   NOT NULL VARCHAR2(866)

describe SUPPLIER_ITEM;
 Name                     Null?    Type
 ------------------------ -------- --------------
 COMPOUND_SID             NOT NULL NUMBER(7)
 SUPPLIER_SID             NOT NULL NUMBER(7)
 SUPPLIER_SKU             NOT NULL VARCHAR2(12)
 COMPOUND_NAME            NOT NULL VARCHAR2(253)
 GRADE                    NOT NULL VARCHAR2(4)
 PURITY                            VARCHAR2(10)
 COST                              NUMBER(9,3)
 COST_UNITS                        VARCHAR2(32)
 AMOUNT                            NUMBER(20,10)
 AMOUNT_UNITS                      VARCHAR2(32)
 NOTE                              VARCHAR2(4000)
 UPDATED                           VARCHAR2(4)

describe SUPPLIER;
 Name                     Null?    Type
 ------------------------ -------- --------------
 SID                      NOT NULL NUMBER(7)
 NAME                     NOT NULL VARCHAR2(12)

describe SUPPLIER_ADDRESS;
 Name                     Null?    Type
 ------------------------ -------- --------------
 SUPPLIER_SID             NOT NULL NUMBER(7)
 ADDRESS_NAME                      VARCHAR2(100)
 ADDRESS_LINE_ONE                  VARCHAR2(100)
 ADDRESS_LINE_TWO                  VARCHAR2(100)
 CITY                              VARCHAR2(100)
 STATE_PROVINCE                    VARCHAR2(100)
 POSTAL_CODE                       VARCHAR2(30)
 COUNTRY                           VARCHAR2(50)

describe SUPPLIER_PHONE;
 Name                     Null?    Type
 ------------------------ -------- --------------
 SUPPLIER_SID             NOT NULL NUMBER(7)
 SUPPLIER_ADDRESS_SID              NUMBER(7)
 PHONE_TYPE                        NUMBER(1)
 PHONE                             VARCHAR2(30)

describe PHONE_TYPE;
 Name                     Null?    Type
 ------------------------ -------- --------------
 PHONE_TYPE               NOT NULL NUMBER(1)
 DESCRIPTION              NOT NULL VARCHAR2(30)

What names are associated with [O-][N+](=O)c1ccc(cc1)C2Oc3ccccc3C4CC(=NN42)c5ccco5?

select
	cn.name
from
	compound c,
	compound_name cn
where
	c.smiles = '[O-][N+](=O)c1ccc(cc1)C2Oc3ccccc3C4CC(=NN42)c5ccco5'
and c.sid = cn.compound_sid


NAME
--------------------------------------------------------------------------------
2-FURAN-2-YL-4-(4-NITRO-PH)-1,9B-2H-5-OXA-3,3A-DIAZA-CYCLOPENTA(A)NAPHTHALENE
2FURAN2YL44NITROPH19B2H5OXA33ADIAZACYCLOPENTAANAPHTHALENE
R453064

What is the D2D depiction for CAS id 645-96-5?

select
	c.smiles, 
	cd.depiction
from
	compound c,
	compound_name cn,
	compound_depiction cd,
	compound_name_type cnt,
	compound_depiction_type cdt
where
	cn.name = '645-96-5'
and	cnt.description = 'CAS'
and	cdt.description = 'D2D'
and	cn.compound_name_type = cnt.type
and	cd.compound_depiction_type = cdt.type
and c.sid = cn.compound_sid
and c.sid = cd.compound_sid
;


SMILES			DEPICTION
--------------- ----------------------------------------------------------------
[SeH]c1ccccc1	1.75,1.42,0.45,0.67,-0.86,1.42,-2.16,0.67,-2.16,-0.84,-0.86,-1.59,0.45,-0.84

What vendors offer a compound that has the substructure c1ccc(cc1)C2Oc3ccccc3C4CC(=NN42)c5ccco5?

select
	distinct s.name
from
	compound c,
	supplier s,
	supplier_item si
where
	contains(c.smiles, 'c1ccc(cc1)C2Oc3ccccc3C4CC(=NN42)c5ccco5') = 1
and c.sid = si.compound_sid
and s.sid = si.supplier_sid
;


SUPPLIER_NAM
------------
SALOR

What vendors in a set of states offer this compound?

select
	distinct s.name
from
	compound c,
	supplier s,
	supplier_item si,
	supplier_address sa
where
	c.smiles = '[O-][N+](=O)c1ccc(cc1)C2Oc3ccccc3C4CC(=NN42)c5ccco5'
and sa.state_province in ('NY', 'NJ')
and c.sid = si.compound_sid
and s.sid = si.supplier_sid
and s.sid = sa.supplier_sid
;