MUG '03 -- 25 - 28 Feb, 2003
Daycart Databases
Dave Wilbur
Daylight CIS, Inc.
base design for daylight daycart databases
show some examples using ACD
What was considered for the base design
- Core design for all the databases that daylight will offer
- A single field for compounds
- A single field for names
- Depictions
- Multi-valued fields
smiles | C/C=C\1/CN2CCc3c([nH]c4ccccc34)[C@@H]2C[C@@H]1Cc5nccc6c7cc(O)ccc7[nH]c56 |
substructure | ALKALOID; CARBOLINE; OLEFIN; QUINOLIZINE; PHENOL; COND.RING |
smiles | COc1cc(ccc1O)C2Oc3c(OC)cc(cc3O[C@H]2CO)c4cc(=O)c5c(O)cc(O)cc5o4 |
substructure | ALCOHOL; ARYLKETONE; BH-LINKED-CC; PHENOL-ETHER; POLYPHENOL; DIOXANE; KETONE,CYCLIC; FLAVONOID; COND.RING |
smiles | COc1cc(ccc1O)[C@H]2Oc3cc(cc(OC)c3O[C@@H]2CO)c4cc(=O)c5c(O)cc(O)cc5o4 |
substructure | ALCOHOL; ARYLKETONE; COND.RING; POLYPHENOL; PHENOL-ETHER; KETONE,CYCLIC; FLAVONOID; DIOXANE; BH-LINKED-CC |
smiles | Brc1ccc2c(c[nH]c2c1)c3cnc([nH]3)C(=O)c4c[nH]c5ccccc45 |
substructure | ALKALOID; ARYLBROMIDE; IMIDAZOLE; KETONE; HH-LINKED-CC; BENZIMIDAZOLE |
lets say that we were looking for structures that were marked in the substructure field of type PHENOL:
select
smiles
from
a_wdi_table
where
substructure like '%PHENOL%'
;
SMILES
--------------------------------------------------------------------------------
COc1cc(ccc1O)C2Oc3c(OC)cc(cc3O[C@H]2CO)c4cc(=O)c5c(O)cc(O)cc5o4
COc1cc(ccc1O)[C@H]2Oc3cc(cc(OC)c3O[C@@H]2CO)c4cc(=O)c5c(O)cc(O)cc5o4
C/C=C\1/CN2CCc3c([nH]c4ccccc34)[C@@H]2C[C@@H]1Cc5nccc6c7cc(O)ccc7[nH]c56
not only does this query take a long time, it gives us the wrong answer because it also would say PHENOL-ETHER and POLYPHENOL were correct answers too. to get the correct answer we would have to type:
select
smiles
from
a_wdi_table
where
substructure like '%; PHENOL;%'
;
SMILES
--------------------------------------------------------------------------------
C/C=C\1/CN2CCc3c([nH]c4ccccc34)[C@@H]2C[C@@H]1Cc5nccc6c7cc(O)ccc7[nH]c56
and this gives us the correct answer... assuming that there were no typo's like '; PHENOL;' or '; PHENOL ;'
Using a Master Detail
select * from compound;
SID SMILES
- ------------------------------------------------------------------------
1 C/C=C\1/CN2CCc3c([nH]c4ccccc34)[C@@H]2C[C@@H]1Cc5nccc6c7cc(O)ccc7[nH]c56
2 COc1cc(ccc1O)C2Oc3c(OC)cc(cc3O[C@H]2CO)c4cc(=O)c5c(O)cc(O)cc5o4
3 COc1cc(ccc1O)[C@H]2Oc3cc(cc(OC)c3O[C@@H]2CO)c4cc(=O)c5c(O)cc(O)cc5o4
4 Brc1ccc2c(c[nH]c2c1)c3cnc([nH]3)C(=O)c4c[nH]c5ccccc45
select * from compound_has_substructure;
COMPOUND_SID SUBSTRUCTURE
------------ ------------------------------
1 ALKALOID
1 CARBOLINE
1 OLEFIN
1 QUINOLIZINE
1 PHENOL
1 COND.RING
2 ALCOHOL
2 ARYLKETONE
2 BH-LINKED-CC
2 PHENOL-ETHER
2 POLYPHENOL
2 DIOXANE
2 KETONE,CYCLIC
2 FLAVONOID
2 COND.RING
3 ALCOHOL
3 ARYLKETONE
3 COND.RING
3 POLYPHENOL
3 PHENOL-ETHER
3 KETONE,CYCLIC
3 FLAVONOID
3 DIOXANE
3 BH-LINKED-CC
4 ALKALOID
4 ARYLBROMIDE
4 IMIDAZOLE
4 KETONE
4 HH-LINKED-CC
4 BENZIMIDAZOLE
|
|
select
c.smiles
from
compound c,
compound_has_substructure cs
where
c.substructure = 'PHENOL'
and c.sid = cs.compound_sid;
SMILES
--------------------------------------------------------------------------------
C/C=C\1/CN2CCc3c([nH]c4ccccc34)[C@@H]2C[C@@H]1Cc5nccc6c7cc(O)ccc7[nH]c56
Using Many to Many
select * from compound;
SID SMILES
- ------------------------------------------------------------------------
1 C/C=C\1/CN2CCc3c([nH]c4ccccc34)[C@@H]2C[C@@H]1Cc5nccc6c7cc(O)ccc7[nH]c56
2 COc1cc(ccc1O)C2Oc3c(OC)cc(cc3O[C@H]2CO)c4cc(=O)c5c(O)cc(O)cc5o4
3 COc1cc(ccc1O)[C@H]2Oc3cc(cc(OC)c3O[C@@H]2CO)c4cc(=O)c5c(O)cc(O)cc5o4
4 Brc1ccc2c(c[nH]c2c1)c3cnc([nH]3)C(=O)c4c[nH]c5ccccc45
select * from substructure;
SID SUBSTRUCTURE
---------- ------------------------------
1 ALCOHOL
2 ALKALOID
3 ARYLBROMIDE
4 ARYLKETONE
5 BENZIMIDAZOLE
6 BH-LINKED-CC
7 CARBOLINE
8 COND.RING
9 DIOXANE
10 FLAVONOID
11 HH-LINKED-CC
12 IMIDAZOLE
13 KETONE
14 KETONE,CYCLIC
15 OLEFIN
16 PHENOL
17 PHENOL-ETHER
18 POLYPHENOL
19 QUINOLIZINE
select * from compound_has_substructure;
COMPOUND_SID SUBSTRUCTURE_SID
------------ ----------------
1 2
1 7
1 8
1 16
1 19
1 15
2 1
2 9
2 18
2 17
2 14
2 10
2 8
2 4
2 6
3 1
3 18
3 17
3 14
3 10
3 8
3 6
3 4
3 9
4 2
4 5
4 11
4 13
4 12
4 3
|
|
select
c.smiles
from
compound c,
compound_has_substructure cs,
substructure s
where
s.substructure = 'PHENOL'
and c.sid = cs.compound_sid
and s.sid = cs.substructure_sid;
SMILES
--------------------------------------------------------------------------------
C/C=C\1/CN2CCc3c([nH]c4ccccc34)[C@@H]2C[C@@H]1Cc5nccc6c7cc(O)ccc7[nH]c56
Base Design
describe COMPOUND;
Name Null? Type
------------------------ -------- --------------
SID NOT NULL NUMBER(7)
SMILES NOT NULL VARCHAR2(866)
describe COMPOUND_NAME;
Name Null? Type
------------------------ -------- --------------
COMPOUND_SID NOT NULL NUMBER(7)
NAME NOT NULL VARCHAR2(189)
COMPOUND_NAME_TYPE NUMBER(1)
describe COMPOUND_DEPICTION;
Name Null? Type
------------------------ -------- --------------
COMPOUND_SID NOT NULL NUMBER(7)
COMPOUND_DEPICTION_TYPE NOT NULL NUMBER(1)
DEPICTION NOT NULL CLOB
describe COMPOUND_NAME_TYPE;
Name Null? Type
------------------------ -------- --------------
TYPE NOT NULL NUMBER(1)
DESCRIPTION NOT NULL VARCHAR2(100)
describe COMPOUND_DEPICTION_TYPE;
Name Null? Type
------------------------ -------- --------------
TYPE NOT NULL NUMBER(1)
DESCRIPTION NOT NULL VARCHAR2(100)
|
|
Using the Base Design for ACD
describe COMPOUND;
Name Null? Type
------------------------ -------- --------------
SID NOT NULL NUMBER(7)
AVG_MOLWT NOT NULL NUMBER(8,4)
WARNING VARCHAR2(51)
SMILES NOT NULL VARCHAR2(866)
describe SUPPLIER_ITEM;
Name Null? Type
------------------------ -------- --------------
COMPOUND_SID NOT NULL NUMBER(7)
SUPPLIER_SID NOT NULL NUMBER(7)
SUPPLIER_SKU NOT NULL VARCHAR2(12)
COMPOUND_NAME NOT NULL VARCHAR2(253)
GRADE NOT NULL VARCHAR2(4)
PURITY VARCHAR2(10)
COST NUMBER(9,3)
COST_UNITS VARCHAR2(32)
AMOUNT NUMBER(20,10)
AMOUNT_UNITS VARCHAR2(32)
NOTE VARCHAR2(4000)
UPDATED VARCHAR2(4)
describe SUPPLIER;
Name Null? Type
------------------------ -------- --------------
SID NOT NULL NUMBER(7)
NAME NOT NULL VARCHAR2(12)
describe SUPPLIER_ADDRESS;
Name Null? Type
------------------------ -------- --------------
SUPPLIER_SID NOT NULL NUMBER(7)
ADDRESS_NAME VARCHAR2(100)
ADDRESS_LINE_ONE VARCHAR2(100)
ADDRESS_LINE_TWO VARCHAR2(100)
CITY VARCHAR2(100)
STATE_PROVINCE VARCHAR2(100)
POSTAL_CODE VARCHAR2(30)
COUNTRY VARCHAR2(50)
describe SUPPLIER_PHONE;
Name Null? Type
------------------------ -------- --------------
SUPPLIER_SID NOT NULL NUMBER(7)
SUPPLIER_ADDRESS_SID NUMBER(7)
PHONE_TYPE NUMBER(1)
PHONE VARCHAR2(30)
describe PHONE_TYPE;
Name Null? Type
------------------------ -------- --------------
PHONE_TYPE NOT NULL NUMBER(1)
DESCRIPTION NOT NULL VARCHAR2(30)
|
|
What names are associated with [O-][N+](=O)c1ccc(cc1)C2Oc3ccccc3C4CC(=NN42)c5ccco5?
select
cn.name
from
compound c,
compound_name cn
where
c.smiles = '[O-][N+](=O)c1ccc(cc1)C2Oc3ccccc3C4CC(=NN42)c5ccco5'
and c.sid = cn.compound_sid
NAME
--------------------------------------------------------------------------------
2-FURAN-2-YL-4-(4-NITRO-PH)-1,9B-2H-5-OXA-3,3A-DIAZA-CYCLOPENTA(A)NAPHTHALENE
2FURAN2YL44NITROPH19B2H5OXA33ADIAZACYCLOPENTAANAPHTHALENE
R453064
|
|
What is the D2D depiction for CAS id 645-96-5?
select
c.smiles,
cd.depiction
from
compound c,
compound_name cn,
compound_depiction cd,
compound_name_type cnt,
compound_depiction_type cdt
where
cn.name = '645-96-5'
and cnt.description = 'CAS'
and cdt.description = 'D2D'
and cn.compound_name_type = cnt.type
and cd.compound_depiction_type = cdt.type
and c.sid = cn.compound_sid
and c.sid = cd.compound_sid
;
SMILES DEPICTION
--------------- ----------------------------------------------------------------
[SeH]c1ccccc1 1.75,1.42,0.45,0.67,-0.86,1.42,-2.16,0.67,-2.16,-0.84,-0.86,-1.59,0.45,-0.84
|
|
What vendors offer a compound that has the substructure c1ccc(cc1)C2Oc3ccccc3C4CC(=NN42)c5ccco5?
select
distinct s.name
from
compound c,
supplier s,
supplier_item si
where
contains(c.smiles, 'c1ccc(cc1)C2Oc3ccccc3C4CC(=NN42)c5ccco5') = 1
and c.sid = si.compound_sid
and s.sid = si.supplier_sid
;
SUPPLIER_NAM
------------
SALOR
|
|
What vendors in a set of states offer this compound?
select
distinct s.name
from
compound c,
supplier s,
supplier_item si,
supplier_address sa
where
c.smiles = '[O-][N+](=O)c1ccc(cc1)C2Oc3ccccc3C4CC(=NN42)c5ccco5'
and sa.state_province in ('NY', 'NJ')
and c.sid = si.compound_sid
and s.sid = si.supplier_sid
and s.sid = sa.supplier_sid
;
|
|