import os
from typing import List
from pprint import pprint
from biokb_chebi import get_session
from biokb_chebi import models
from biokb_chebi.api import schemas
# Remove CONNECTION_STR from the process environment (no error if it is not
# set) to make sure no environment variable is used.
os.environ.pop("CONNECTION_STR", None)
Query and search data
BioKb-ChEBI uses SQLAlchemy to define the database schema for storing chemical compound data from the ChEBI database. The following diagram illustrates the main entities and their relationships:
The central entity in the data model is the Compound, which represents a chemical compound in ChEBI. Other key entities include ChemicalData, Structure, Name, Relation, Reference, DatabaseAccession, Source, Comment, and Status. Each entity has its own set of attributes and relationships with other entities.
Overview
You can query the database using SQLAlchemy's ORM capabilities. Below are some example queries to get you started.
First, import the data using the import_data function. You can skip this step if the database has already been populated. The import will download the ChEBI data files, parse them, and populate the database. Depending on your system and internet connection, this may take some time.
# One-time setup cell: per the accompanying text, import_data downloads the
# ChEBI data files, parses them and populates the database. This can take a
# while, so skip the cell when the database is already filled.
from biokb_chebi import import_data
# NOTE(review): keep_files=True presumably keeps the downloaded files on disk
# after the import -- confirm against import_data's documentation.
import_data(keep_files=True)
# Fetch the first three Compound rows and pretty-print each one as a plain
# dict via its API schema.
with get_session() as session:
    compounds: List[models.Compound] = session.query(models.Compound).limit(3).all()
    # Serialise while the session is still open: the schema also emits related
    # rows (names, references, structures, ...), which are presumably loaded
    # lazily on attribute access -- TODO confirm against the model definitions.
    for compound in compounds:
        pprint(schemas.Compound.model_validate(compound).model_dump())
{'ascii_name': '((R)-3-Hydroxybutanoyl)(n-2)',
'chebi_accession': 'CHEBI:3',
'chemical_data': [{'charge': 0,
'formula': '(C4H6O2)n',
'id': 2,
'is_autogenerated': True,
'mass': 86.09,
'monoisotopic_mass': 86.03678}],
'comments': [],
'database_accessions': [{'accession_number': 'C06147',
'id': 9,
'type': 'MANUAL_X_REF'}],
'definition': None,
'id': 3,
'merge_type': None,
'modified_on': datetime.datetime(2016, 1, 27, 14, 38, 49),
'name': '((R)-3-Hydroxybutanoyl)(n-2)',
'names': [{'adapted': False,
'ascii_name': '((R)-3-Hydroxybutanoyl)(n-2)',
'id': 8,
'language_code': 'en',
'name': '((R)-3-Hydroxybutanoyl)(n-2)',
'type': 'SYNONYM'}],
'parent_id': None,
'references': [{'accession_number': '223438552',
'id': 1165063527,
'location_in_ref': None,
'reference_name': None},
{'accession_number': 'MTBLS1182',
'id': 1165530783,
'location_in_ref': None,
'reference_name': None},
{'accession_number': 'MTBLS1485',
'id': 1165530784,
'location_in_ref': None,
'reference_name': None}],
'release_date': None,
'source': 'KEGG COMPOUND',
'stars': 2,
'status': {'id': 3, 'name': 'OK'},
'structures': [{'compound_id': 3,
'default_structure': True,
'dimension': '2D',
'id': 2845109,
'smiles': '*OC(C)CC(*)=O',
'standard_inchi': None,
'standard_inchi_key': None,
'status_id': 3}]}
{'ascii_name': '(+)-car-3-ene',
'chebi_accession': 'CHEBI:7',
'chemical_data': [{'charge': 0,
'formula': 'C10H16',
'id': 7,
'is_autogenerated': True,
'mass': 136.238,
'monoisotopic_mass': 136.1252}],
'comments': [],
'database_accessions': [{'accession_number': 'C11382',
'id': 16,
'type': 'MANUAL_X_REF'},
{'accession_number': '498-15-7',
'id': 17,
'type': 'CAS'},
{'accession_number': '498-15-7',
'id': 97740,
'type': 'CAS'},
{'accession_number': '498-15-7',
'id': 97741,
'type': 'CAS'},
{'accession_number': '663435',
'id': 97743,
'type': 'REGISTRY_NUMBER'},
{'accession_number': '4229885',
'id': 97747,
'type': 'REGISTRY_NUMBER'},
{'accession_number': 'LMPR0102120021',
'id': 120809,
'type': 'MANUAL_X_REF'},
{'accession_number': 'CPD-8756',
'id': 846506,
'type': 'MANUAL_X_REF'},
{'accession_number': 'C00011044',
'id': 870373,
'type': 'MANUAL_X_REF'},
{'accession_number': '1902767',
'id': 887109,
'type': 'REGISTRY_NUMBER'},
{'accession_number': '8373196',
'id': 887113,
'type': 'CITATION'},
{'accession_number': '22277889',
'id': 887114,
'type': 'CITATION'},
{'accession_number': '22183881',
'id': 887115,
'type': 'CITATION'}],
'definition': 'A car-3-ene (3,7,7-trimethylbicyclo[4.1.0]hept-3-ene) that has '
'<i>S</i> configuration at position 1 and <i>R</i> '
'configuration at position 6.',
'id': 7,
'merge_type': None,
'modified_on': datetime.datetime(2015, 1, 21, 16, 33, 22),
'name': '(+)-car-3-ene',
'names': [{'adapted': False,
'ascii_name': '(+)-3-Carene',
'id': 16,
'language_code': 'en',
'name': '(+)-3-Carene',
'type': 'SYNONYM'},
{'adapted': False,
'ascii_name': '(1S,6R)-3,7,7-trimethylbicyclo[4.1.0]hept-3-ene',
'id': 61699,
'language_code': 'en',
'name': '(1<i>S</i>,6<i>R</i>)-3,7,7-trimethylbicyclo[4.1.0]hept-3-ene',
'type': 'IUPAC NAME'},
{'adapted': False,
'ascii_name': '(1S)-3,7,7-trimethylbicyclo[4.1.0]hept-3-ene',
'id': 61700,
'language_code': 'en',
'name': '(1<i>S</i>)-3,7,7-trimethylbicyclo[4.1.0]hept-3-ene',
'type': 'SYNONYM'},
{'adapted': False,
'ascii_name': '(1S)-(+)-3-carene',
'id': 61701,
'language_code': 'en',
'name': '(1<i>S</i>)-(+)-3-carene',
'type': 'SYNONYM'},
{'adapted': False,
'ascii_name': '(S)-(+)-3-carene',
'id': 61702,
'language_code': 'en',
'name': '(<i>S</i>)-(+)-3-carene',
'type': 'SYNONYM'},
{'adapted': False,
'ascii_name': '(1S,6R)-(+)-3-carene',
'id': 61703,
'language_code': 'en',
'name': '(1<i>S</i>,6<i>R</i>)-(+)-3-carene',
'type': 'SYNONYM'},
{'adapted': False,
'ascii_name': '(+)-Delta(3)-carene',
'id': 61704,
'language_code': 'en',
'name': '(+)-Δ<small><sup>3</small></sup>-carene',
'type': 'SYNONYM'},
{'adapted': False,
'ascii_name': '1alpha,6alpha-car-3-ene',
'id': 61706,
'language_code': 'en',
'name': '1α,6α-car-3-ene',
'type': 'IUPAC NAME'},
{'adapted': False,
'ascii_name': '(+)-car-3-ene',
'id': 835521,
'language_code': 'en',
'name': '(+)-car-3-ene',
'type': 'UNIPROT NAME'},
{'adapted': False,
'ascii_name': 'Isodiprene',
'id': 890956,
'language_code': 'en',
'name': 'Isodiprene',
'type': 'SYNONYM'},
{'adapted': False,
'ascii_name': '(+)-alpha-carene',
'id': 899760,
'language_code': 'en',
'name': '(+)-α-carene',
'type': 'SYNONYM'}],
'parent_id': None,
'references': [{'accession_number': 'Q84SM8',
'id': 1031532100,
'location_in_ref': 'DE',
'reference_name': 'Carene synthase, chloroplastic'},
{'accession_number': 'Q84SM8',
'id': 1031532129,
'location_in_ref': 'CATALYTIC ACTIVITY',
'reference_name': 'Carene synthase, chloroplastic'},
{'accession_number': 'C7ASI9',
'id': 1031532154,
'location_in_ref': 'DE',
'reference_name': 'Carene synthase, chloroplastic'},
{'accession_number': 'C7ASI9',
'id': 1031532183,
'location_in_ref': 'CATALYTIC ACTIVITY',
'reference_name': 'Carene synthase, chloroplastic'},
{'accession_number': 'F1CKI6',
'id': 1031532206,
'location_in_ref': 'DE',
'reference_name': 'Carene synthase 1, chloroplastic'},
{'accession_number': 'F1CKI6',
'id': 1031532235,
'location_in_ref': 'CATALYTIC ACTIVITY',
'reference_name': 'Carene synthase 1, chloroplastic'},
{'accession_number': 'F1CKI8',
'id': 1031532265,
'location_in_ref': 'DE',
'reference_name': 'Carene synthase 2, chloroplastic'},
{'accession_number': 'F1CKI8',
'id': 1031532294,
'location_in_ref': 'CATALYTIC ACTIVITY',
'reference_name': 'Carene synthase 2, chloroplastic'},
{'accession_number': 'F1CKI9',
'id': 1031532325,
'location_in_ref': 'DE',
'reference_name': 'Carene synthase 3, chloroplastic'},
{'accession_number': 'F1CKI9',
'id': 1031532354,
'location_in_ref': 'CATALYTIC ACTIVITY',
'reference_name': 'Carene synthase 3, chloroplastic'},
{'accession_number': 'RHEA:32539',
'id': 1122932095,
'location_in_ref': None,
'reference_name': '(2E)-geranyl diphosphate = (+)-car-3-ene + '
'diphosphate'},
{'accession_number': '14099',
'id': 1125315936,
'location_in_ref': None,
'reference_name': 'Geranyl diphosphate = Diphosphate + '
'(+)-3-Carene'},
{'accession_number': '60019305',
'id': 1155287704,
'location_in_ref': None,
'reference_name': None},
{'accession_number': 'DTXSID60858827',
'id': 1155306205,
'location_in_ref': None,
'reference_name': None},
{'accession_number': '71406',
'id': 1155368169,
'location_in_ref': None,
'reference_name': None},
{'accession_number': '28483',
'id': 1155375486,
'location_in_ref': None,
'reference_name': None},
{'accession_number': '58863',
'id': 1155384519,
'location_in_ref': None,
'reference_name': None},
{'accession_number': '136006',
'id': 1155388171,
'location_in_ref': None,
'reference_name': None},
{'accession_number': '71406',
'id': 1155412084,
'location_in_ref': None,
'reference_name': None},
{'accession_number': '28483',
'id': 1155419401,
'location_in_ref': None,
'reference_name': None},
{'accession_number': '58863',
'id': 1155428434,
'location_in_ref': None,
'reference_name': None},
{'accession_number': '136006',
'id': 1155432086,
'location_in_ref': None,
'reference_name': None},
{'accession_number': '1.11.1.10',
'id': 1164996873,
'location_in_ref': None,
'reference_name': 'chloride peroxidase'},
{'accession_number': '1.14.14.31',
'id': 1164996874,
'location_in_ref': None,
'reference_name': 'ipsdienol synthase'},
{'accession_number': '1.14.14.99',
'id': 1164996875,
'location_in_ref': None,
'reference_name': '(S)-limonene 3-monooxygenase'},
{'accession_number': '3.1.1.7',
'id': 1164996876,
'location_in_ref': None,
'reference_name': 'acetylcholinesterase'},
{'accession_number': '3.1.1.8',
'id': 1164996877,
'location_in_ref': None,
'reference_name': 'cholinesterase'},
{'accession_number': '4.2.3.107',
'id': 1164996878,
'location_in_ref': None,
'reference_name': '(+)-car-3-ene synthase'},
{'accession_number': '4.2.3.113',
'id': 1164996879,
'location_in_ref': None,
'reference_name': 'terpinolene synthase'},
{'accession_number': '4.6.1.13',
'id': 1164996880,
'location_in_ref': None,
'reference_name': 'phosphatidylinositol diacylglycerol-lyase'},
{'accession_number': 'SCHEMBL1532729',
'id': 1165057936,
'location_in_ref': None,
'reference_name': None},
{'accession_number': '11533292',
'id': 1165063528,
'location_in_ref': None,
'reference_name': None},
{'accession_number': '443156',
'id': 1165255477,
'location_in_ref': None,
'reference_name': None},
{'accession_number': 'MTBLS1622',
'id': 1165530785,
'location_in_ref': None,
'reference_name': None},
{'accession_number': 'MTBLS3038',
'id': 1165530786,
'location_in_ref': None,
'reference_name': None},
{'accession_number': 'MTBLS6032',
'id': 1165530787,
'location_in_ref': None,
'reference_name': None},
{'accession_number': 'MTBLS9409',
'id': 1165530788,
'location_in_ref': None,
'reference_name': None},
{'accession_number': 'MTBLS9419',
'id': 1165530789,
'location_in_ref': None,
'reference_name': None}],
'release_date': None,
'source': 'KEGG COMPOUND',
'stars': 3,
'status': {'id': 1, 'name': 'CHECKED'},
'structures': [{'compound_id': 7,
'default_structure': True,
'dimension': '2D',
'id': 18922,
'smiles': '[H][C@@]12CC=C(C)C[C@]1([H])C2(C)C',
'standard_inchi': 'InChI=1S/C10H16/c1-7-4-5-8-9(6-7)10(8,2)3/h4,8-9H,5-6H2,1-3H3/t8-,9+/m1/s1',
'standard_inchi_key': 'BQOFWKZOCNGFEC-BDAKNGLRSA-N',
'status_id': 1}]}
{'ascii_name': '(+)-8-hydroxycalamenene',
'chebi_accession': 'CHEBI:8',
'chemical_data': [{'charge': 0,
'formula': 'C15H22O',
'id': 8,
'is_autogenerated': True,
'mass': 218.34,
'monoisotopic_mass': 218.16707}],
'comments': [],
'database_accessions': [{'accession_number': 'C09938',
'id': 18,
'type': 'MANUAL_X_REF'},
{'accession_number': '88642-92-6',
'id': 19,
'type': 'CAS'},
{'accession_number': '5257045',
'id': 113424,
'type': 'REGISTRY_NUMBER'},
{'accession_number': 'LMPR0103330006',
'id': 789894,
'type': 'MANUAL_X_REF'},
{'accession_number': '4671990',
'id': 789895,
'type': 'REGISTRY_NUMBER'},
{'accession_number': 'C00002996',
'id': 869242,
'type': 'MANUAL_X_REF'}],
'definition': 'A sesquiterpenoid consisting of '
'5,6,7,8-tetrahydronaphthalen-1-ol having two methyl '
'substituents at the 3- and 8-positions, an isopropyl '
'substituent at the 5-position and '
'(+)-(5<i>R</i>,8<i>S</i>)-configuration.',
'id': 8,
'merge_type': None,
'modified_on': datetime.datetime(2014, 7, 28, 15, 57, 47),
'name': '(+)-8-hydroxycalamenene',
'names': [{'adapted': False,
'ascii_name': '(+)-8-Hydroxycalamenene',
'id': 17,
'language_code': 'en',
'name': '(+)-8-Hydroxycalamenene',
'type': 'SYNONYM'},
{'adapted': False,
'ascii_name': '(5R,8S)-3,8-dimethyl-5-(propan-2-yl)-5,6,7,8-tetrahydronaphthalen-1-ol',
'id': 90643,
'language_code': 'en',
'name': '(5<i>R</i>,8<i>S</i>)-3,8-dimethyl-5-(propan-2-yl)-5,6,7,8-tetrahydronaphthalen-1-ol',
'type': 'IUPAC NAME'},
{'adapted': False,
'ascii_name': '7betaH-cadina-1,3,5-trien-2-ol',
'id': 90658,
'language_code': 'en',
'name': '7β<em>H</em>-cadina-1,3,5-trien-2-ol',
'type': 'IUPAC NAME'},
{'adapted': False,
'ascii_name': '(+)-hydroxycalamenene',
'id': 827701,
'language_code': 'en',
'name': '(+)-hydroxycalamenene',
'type': 'SYNONYM'}],
'parent_id': None,
'references': [{'accession_number': '70028380',
'id': 1155287495,
'location_in_ref': None,
'reference_name': None},
{'accession_number': 'DTXSID40331860',
'id': 1155289444,
'location_in_ref': None,
'reference_name': None},
{'accession_number': 'SCHEMBL4742053',
'id': 1165028734,
'location_in_ref': None,
'reference_name': None},
{'accession_number': '49658669',
'id': 1165063529,
'location_in_ref': None,
'reference_name': None},
{'accession_number': '442519',
'id': 1165255478,
'location_in_ref': None,
'reference_name': None},
{'accession_number': 'MTBLS1693',
'id': 1165530790,
'location_in_ref': None,
'reference_name': None},
{'accession_number': 'MTBLS3725',
'id': 1165530791,
'location_in_ref': None,
'reference_name': None},
{'accession_number': 'MTBLS586',
'id': 1165530792,
'location_in_ref': None,
'reference_name': None},
{'accession_number': 'MTBLS682',
'id': 1165530793,
'location_in_ref': None,
'reference_name': None}],
'release_date': None,
'source': 'KEGG COMPOUND',
'stars': 3,
'status': {'id': 1, 'name': 'CHECKED'},
'structures': [{'compound_id': 8,
'default_structure': True,
'dimension': '2D',
'id': 51993,
'smiles': 'Cc1cc(O)c2c(c1)[C@@H](C(C)C)CC[C@@H]2C',
'standard_inchi': 'InChI=1S/C15H22O/c1-9(2)12-6-5-11(4)15-13(12)7-10(3)8-14(15)16/h7-9,11-12,16H,5-6H2,1-4H3/t11-,12+/m0/s1',
'standard_inchi_key': 'FDMKIGKOMRSCAW-NWDGAFQWSA-N',
'status_id': 1}]}
Comments
# Fetch the first three Comment rows and pretty-print each one as a plain dict
# via its API schema.
with get_session() as session:
    comments: List[models.Comment] = session.query(models.Comment).limit(3).all()
    # Serialise while the session is still open: the schema embeds the owning
    # compound, which is presumably loaded lazily -- TODO confirm.
    for comment in comments:
        pprint(schemas.Comment.model_validate(comment).model_dump())
{'author_name': 'ops$mennis',
'comment': 'The natural product is the 6<stereo>S</stereo> stereoisomer.',
'compound': {'ascii_name': '(6S)-5,6,7,8-tetrahydrofolic acid',
'chebi_accession': 'CHEBI:15635',
'definition': 'A derivative of folic acid in which the pteridine '
'ring is fully reduced; it is the parent compound '
'of a variety of coenzymes that serve as carriers '
'of one-carbon groups in metabolic reactions.',
'id': 15635,
'merge_type': 'A',
'modified_on': datetime.datetime(2019, 7, 25, 15, 34, 40),
'name': '(6<i>S</i>)-5,6,7,8-tetrahydrofolic acid',
'release_date': None,
'source': 'ChEBI',
'stars': 3},
'compound_id': 15635,
'datatype': 'General',
'datatype_id': 15635,
'id': 14}
{'author_name': 'ops$mennis',
'comment': 'The naturally occurring compound is the 6R stereoisomer. ',
'compound': {'ascii_name': '(6R)-5,10-methylenetetrahydrofolate(2-)',
'chebi_accession': 'CHEBI:15636',
'definition': None,
'id': 15636,
'merge_type': 'A',
'modified_on': datetime.datetime(2019, 7, 25, 13, 45, 28),
'name': '(6<i>R</i>)-5,10-methylenetetrahydrofolate(2−)',
'release_date': None,
'source': 'ChEBI',
'stars': 3},
'compound_id': 15636,
'datatype': 'General',
'datatype_id': 15636,
'id': 15}
{'author_name': 'ops$mennis',
'comment': 'The naturally occurring compound is the tetrahydrofolate 6R '
'stereoisomer.',
'compound': {'ascii_name': '(6R)-5,10-methenyltetrahydrofolic acid',
'chebi_accession': 'CHEBI:15638',
'definition': 'The 5,10-methenyl derivative of tetrahydrofolic '
'acid arising from enzymatic cyclisation of '
'5-formyltetrahydrofolic acid.',
'id': 15638,
'merge_type': 'C',
'modified_on': datetime.datetime(2016, 1, 27, 14, 40, 1),
'name': '(6<i>R</i>)-5,10-methenyltetrahydrofolic acid',
'release_date': None,
'source': 'ChEBI',
'stars': 3},
'compound_id': 15638,
'datatype': 'General',
'datatype_id': 15638,
'id': 16}
Names
# Fetch the first three Name rows and pretty-print each one as a plain dict
# via its API schema.
with get_session() as session:
    names: List[models.Name] = session.query(models.Name).limit(3).all()
    # Serialise while the session is still open: the schema embeds the owning
    # compound and the status, which are presumably loaded lazily -- TODO confirm.
    for name in names:
        pprint(schemas.Name.model_validate(name).model_dump())
{'adapted': False,
'ascii_name': 'Noradrenaline',
'compound': {'ascii_name': '(R)-noradrenaline',
'chebi_accession': 'CHEBI:18357',
'definition': 'The <i>R</i>-enantiomer of noradrenaline.',
'id': 18357,
'merge_type': 'A',
'modified_on': datetime.datetime(2019, 11, 20, 17, 35, 17),
'name': '(<i>R</i>)-noradrenaline',
'release_date': None,
'source': 'ChEBI',
'stars': 3},
'id': 2,
'language_code': 'en',
'name': 'Noradrenaline',
'status': {'id': 1, 'name': 'CHECKED'},
'type': 'SYNONYM'}
{'adapted': False,
'ascii_name': 'L-Noradrenaline',
'compound': {'ascii_name': '(R)-noradrenaline',
'chebi_accession': 'CHEBI:18357',
'definition': 'The <i>R</i>-enantiomer of noradrenaline.',
'id': 18357,
'merge_type': 'A',
'modified_on': datetime.datetime(2019, 11, 20, 17, 35, 17),
'name': '(<i>R</i>)-noradrenaline',
'release_date': None,
'source': 'ChEBI',
'stars': 3},
'id': 3,
'language_code': 'en',
'name': 'L-Noradrenaline',
'status': {'id': 1, 'name': 'CHECKED'},
'type': 'SYNONYM'}
{'adapted': False,
'ascii_name': 'Norepinephrine',
'compound': {'ascii_name': '(R)-noradrenaline',
'chebi_accession': 'CHEBI:18357',
'definition': 'The <i>R</i>-enantiomer of noradrenaline.',
'id': 18357,
'merge_type': 'A',
'modified_on': datetime.datetime(2019, 11, 20, 17, 35, 17),
'name': '(<i>R</i>)-noradrenaline',
'release_date': None,
'source': 'ChEBI',
'stars': 3},
'id': 4,
'language_code': 'en',
'name': 'Norepinephrine',
'status': {'id': 1, 'name': 'CHECKED'},
'type': 'SYNONYM'}
Chemical Data
# Fetch the first three ChemicalData rows and pretty-print each one as a plain
# dict via its API schema.
with get_session() as session:
    chemical_data_rows: List[models.ChemicalData] = (
        session.query(models.ChemicalData).limit(3).all()
    )
    # Serialise while the session is still open: the schema embeds the compound,
    # status and structure, which are presumably loaded lazily -- TODO confirm.
    for chemical_data in chemical_data_rows:
        pprint(schemas.ChemicalData.model_validate(chemical_data).model_dump())
{'charge': 0,
'compound': {'ascii_name': '((R)-3-Hydroxybutanoyl)(n-2)',
'chebi_accession': 'CHEBI:3',
'definition': None,
'id': 3,
'merge_type': None,
'modified_on': datetime.datetime(2016, 1, 27, 14, 38, 49),
'name': '((R)-3-Hydroxybutanoyl)(n-2)',
'release_date': None,
'source': 'KEGG COMPOUND',
'stars': 2},
'formula': '(C4H6O2)n',
'id': 2,
'is_autogenerated': True,
'mass': 86.09,
'monoisotopic_mass': 86.03678,
'status': {'id': 3, 'name': 'OK'},
'structure': {'compound_id': 3,
'default_structure': True,
'dimension': '2D',
'id': 2845109,
'smiles': '*OC(C)CC(*)=O',
'standard_inchi': None,
'standard_inchi_key': None,
'status_id': 3}}
{'charge': 0,
'compound': {'ascii_name': '(+)-car-3-ene',
'chebi_accession': 'CHEBI:7',
'definition': 'A car-3-ene '
'(3,7,7-trimethylbicyclo[4.1.0]hept-3-ene) that '
'has <i>S</i> configuration at position 1 and '
'<i>R</i> configuration at position 6.',
'id': 7,
'merge_type': None,
'modified_on': datetime.datetime(2015, 1, 21, 16, 33, 22),
'name': '(+)-car-3-ene',
'release_date': None,
'source': 'KEGG COMPOUND',
'stars': 3},
'formula': 'C10H16',
'id': 7,
'is_autogenerated': True,
'mass': 136.238,
'monoisotopic_mass': 136.1252,
'status': {'id': 1, 'name': 'CHECKED'},
'structure': {'compound_id': 7,
'default_structure': True,
'dimension': '2D',
'id': 18922,
'smiles': '[H][C@@]12CC=C(C)C[C@]1([H])C2(C)C',
'standard_inchi': 'InChI=1S/C10H16/c1-7-4-5-8-9(6-7)10(8,2)3/h4,8-9H,5-6H2,1-3H3/t8-,9+/m1/s1',
'standard_inchi_key': 'BQOFWKZOCNGFEC-BDAKNGLRSA-N',
'status_id': 1}}
{'charge': 0,
'compound': {'ascii_name': '(+)-8-hydroxycalamenene',
'chebi_accession': 'CHEBI:8',
'definition': 'A sesquiterpenoid consisting of '
'5,6,7,8-tetrahydronaphthalen-1-ol having two '
'methyl substituents at the 3- and 8-positions, an '
'isopropyl substituent at the 5-position and '
'(+)-(5<i>R</i>,8<i>S</i>)-configuration.',
'id': 8,
'merge_type': None,
'modified_on': datetime.datetime(2014, 7, 28, 15, 57, 47),
'name': '(+)-8-hydroxycalamenene',
'release_date': None,
'source': 'KEGG COMPOUND',
'stars': 3},
'formula': 'C15H22O',
'id': 8,
'is_autogenerated': True,
'mass': 218.34,
'monoisotopic_mass': 218.16707,
'status': {'id': 1, 'name': 'CHECKED'},
'structure': {'compound_id': 8,
'default_structure': True,
'dimension': '2D',
'id': 51993,
'smiles': 'Cc1cc(O)c2c(c1)[C@@H](C(C)C)CC[C@@H]2C',
'standard_inchi': 'InChI=1S/C15H22O/c1-9(2)12-6-5-11(4)15-13(12)7-10(3)8-14(15)16/h7-9,11-12,16H,5-6H2,1-4H3/t11-,12+/m0/s1',
'standard_inchi_key': 'FDMKIGKOMRSCAW-NWDGAFQWSA-N',
'status_id': 1}}
DatabaseAccession
# Fetch the first three DatabaseAccession rows and pretty-print each one as a
# plain dict via its API schema.
with get_session() as session:
    accessions: List[models.DatabaseAccession] = (
        session.query(models.DatabaseAccession).limit(3).all()
    )
    # Serialise while the session is still open: the schema embeds the compound,
    # source and status, which are presumably loaded lazily -- TODO confirm.
    for accession in accessions:
        pprint(schemas.DatabaseAccession.model_validate(accession).model_dump())
{'accession_number': 'C00547',
'compound': {'ascii_name': '(R)-noradrenaline',
'chebi_accession': 'CHEBI:18357',
'definition': 'The <i>R</i>-enantiomer of noradrenaline.',
'id': 18357,
'merge_type': 'A',
'modified_on': datetime.datetime(2019, 11, 20, 17, 35, 17),
'name': '(<i>R</i>)-noradrenaline',
'release_date': None,
'source': 'ChEBI',
'stars': 3},
'id': 1,
'source': {'description': None,
'id': 45,
'name': 'KEGG COMPOUND',
'prefix': 'kegg.compound',
'url': 'https://bioregistry.io/kegg.compound:*'},
'status': {'id': 1, 'name': 'CHECKED'},
'type': 'MANUAL_X_REF'}
{'accession_number': '51-41-2',
'compound': {'ascii_name': '(R)-noradrenaline',
'chebi_accession': 'CHEBI:18357',
'definition': 'The <i>R</i>-enantiomer of noradrenaline.',
'id': 18357,
'merge_type': 'A',
'modified_on': datetime.datetime(2019, 11, 20, 17, 35, 17),
'name': '(<i>R</i>)-noradrenaline',
'release_date': None,
'source': 'ChEBI',
'stars': 3},
'id': 6,
'source': {'description': None,
'id': 45,
'name': 'KEGG COMPOUND',
'prefix': 'kegg.compound',
'url': 'https://bioregistry.io/kegg.compound:*'},
'status': {'id': 1, 'name': 'CHECKED'},
'type': 'CAS'}
{'accession_number': 'C06147',
'compound': {'ascii_name': '((R)-3-Hydroxybutanoyl)(n-2)',
'chebi_accession': 'CHEBI:3',
'definition': None,
'id': 3,
'merge_type': None,
'modified_on': datetime.datetime(2016, 1, 27, 14, 38, 49),
'name': '((R)-3-Hydroxybutanoyl)(n-2)',
'release_date': None,
'source': 'KEGG COMPOUND',
'stars': 2},
'id': 9,
'source': {'description': None,
'id': 45,
'name': 'KEGG COMPOUND',
'prefix': 'kegg.compound',
'url': 'https://bioregistry.io/kegg.compound:*'},
'status': {'id': 3, 'name': 'OK'},
'type': 'MANUAL_X_REF'}
Relations
# Fetch the first three Relation rows and pretty-print each one as a plain
# dict via its API schema.
with get_session() as session:
    relations: List[models.Relation] = session.query(models.Relation).limit(3).all()
    # Serialise while the session is still open: the schema embeds both end-point
    # compounds, the relation type and the status, which are presumably loaded
    # lazily -- TODO confirm.
    for relation in relations:
        pprint(schemas.Relation.model_validate(relation).model_dump())
{'evidence_accession': None,
'evidence_source': None,
'evidence_source_id': None,
'final_compound': {'ascii_name': 'chemical entity',
'chebi_accession': 'CHEBI:24431',
'definition': 'A chemical entity is a physical entity of '
'interest in chemistry including molecular '
'entities, parts thereof, and chemical '
'substances.',
'id': 24431,
'merge_type': None,
'modified_on': datetime.datetime(2015, 3, 31, 8, 17, 10),
'name': 'chemical entity',
'release_date': None,
'source': 'ChEBI',
'stars': 3},
'final_id': 24431,
'id': 3,
'init_compound': {'ascii_name': 'molecular entity',
'chebi_accession': 'CHEBI:23367',
'definition': 'Any constitutionally or isotopically '
'distinct atom, molecule, ion, ion pair, '
'radical, radical ion, complex, conformer '
'etc., identifiable as a separately '
'distinguishable entity.',
'id': 23367,
'merge_type': None,
'modified_on': datetime.datetime(2025, 1, 6, 10, 15, 44),
'name': 'molecular entity',
'release_date': None,
'source': 'ChEBI',
'stars': 3},
'init_id': 23367,
'relation_type': {'allow_cycles': False,
'code': 'is_a',
'description': 'is a',
'id': 5},
'relation_type_id': 5,
'status': {'id': 1, 'name': 'CHECKED'},
'status_id': 1}
{'evidence_accession': None,
'evidence_source': None,
'evidence_source_id': None,
'final_compound': {'ascii_name': 'divalent carboacyl group',
'chebi_accession': 'CHEBI:23855',
'definition': 'A divalent carboacyl group is a group '
'formed by loss of OH from two carboxy '
'groups of a polycarboxylic acid.',
'id': 23855,
'merge_type': None,
'modified_on': datetime.datetime(2010, 11, 16, 11, 29, 51),
'name': 'divalent carboacyl group',
'release_date': None,
'source': 'ChEBI',
'stars': 3},
'final_id': 23855,
'id': 18,
'init_compound': {'ascii_name': 'aspartoyl group',
'chebi_accession': 'CHEBI:22663',
'definition': None,
'id': 22663,
'merge_type': None,
'modified_on': datetime.datetime(2006, 3, 26, 0, 0),
'name': 'aspartoyl group',
'release_date': None,
'source': 'ChEBI',
'stars': 3},
'init_id': 22663,
'relation_type': {'allow_cycles': False,
'code': 'is_a',
'description': 'is a',
'id': 5},
'relation_type_id': 5,
'status': {'id': 3, 'name': 'OK'},
'status_id': 3}
{'evidence_accession': None,
'evidence_source': None,
'evidence_source_id': None,
'final_compound': {'ascii_name': 'divalent carboacyl group',
'chebi_accession': 'CHEBI:23855',
'definition': 'A divalent carboacyl group is a group '
'formed by loss of OH from two carboxy '
'groups of a polycarboxylic acid.',
'id': 23855,
'merge_type': None,
'modified_on': datetime.datetime(2010, 11, 16, 11, 29, 51),
'name': 'divalent carboacyl group',
'release_date': None,
'source': 'ChEBI',
'stars': 3},
'final_id': 23855,
'id': 19,
'init_compound': {'ascii_name': 'citraconoyl group',
'chebi_accession': 'CHEBI:23315',
'definition': None,
'id': 23315,
'merge_type': None,
'modified_on': datetime.datetime(2005, 10, 17, 23, 0),
'name': 'citraconoyl group',
'release_date': None,
'source': 'ChEBI',
'stars': 3},
'init_id': 23315,
'relation_type': {'allow_cycles': False,
'code': 'is_a',
'description': 'is a',
'id': 5},
'relation_type_id': 5,
'status': {'id': 3, 'name': 'OK'},
'status_id': 3}
References
# Fetch the first three Reference rows and pretty-print each one as a plain
# dict via its API schema.
with get_session() as session:
    references: List[models.Reference] = session.query(models.Reference).limit(3).all()
    # Serialise while the session is still open: the schema embeds the compound
    # and the source, which are presumably loaded lazily -- TODO confirm.
    for reference in references:
        pprint(schemas.Reference.model_validate(reference).model_dump())
{'accession_number': 'EP1438962',
'compound': {'ascii_name': 'warfarin',
'chebi_accession': 'CHEBI:10033',
'definition': 'A racemate comprising equal amounts of '
'(<i>R</i>)- and (<i>S</i>)-warfarin. Extensively '
'used as both an anticoagulant drug and as a '
'pesticide against rats and mice.',
'id': 10033,
'merge_type': 'A',
'modified_on': datetime.datetime(2023, 11, 7, 12, 35, 49),
'name': 'warfarin',
'release_date': None,
'source': 'KEGG COMPOUND',
'stars': 3},
'id': 144719615,
'location_in_ref': None,
'reference_name': 'DRUGS COMPRISING COMBINATION OF TRIAZASPIRO 5,5 U '
'NDECANE DERIVATIVE WITH CYTOCHROME P450 ISOZYME 3A4 '
'INHIBITOR AND/OR P−GLYCOPROTEIN INHIBITOR',
'source': {'description': None,
'id': 66,
'name': 'Patent',
'prefix': 'patent',
'url': 'https://worldwide.espacenet.com/patent/search?q=*'}}
{'accession_number': 'EP1625870',
'compound': {'ascii_name': 'warfarin',
'chebi_accession': 'CHEBI:10033',
'definition': 'A racemate comprising equal amounts of '
'(<i>R</i>)- and (<i>S</i>)-warfarin. Extensively '
'used as both an anticoagulant drug and as a '
'pesticide against rats and mice.',
'id': 10033,
'merge_type': 'A',
'modified_on': datetime.datetime(2023, 11, 7, 12, 35, 49),
'name': 'warfarin',
'release_date': None,
'source': 'KEGG COMPOUND',
'stars': 3},
'id': 144719616,
'location_in_ref': None,
'reference_name': 'Needle for intradermal delivery of substances having '
'penetration limiting means',
'source': {'description': None,
'id': 66,
'name': 'Patent',
'prefix': 'patent',
'url': 'https://worldwide.espacenet.com/patent/search?q=*'}}
{'accession_number': 'EP1719526',
'compound': {'ascii_name': 'warfarin',
'chebi_accession': 'CHEBI:10033',
'definition': 'A racemate comprising equal amounts of '
'(<i>R</i>)- and (<i>S</i>)-warfarin. Extensively '
'used as both an anticoagulant drug and as a '
'pesticide against rats and mice.',
'id': 10033,
'merge_type': 'A',
'modified_on': datetime.datetime(2023, 11, 7, 12, 35, 49),
'name': 'warfarin',
'release_date': None,
'source': 'KEGG COMPOUND',
'stars': 3},
'id': 144719617,
'location_in_ref': None,
'reference_name': 'Pharmaceutical combinations comprising a P2T receptor '
'antagonist and another anti-thrombotic agent',
'source': {'description': None,
'id': 66,
'name': 'Patent',
'prefix': 'patent',
'url': 'https://worldwide.espacenet.com/patent/search?q=*'}}
Relation Types
# Fetch the first three RelationType rows and pretty-print each one as a plain
# dict via its API schema.
with get_session() as session:
    relation_types: List[models.RelationType] = (
        session.query(models.RelationType).limit(3).all()
    )
    # Kept inside the session block so the rows are still attached while they
    # are being serialised.
    for relation_type in relation_types:
        pprint(schemas.RelationType.model_validate(relation_type).model_dump())
{'allow_cycles': False,
'code': 'has_functional_parent',
'description': 'has functional parent',
'id': 1}
{'allow_cycles': False,
'code': 'has_parent_hydride',
'description': 'has parent hydride',
'id': 2}
{'allow_cycles': False, 'code': 'has_part', 'description': 'has part', 'id': 3}
Structures
# Fetch the first three Structure rows and pretty-print each one as a plain
# dict via its API schema.
with get_session() as session:
    structures: List[models.Structure] = session.query(models.Structure).limit(3).all()
    # Serialise while the session is still open: the schema embeds chemical data,
    # the compound and the status, which are presumably loaded lazily -- TODO confirm.
    for structure in structures:
        pprint(schemas.Structure.model_validate(structure).model_dump())
{'chemical_data': [{'charge': 0,
'formula': 'C10H8O',
'id': 11606,
'is_autogenerated': True,
'mass': 144.173,
'monoisotopic_mass': 144.05751}],
'compound': {'ascii_name': '1-naphthol',
'chebi_accession': 'CHEBI:10319',
'definition': 'A naphthol carrying a hydroxy group at position '
'1.',
'id': 10319,
'merge_type': 'A',
'modified_on': datetime.datetime(2020, 1, 3, 8, 15, 20),
'name': '1-naphthol',
'release_date': None,
'source': 'KEGG COMPOUND',
'stars': 3},
'compound_id': 10319,
'default_structure': True,
'dimension': '2D',
'id': 2,
'smiles': 'Oc1cccc2ccccc12',
'standard_inchi': 'InChI=1S/C10H8O/c11-10-7-3-5-8-4-1-2-6-9(8)10/h1-7,11H',
'standard_inchi_key': 'KJCVRFUGPWSIIH-UHFFFAOYSA-N',
'status': {'id': 1, 'name': 'CHECKED'},
'status_id': 1}
{'chemical_data': [{'charge': 0,
'formula': 'C3H2ClN3O2',
'id': 1088,
'is_autogenerated': True,
'mass': 147.521,
'monoisotopic_mass': 146.98355}],
'compound': {'ascii_name': '6-chloro-1,3,5-triazine-2,4-diol',
'chebi_accession': 'CHEBI:1040',
'definition': 'A dihydroxy-1,3,5-triazine that is '
'1,3,5-triazine-2,4-diol substituted by a chloro '
'group at position 6.',
'id': 1040,
'merge_type': None,
'modified_on': datetime.datetime(2015, 1, 30, 12, 49, 51),
'name': '6-chloro-1,3,5-triazine-2,4-diol',
'release_date': None,
'source': 'KEGG COMPOUND',
'stars': 3},
'compound_id': 1040,
'default_structure': True,
'dimension': '2D',
'id': 4,
'smiles': 'Oc1nc(O)nc(Cl)n1',
'standard_inchi': 'InChI=1S/C3H2ClN3O2/c4-1-5-2(8)7-3(9)6-1/h(H2,5,6,7,8,9)',
'standard_inchi_key': 'YDHNHFNGJCKAIZ-UHFFFAOYSA-N',
'status': {'id': 1, 'name': 'CHECKED'},
'status_id': 1}
{'chemical_data': [{'charge': 0,
'formula': 'C10H8O',
'id': 11722,
'is_autogenerated': True,
'mass': 144.173,
'monoisotopic_mass': 144.05751}],
'compound': {'ascii_name': '2-naphthol',
'chebi_accession': 'CHEBI:10432',
'definition': 'A naphthol carrying a hydroxy group at position '
'2.',
'id': 10432,
'merge_type': 'A',
'modified_on': datetime.datetime(2021, 6, 8, 14, 14, 54),
'name': '2-naphthol',
'release_date': None,
'source': 'KEGG COMPOUND',
'stars': 3},
'compound_id': 10432,
'default_structure': True,
'dimension': '2D',
'id': 5,
'smiles': 'Oc1ccc2ccccc2c1',
'standard_inchi': 'InChI=1S/C10H8O/c11-10-6-5-8-3-1-2-4-9(8)7-10/h1-7,11H',
'standard_inchi_key': 'JWAZRIHNYRIHIV-UHFFFAOYSA-N',
'status': {'id': 1, 'name': 'CHECKED'},
'status_id': 1}
Sources
# Fetch the first three Source rows and pretty-print each one as a plain dict
# via its API schema.
with get_session() as session:
    sources: List[models.Source] = session.query(models.Source).limit(3).all()
    # Kept inside the session block so the rows are still attached while they
    # are being serialised.
    for source in sources:
        pprint(schemas.Source.model_validate(source).model_dump())
{'description': None,
'id': 1,
'name': 'Agricola',
'prefix': 'agr',
'url': 'https://europepmc.org/abstract/AGR/*'}
{'description': None,
'id': 2,
'name': "Alan Wood's Pesticides",
'prefix': 'pesticides',
'url': 'https://bioregistry.io/pesticides:*'}
{'description': 'ArrayExpress is a public repository for transcriptomics and '
'related data.',
'id': 3,
'name': 'ArrayExpress',
'prefix': 'arrayexpress',
'url': 'https://bioregistry.io/arrayexpress:*'}
Status
# Fetch the first three Status rows and pretty-print each one as a plain dict
# via its API schema.
with get_session() as session:
    statuses: List[models.Status] = session.query(models.Status).limit(3).all()
    # Kept inside the session block so the rows are still attached while they
    # are being serialised.
    for status in statuses:
        pprint(schemas.Status.model_validate(status).model_dump())
{'id': 1, 'name': 'CHECKED'}
{'id': 3, 'name': 'OK'}
{'id': 9, 'name': 'SUBMITTED'}