Download Notebook View in GitHub Open in Google Colab
Using QCArchive with the OpenFF Toolkit
Here we show how to create OpenFF molecules safely from data in the QCArchive using the CMILES entries. This transformation relies on the "canonical_isomeric_explicit_hydrogen_mapped_smiles" attribute of each entry.
First, load up the client you wish to connect to; in this case, we use the public instance.
import qcportal
from openff.toolkit import Molecule
# Connect to the public QCArchive server; pass a different address to use another instance.
client = qcportal.PortalClient("https://api.qcarchive.molssi.org:443")
# Print a table of the datasets available on this server (id, type, record count and name).
print(client.list_datasets_table())
/home/runner/micromamba/envs/openff-docs-examples/lib/python3.10/site-packages/pandas/core/computation/expressions.py:21: UserWarning: Pandas requires version '2.8.4' or newer of 'numexpr' (version '2.7.3' currently installed).
from pandas.core.computation.check import NUMEXPR_INSTALLED
id type record_count name
---- ---------------- -------------- ----------------------------------------------------------------------------------
35 torsiondrive 454 OpenFF Fragmenter Phenyl Benchmark
36 torsiondrive 820 OpenFF Group1 Torsions
41 optimization 937 OpenFF Optimization Set 1
42 torsiondrive 86 Fragment Stability Benchmark
43 optimization 1132 SMIRNOFF Coverage Set 1
45 optimization 25500 OpenFF VEHICLe Set 1
48 torsiondrive 585 SMIRNOFF Coverage Torsion Set 1
49 optimization 189 OpenFF NCI250K Boron 1
50 optimization 19714 OpenFF Discrepancy Benchmark 1
57 torsiondrive 795 OpenFF Substituted Phenyl Set 1
68 optimization 352 Pfizer Discrepancy Optimization Dataset 1
69 optimization 6670 FDA Optimization Dataset 1
70 torsiondrive 227 Pfizer Discrepancy Torsion Dataset 1
71 gridoptimization 311 OpenFF Trivalent Nitrogen Set 1 (deprecated)
148 reaction 994 AlkIsod14
149 reaction 1846 BHPERI26
151 singlepoint 937 OpenFF Optimization Set 1
152 singlepoint 48280 OpenFF VEHICLe Set 1
153 singlepoint 189 OpenFF NCI250K Boron 1
156 reaction 710 CYCONF
157 manybody 1946 DS14
158 manybody 1390 FmH2O10
159 singlepoint 18864 OpenFF Discrepancy Benchmark 1
160 manybody 2919 NC15
161 reaction 4544 Butanediol65
162 manybody 2085 HB15
163 reaction 1065 ACONF
164 reaction 1278 HNBrBDE18
165 reaction 12936 MPCONF196
167 reaction 781 AlkIsomer11
168 manybody 2919 A21
169 manybody 3336 A24
170 reaction 2556 BSR36
171 reaction 2130 BH76RC
172 manybody 1668 AlkBind12
173 reaction 426 C20C24
174 singlepoint 1109 SMIRNOFF Coverage Set 1
194 torsiondrive 31 OpenFF Primary TorsionDrive Benchmark 1
195 torsiondrive 227 OpenFF Full TorsionDrive Benchmark 1
196 optimization 1885 OpenFF Primary Optimization Benchmark 1
197 optimization 26736 OpenFF Full Optimization Benchmark 1
213 singlepoint 0 Solvated Protein Fragments
217 torsiondrive 4 TorsionDrive Paper
231 gridoptimization 311 OpenFF Trivalent Nitrogen Set 1
232 optimization 6567 Kinase Inhibitors: WBO Distributions
235 torsiondrive 259 OpenFF Primary Benchmark 1 Torsion Set
237 gridoptimization 311 OpenFF Trivalent Nitrogen Set 2
239 gridoptimization 126 OpenFF Trivalent Nitrogen Set 3
241 torsiondrive 595 OpenFF Primary Benchmark 2 Torsion Set
242 torsiondrive 19 OpenFF Group1 Torsions 2
243 torsiondrive 6 OpenFF Group1 Torsions 3
245 torsiondrive 65 OpenFF Gen 2 Torsion Set 1 Roche
246 torsiondrive 93 OpenFF Gen 2 Torsion Set 2 Coverage
247 torsiondrive 38 OpenFF Gen 2 Torsion Set 3 Pfizer Discrepancy
248 torsiondrive 106 OpenFF Gen 2 Torsion Set 4 eMolecules Discrepancy
249 torsiondrive 100 OpenFF Gen 2 Torsion Set 5 Bayer
250 torsiondrive 7 OpenFF Gen 2 Torsion Set 6 Supplemental
251 optimization 298 OpenFF Gen 2 Opt Set 1 Roche
253 optimization 373 OpenFF Gen 2 Opt Set 2 Coverage
254 optimization 2201 OpenFF Gen 2 Opt Set 4 eMolecules Discrepancy
255 optimization 197 OpenFF Gen 2 Opt Set 3 Pfizer Discrepancy
256 torsiondrive 142 OpenFF Gen 2 Torsion Set 1 Roche 2
257 torsiondrive 157 OpenFF Gen 2 Torsion Set 2 Coverage 2
258 torsiondrive 82 OpenFF Gen 2 Torsion Set 3 Pfizer Discrepancy 2
259 torsiondrive 272 OpenFF Gen 2 Torsion Set 4 eMolecules Discrepancy 2
260 singlepoint 298 OpenFF Gen 2 Opt Set 1 Roche
262 singlepoint 352 OpenFF Gen 2 Opt Set 2 Coverage
263 singlepoint 197 OpenFF Gen 2 Opt Set 3 Pfizer Discrepancy
264 singlepoint 2181 OpenFF Gen 2 Opt Set 4 eMolecules Discrepancy
265 torsiondrive 219 OpenFF Gen 2 Torsion Set 5 Bayer 2
266 torsiondrive 22 OpenFF Gen 2 Torsion Set 6 Supplemental 2
268 torsiondrive 1459 SiliconTX Torsion Benchmark Set 1
270 optimization 1850 OpenFF Gen 2 Opt Set 5 Bayer
272 singlepoint 1772 OpenFF Gen 2 Opt Set 5 Bayer
275 torsiondrive 462 Fragmenter paper
278 torsiondrive 440 OpenFF Fragmenter Validation 1.0
279 optimization 3485 OpenFF Ehrman Informative Optimization v0.1
281 optimization 3485 OpenFF Ehrman Informative Optimization v0.2
282 torsiondrive 20 OpenFF DANCE 1 eMolecules t142 v1.0
283 torsiondrive 87 OpenFF Rowley Biaryl v1.0
284 optimization 576 OpenFF Protein Fragments v1.0
285 torsiondrive 36 OpenFF Theory Benchmarking Set B3LYP-D3BJ DZVP v1.0
286 torsiondrive 36 OpenFF Theory Benchmarking Set B3LYP-D3BJ def2-TZVP v1.0
287 torsiondrive 36 OpenFF Theory Benchmarking Set B3LYP-D3BJ def2-TZVPD v1.0
288 torsiondrive 36 OpenFF Theory Benchmarking Set B3LYP-D3BJ def2-TZVPP v1.0
289 torsiondrive 3848 OpenFF-benchmark-ligand-fragments-v1.0
290 optimization 6716 OpenFF Protein Fragments v2.0
291 torsiondrive 845 OpenFF Protein Fragments TorsionDrives v1.0
296 optimization 85897 OpenFF Sandbox CHO PhAlkEthOH v1.0
297 optimization 6716 OpenFF Protein Peptide Fragments constrained v1.0
298 optimization 6709 OpenFF Protein Peptide Fragments unconstrained v1.0
299 torsiondrive 36 OpenFF Theory Benchmarking Set B3LYP-D3BJ 6-31+Gss
300 torsiondrive 36 OpenFF Theory Benchmarking Set B3LYP-D3BJ def2-TZVPPD
301 optimization 22680 OpenFF PEPCONF OptimizationDataset v1.0
302 torsiondrive 780 OpenFF Substituted Phenyl Set 1 v2.0
303 singlepoint 382 OpenFF BCC Refit Study COH v1.0
304 optimization 1376 OpenFF Roche Opt Set With Protomers and Tautomers v1.0
305 optimization 445 Genentech PDB Ligand Expo whole optimization neutral v1.0
307 optimization 1376 OpenFF Roche Opt Set With Protomers and Tautomers v1.1
308 torsiondrive 787 OpenFF WBO Conjugated Series v1.0
309 torsiondrive 767 OpenFF Theory Benchmarking Set v1.0
310 optimization 864 OpenFF Theory Benchmarking Constrained Optimization Set B3LYP-NL def2-TZVPD v1.0
311 optimization 864 OpenFF Theory Benchmarking Constrained Optimization Set MP2 heavy-aug-cc-pVTZ v1.0
312 optimization 864 OpenFF Theory Benchmarking Constrained Optimization Set WB97X-V def2-TZVPD v1.0
313 optimization 2363 Genentech PDB Ligand Expo fragment optimization neutral v1.0
314 torsiondrive 260 OpenFF Amide Torsion Set v1.0
315 optimization 223 OpenFF Aniline Para Opt v1.0
316 optimization 69672 OpenFF Industry Benchmark Season 1 v1.0
317 torsiondrive 888 OpenFF Gen3 Torsion Set v1.0
318 torsiondrive 24 OpenFF Aniline 2D Impropers v1.0
319 optimization 539385 OpenFF Industry Benchmark Season 1 v1.1
320 optimization 4650 OpenFF BCC Refit Study COH v2.0
321 optimization 1416 OpenFF Theory Benchmarking Constrained Optimization Set MP2 heavy-aug-cc-pVTZ v1.1
322 optimization 501585 OpenFF Industry Benchmark Season 1 - MM v1.1
323 singlepoint 66552 OpenFF Theory Benchmarking Single Point Energies v1.0
324 torsiondrive 8052 OpenFF-benchmark-ligand-fragments-v2.0
326 optimization 735 OpenFF RESP Polarizability Optimizations v1.0
327 optimization 735 OpenFF RESP Polarizability Optimizations v1.1
329 singlepoint 24000 TorsionNet500 Single Points Dataset v1.0
333 torsiondrive 5 OpenFF Protein Dipeptide 2-D TorsionDrive v1.0
344 torsiondrive 5 OpenFF Protein Dipeptide 2-D TorsionDrive v1.1
345 optimization 600 OpenFF Gen2 Optimization Dataset Protomers v1.0
346 torsiondrive 26 OpenFF Protein Dipeptide 2-D TorsionDrive v2.0
347 singlepoint 65116 OpenFF ESP Fragment Conformers v1.0
351 optimization 56054 OpenFF ESP Industry Benchmark Set v1.0
352 torsiondrive 46 OpenFF Protein Capped 1-mer Sidechains v1.0
353 torsiondrive 26 OpenFF Protein Dipeptide 2-D TorsionDrive v2.1
357 singlepoint 39983 OpenFF ESP Industry Benchmark Set v1.1
358 torsiondrive 46 OpenFF Protein Capped 1-mer Sidechains v1.1
359 torsiondrive 46 OpenFF Protein Capped 1-mer Sidechains v1.2
360 torsiondrive 61 OpenFF Protein Capped 1-mer Sidechains v1.3
363 optimization 400 OpenFF multiplicity correction optimization set v1.0
364 torsiondrive 99 OpenFF multiplicity correction torsion drive data v1.0
365 optimization 759 OpenFF Protein Capped 1-mers 3-mers Optimization Dataset v1.0
366 torsiondrive 54 OpenFF Protein Capped 3-mer Backbones v1.0
370 torsiondrive 131 OpenFF multiplicity correction torsion drive data v1.1
371 singlepoint 13467 RNA Single Point Dataset v1.0
372 optimization 327 OpenFF Iodine Chemistry Optimization Dataset v1.0
373 singlepoint 81670 RNA Trinucleotide Single Point Dataset v1.0
374 torsiondrive 26 OpenFF Protein Capped 3-mer Omega v1.0
375 singlepoint 19110 RNA Nucleoside Single Point Dataset v1.0
376 optimization 677 OpenFF multi-Br ESP Fragment Conformers v1.0
377 singlepoint 650 OpenFF multi-Br ESP Fragment Conformers v1.1
378 torsiondrive 169 XtalPi Shared Fragments TorsiondriveDataset v1.0
379 optimization 2384 XtalPi Shared Fragments OptimizationDataset v1.0
380 torsiondrive 43 OpenFF Torsion Coverage Supplement v1.0
381 torsiondrive 192 OpenFF RNA Dinucleoside Monophosphate TorsionDrives v1.0
382 torsiondrive 8737 XtalPi 20-percent Fragments TorsiondriveDataset v1.0
383 optimization 128180 XtalPi 20-percent Fragments OptimizationDataset v1.0
384 torsiondrive 18 OpenFF Torsion Drive Supplement v1.0
385 optimization 51 OpenFF Torsion Benchmark Supplement v1.0
386 torsiondrive 59 OpenFF Torsion Multiplicity Torsion Drive Coverage Supplement v1.0
387 optimization 185 OpenFF Torsion Multiplicity Optimization Training Coverage Supplement v1.0
388 optimization 451 OpenFF Torsion Multiplicity Optimization Benchmarking Coverage Supplement v1.0
389 torsiondrive 318 OpenFF Phosphate Torsion Drives v1.0
390 torsiondrive 192 OpenFF Alkane Torsion Drives v1.0
391 singlepoint 137932 MLPepper RECAP Optimized Fragments v1.0
392 optimization 531 OpenFF Iodine Fragment Opt v1.0
393 optimization 899 OpenFF Sulfur Optimization Training Coverage Supplement v1.0
394 singlepoint 1009 OpenFF NAGL2 ESP Timing Benchmark v1.0
395 singlepoint 2018 OpenFF NAGL2 ESP Timing Benchmark v1.1
396 optimization 319 OpenFF Sulfur Optimization Benchmarking Coverage Supplement v1.0
397 singlepoint 899 OpenFF Sulfur Hessian Training Coverage Supplement v1.0
398 singlepoint 223 OpenFF Aniline Para Hessian v1.0
Data in the QCArchive is organized into datasets. Let’s grab a molecule from an optimization dataset:
# Look the dataset up by its type and its exact name, both as shown in the
# table printed by client.list_datasets_table().
dataset = client.get_dataset(
    dataset_type="optimization",
    dataset_name="Kinase Inhibitors: WBO Distributions",
)
Take an arbitrary entry from the collection.
# Entries are fetched by name; [-1] simply picks the last name in the dataset,
# since any entry would do for this example.
entry = dataset.get_entry(entry_name=dataset.entry_names[-1])
We can view the entry in detail by looking at the dictionary representation.
# Bare expression: in a notebook this displays the entry as a plain dictionary.
entry.dict()
{'name': 'cs(=o)(=o)ccncc1ccc(o1)c2ccc3c(c2)c(ncn3)nc4ccc(c(c4)cl)occ5cccc(c5)f-99',
'initial_molecule': {'schema_name': 'qcschema_molecule',
'schema_version': 2,
'validated': True,
'symbols': array(['C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C',
'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C',
'C', 'C', 'C', 'N', 'N', 'N', 'N', 'O', 'O', 'O', 'O', 'F', 'S',
'Cl', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H',
'H'], dtype='<U2'),
'geometry': array([[ 2.89795594e+01, -5.61363284e+00, -1.25287815e+00],
[ 1.93871961e+00, -2.32111000e-01, 6.33108960e-01],
[ 2.68259179e+01, -4.17201102e+00, -7.72020570e-01],
[ 3.00076085e+00, 1.52774996e+00, 2.29260082e+00],
[ 1.49570525e+01, -1.11862575e+00, 1.34408755e+00],
[ 1.75606324e+01, -1.34772255e+00, 1.00269997e+00],
[ 2.88520513e+01, -7.66237506e+00, -2.90660376e+00],
[-1.45658410e-01, -4.97426507e+00, -2.31821525e+00],
[ 3.61899100e-02, -6.76496484e+00, -4.29656036e+00],
[ 5.84179420e+00, -2.55574391e+00, 6.16566210e-01],
[ 2.44170261e+01, -6.82796240e+00, -3.59833171e+00],
[ 1.47074969e+01, -5.54975040e+00, 2.41523984e+00],
[ 8.88178633e+00, 2.63147751e+00, 5.47147212e+00],
[ 6.89608538e+00, -7.77978130e-01, 2.28537091e+00],
[ 3.36012318e+00, -2.27643411e+00, -2.05974190e-01],
[ 2.45446332e+01, -4.77917198e+00, -1.94467087e+00],
[ 5.47938551e+00, 1.29451410e+00, 3.14641803e+00],
[ 1.35304415e+01, -3.21962903e+00, 2.05047930e+00],
[ 1.87377014e+01, -3.67784426e+00, 1.36746581e+00],
[ 2.65707774e+01, -8.26954593e+00, -4.07925220e+00],
[ 1.73111436e+01, -5.77891513e+00, 2.07375878e+00],
[ 2.26685911e+00, -4.09354036e+00, -1.91777351e+00],
[ 2.54682332e+00, -6.85047933e+00, -4.96434856e+00],
[ 9.36421367e+00, -9.47290490e-01, 3.17955455e+00],
[ 4.24499629e+00, -9.67072630e+00, -1.91013351e+01],
[ 2.22291900e+01, -3.22681771e+00, -1.42596726e+00],
[ 3.96002561e+00, -8.30889479e+00, -6.85900096e+00],
[ 4.53186197e+00, -9.37210543e+00, -1.12315297e+01],
[ 3.74896337e+00, -8.84306732e+00, -1.39373610e+01],
[ 6.46872465e+00, 3.00571708e+00, 4.74281203e+00],
[ 1.04136881e+01, 7.31465300e-01, 4.77749054e+00],
[ 1.09100724e+01, -2.98799013e+00, 2.39376886e+00],
[ 3.09195245e+00, -7.87277816e+00, -9.41970199e+00],
[ 8.13355678e+00, -9.67381434e+00, -1.60552111e+01],
[ 4.99185979e+00, -1.32529008e+01, -1.57833896e+01],
[ 3.91927510e+00, -5.22754616e+00, -3.52328384e+00],
[ 2.12846153e+01, -3.84659395e+00, 1.02017540e+00],
[ 2.64477766e+01, -1.02372453e+01, -5.66668056e+00],
[ 5.54591265e+00, -1.05877799e+01, -1.61420006e+01],
[ 1.87184396e+01, -8.68283521e+00, 2.53736164e+00],
[ 3.07562580e+01, -5.14049891e+00, -3.40496340e-01],
[-4.65003000e-03, 2.67554700e-02, 1.66408900e-02],
[ 2.69431896e+01, -2.57930069e+00, 5.19680380e-01],
[ 1.84816744e+00, 3.11097314e+00, 2.92148844e+00],
[ 1.40590732e+01, 7.06064040e-01, 1.05468507e+00],
[ 1.86602796e+01, 2.96905580e-01, 4.52302520e-01],
[ 3.05285053e+01, -8.78494061e+00, -3.27991363e+00],
[-1.83066261e+00, -4.40414485e+00, -1.31444107e+00],
[-1.47796825e+00, -7.85402013e+00, -5.12826389e+00],
[ 6.94015282e+00, -4.16145113e+00, -5.01537200e-02],
[ 2.26487725e+01, -7.31482942e+00, -4.52177921e+00],
[ 1.35866831e+01, -7.18299521e+00, 2.96635289e+00],
[ 9.66759771e+00, 4.01679529e+00, 6.75738125e+00],
[ 5.25555885e+00, -1.06711597e+01, -2.05945160e+01],
[ 2.24609368e+00, -1.01756755e+01, -1.91179422e+01],
[ 4.47789379e+00, -7.63297605e+00, -1.93109470e+01],
[ 2.26644983e+01, -1.20419194e+00, -1.48882911e+00],
[ 2.07621173e+01, -3.60554472e+00, -2.83618480e+00],
[ 6.00251831e+00, -7.85696220e+00, -6.84065293e+00],
[ 3.83567181e+00, -1.03722487e+01, -6.53978249e+00],
[ 4.27006061e+00, -1.13641289e+01, -1.07410105e+01],
[ 6.52448646e+00, -8.89050259e+00, -1.09607687e+01],
[ 4.04834476e+00, -6.84946966e+00, -1.43964509e+01],
[ 1.76271456e+00, -9.31303167e+00, -1.42681283e+01],
[ 9.92112705e+00, -4.58168017e+00, 2.00994347e+00],
[ 3.28745938e+00, -6.00243631e+00, -9.86032950e+00]]),
'name': 'C29ClFH26N4O4S',
'identifiers': {'molecule_hash': '58aaa33a5e7d9445e4276a015de257c3d6bde558',
'molecular_formula': 'C29ClFH26N4O4S'},
'molecular_charge': 0.0,
'molecular_multiplicity': 1,
'masses': array([12. , 12. , 12. , 12. , 12. ,
12. , 12. , 12. , 12. , 12. ,
12. , 12. , 12. , 12. , 12. ,
12. , 12. , 12. , 12. , 12. ,
12. , 12. , 12. , 12. , 12. ,
12. , 12. , 12. , 12. , 14.003074 ,
14.003074 , 14.003074 , 14.003074 , 15.99491462, 15.99491462,
15.99491462, 15.99491462, 18.99840316, 31.97207117, 34.96885268,
1.00782503, 1.00782503, 1.00782503, 1.00782503, 1.00782503,
1.00782503, 1.00782503, 1.00782503, 1.00782503, 1.00782503,
1.00782503, 1.00782503, 1.00782503, 1.00782503, 1.00782503,
1.00782503, 1.00782503, 1.00782503, 1.00782503, 1.00782503,
1.00782503, 1.00782503, 1.00782503, 1.00782503, 1.00782503,
1.00782503]),
'real': array([ True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True]),
'atom_labels': array(['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '', '', '', ''],
dtype='<U1'),
'atomic_numbers': array([ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8,
8, 8, 8, 9, 16, 17, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
dtype=int16),
'mass_numbers': array([12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 14, 14, 14, 14, 16,
16, 16, 16, 19, 32, 35, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
dtype=int16),
'connectivity': [(0, 2, 2.0),
(0, 6, 1.0),
(0, 40, 1.0),
(1, 3, 2.0),
(1, 14, 1.0),
(1, 41, 1.0),
(2, 15, 1.0),
(2, 42, 1.0),
(3, 16, 1.0),
(3, 43, 1.0),
(4, 5, 2.0),
(4, 17, 1.0),
(4, 44, 1.0),
(5, 18, 1.0),
(5, 45, 1.0),
(6, 19, 2.0),
(6, 46, 1.0),
(7, 8, 1.0),
(7, 21, 2.0),
(7, 47, 1.0),
(8, 22, 2.0),
(8, 48, 1.0),
(9, 13, 1.0),
(9, 14, 2.0),
(9, 49, 1.0),
(10, 15, 2.0),
(10, 19, 1.0),
(10, 50, 1.0),
(11, 17, 2.0),
(11, 20, 1.0),
(11, 51, 1.0),
(12, 29, 1.0),
(12, 30, 2.0),
(12, 52, 1.0),
(13, 16, 1.0),
(13, 23, 2.0),
(14, 21, 1.0),
(15, 25, 1.0),
(16, 29, 2.0),
(17, 31, 1.0),
(18, 20, 2.0),
(18, 36, 1.0),
(19, 37, 1.0),
(20, 39, 1.0),
(21, 35, 1.0),
(22, 26, 1.0),
(22, 35, 1.0),
(23, 30, 1.0),
(23, 31, 1.0),
(24, 38, 1.0),
(24, 53, 1.0),
(24, 54, 1.0),
(24, 55, 1.0),
(25, 36, 1.0),
(25, 56, 1.0),
(25, 57, 1.0),
(26, 32, 1.0),
(26, 58, 1.0),
(26, 59, 1.0),
(27, 28, 1.0),
(27, 32, 1.0),
(27, 60, 1.0),
(27, 61, 1.0),
(28, 38, 1.0),
(28, 62, 1.0),
(28, 63, 1.0),
(31, 64, 1.0),
(32, 65, 1.0),
(33, 38, 2.0),
(34, 38, 2.0)],
'fragments': [array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65],
dtype=int32)],
'fragment_charges': [0.0],
'fragment_multiplicities': [1],
'fix_com': True,
'fix_orientation': True,
'provenance': {'creator': 'QCElemental',
'version': 'v0.11.1',
'routine': 'qcelemental.molparse.from_schema'},
'id': 9590661,
'extras': {}},
'additional_keywords': {},
'attributes': {'inchi_key': 'BCFGMOOMADDAQU-UHFFFAOYSA-N',
'provenance': 'cmiles_v0.1.5+1.gdbd63e8_openeye_2019.Apr.2',
'standard_inchi': 'InChI=1S/C29H26ClFN4O4S/c1-40(36,37)12-11-32-16-23-7-10-27(39-23)20-5-8-26-24(14-20)29(34-18-33-26)35-22-6-9-28(25(30)15-22)38-17-19-3-2-4-21(31)13-19/h2-10,13-15,18,32H,11-12,16-17H2,1H3,(H,33,34,35)',
'canonical_smiles': 'CS(=O)(=O)CCNCc1ccc(o1)c2ccc3c(c2)c(ncn3)Nc4ccc(c(c4)Cl)OCc5cccc(c5)F',
'molecular_formula': 'C29H26ClFN4O4S',
'canonical_isomeric_smiles': 'CS(=O)(=O)CCNCc1ccc(o1)c2ccc3c(c2)c(ncn3)Nc4ccc(c(c4)Cl)OCc5cccc(c5)F',
'unique_protomer_representation': 'CS(=O)(=O)CCNCc1ccc(o1)c2ccc3c(c2)c(ncn3)Nc4ccc(c(c4)Cl)OCc5cccc(c5)F',
'unique_tautomer_representation': 'CS(=O)(=O)CCNCc1ccc(-c2ccc3[nH]cnc(=Nc4ccc(OCc5cccc(F)c5)c(Cl)c4)c3c2)o1',
'canonical_explicit_hydrogen_smiles': '[H]c1c(c(c(c(c1[H])F)[H])C([H])([H])Oc2c(c(c(c(c2Cl)[H])N([H])c3c4c(c(c(c(c4nc(n3)[H])[H])[H])c5c(c(c(o5)C([H])([H])N([H])C([H])([H])C([H])([H])S(=O)(=O)C([H])([H])[H])[H])[H])[H])[H])[H])[H]',
'canonical_isomeric_explicit_hydrogen_smiles': '[H]c1c(c(c(c(c1[H])F)[H])C([H])([H])Oc2c(c(c(c(c2Cl)[H])N([H])c3c4c(c(c(c(c4nc(n3)[H])[H])[H])c5c(c(c(o5)C([H])([H])N([H])C([H])([H])C([H])([H])S(=O)(=O)C([H])([H])[H])[H])[H])[H])[H])[H])[H]',
'canonical_isomeric_explicit_hydrogen_mapped_smiles': '[H:41][c:1]1[c:3]([c:16]([c:11]([c:20]([c:7]1[H:47])[F:38])[H:51])[C:26]([H:57])([H:58])[O:37][c:19]2[c:6]([c:5]([c:18]([c:12]([c:21]2[Cl:40])[H:52])[N:32]([H:65])[c:24]3[c:14]4[c:10]([c:15]([c:2]([c:4]([c:17]4[n:30][c:13]([n:31]3)[H:53])[H:44])[H:42])[c:22]5[c:8]([c:9]([c:23]([o:36]5)[C:27]([H:59])([H:60])[N:33]([H:66])[C:28]([H:61])([H:62])[C:29]([H:63])([H:64])[S:39](=[O:34])(=[O:35])[C:25]([H:54])([H:55])[H:56])[H:49])[H:48])[H:50])[H:45])[H:46])[H:43]'},
'comment': None}
Now we can make a molecule using a few different input options.
# First, make a molecule directly from the dataset entry object.
molecule_from_entry = Molecule.from_qcschema(entry)
# We could also have used the dictionary representation of the entry.
molecule_from_dict = Molecule.from_qcschema(entry.dict())
# Both input forms must produce the same toolkit molecule.
assert molecule_from_entry == molecule_from_dict
# The rest of the example works with this molecule.
molecule = molecule_from_entry
# First, let's get the initial molecule of this entry from the database.
initial_molecule = client.get_molecules(entry.initial_molecule.id)

# Note that this molecule uses an object model from QCArchive, _not_ the toolkit.
print(type(initial_molecule))

# Check that the toolkit molecule has been ordered to match the ordering used
# in the database by printing the atomic numbers of both objects side by side.
# zip() stops at the shorter input, so compare the atom counts first; otherwise
# a missing or extra atom would go unnoticed.
assert molecule.n_atoms == len(initial_molecule.atomic_numbers)
for atom, qc_atomic_number in zip(molecule.atoms, initial_molecule.atomic_numbers):
    print(atom.atomic_number, qc_atomic_number)
    assert atom.atomic_number == qc_atomic_number

# We can compare other things, too (the two formulas order the elements
# differently, so they are printed rather than asserted equal).
print(molecule.to_hill_formula(), initial_molecule.get_molecular_formula())

# QCArchive molecules don't store all information the
# toolkit needs, like bond orders and formal charges;
# that's why there is a Molecule.from_qcschema() method at all
<class 'qcelemental.models.molecule.Molecule'>
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
6 6
7 7
7 7
7 7
7 7
8 8
8 8
8 8
8 8
9 9
16 16
17 17
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
C29H26ClFN4O4S C29ClFH26N4O4S
# We can also compare the graph representations of the molecules to make sure
# they are in the same order.
import networkx as nx

# Make a graph of the initial molecule using networkx and the data in the record:
# one node per atom, keyed by its index and labelled with its atomic number.
initial_network = nx.Graph()
for index, atomic_number in enumerate(initial_molecule.atomic_numbers):
    initial_network.add_node(index, atomic_number=atomic_number)
for bond in initial_molecule.connectivity:
    # Each connectivity item is (atom_index, atom_index, bond_order);
    # only the two indices are needed to define the edge.
    initial_network.add_edge(*bond[:2])

# Now we can use the isomorphism check to get the atom mapping. Every optional
# matching criterion is switched off because the graph built above carries
# only atomic numbers and connectivity.
isomorphic, atom_map = Molecule.are_isomorphic(
    molecule,
    initial_network,
    return_atom_map=True,
    aromatic_matching=False,
    formal_charge_matching=False,
    bond_order_matching=False,
    bond_stereochemistry_matching=False,
    atom_stereochemistry_matching=False,
)

# We can check that the graphs were found to be isomorphic and that the atom
# mapping is the identity, i.e. both molecules use the same atom ordering.
assert isomorphic
print(atom_map)
for index1, index2 in atom_map.items():
    assert index1 == index2
{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65}
# Build the toolkit molecule from the entry once more; leaving `mol` as the
# last expression of the cell lets the notebook display it.
mol = Molecule.from_qcschema(entry)
mol
# OpenFF Toolkit `Molecule` objects can be converted back into QCArchive molecules,
# as long as there are conformer(s).
# Note: this converts `molecule` (created earlier), not `mol`.
qc_molecule = molecule.to_qcschema()
qc_molecule
This transformation unlocks functionality of QCEngine (computing energies, gradients, hessians, etc. with a variety of different methods).
Here we will try to compute the energy using RDKit (only run this cell if QCEngine is installed).
import qcengine

# Set up the RDKit task: a QCSchema input asking for the energy of
# `qc_molecule` with the UFF method.
rdkit_task = {
    "schema_name": "qcschema_input",
    "schema_version": 2,
    "molecule": qc_molecule,
    "driver": "energy",
    "model": {"method": "uff", "basis": None},
    "keywords": {"scf_type": "df"},
}

# Now let's compute the energy using QCEngine and RDKit and print the result.
# raise_error=True makes a failed computation raise its real error here; by
# default QCEngine returns a failure object instead, and the `return_result`
# access below would then fail with an unrelated AttributeError.
result = qcengine.compute(rdkit_task, "rdkit", raise_error=True)

# note the result is in QC units of hartrees
print(result.return_result)
0.05930479138457709