Python rdkit.Chem.AllChem.MolFromSmiles() Examples
The following are 23 code examples of rdkit.Chem.AllChem.MolFromSmiles().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module rdkit.Chem.AllChem, or try the search function.
Example #1
Source File: encoder.py From mhfp with MIT License | 6 votes |
def secfp_from_smiles(
    in_smiles, length=2048, radius=3, rings=True, kekulize=True, sanitize=False
):
    """Build the folded binary vector fingerprint of an input SMILES string.

    The SMILES is parsed with RDKit and the resulting molecule is handed to
    MHFPEncoder.secfp_from_mol together with the fingerprint options.

    Arguments:
        in_smiles {string} -- A valid SMILES string
        length {int} -- The length of the folded fingerprint (default: {2048})
        radius {int} -- The MHFP radius (a radius of 3 corresponds to SECFP6)
            (default: {3})
        rings {boolean} -- Whether or not to include rings in the shingling
            (default: {True})
        kekulize {boolean} -- Whether or not to kekulize the extracted SMILES
            (default: {True})
        sanitize {boolean} -- Whether or not to sanitize the SMILES when
            parsing it using RDKit (default: {False})

    Returns:
        numpy.ndarray -- The folded fingerprint.
    """
    mol = AllChem.MolFromSmiles(in_smiles, sanitize=sanitize)
    return MHFPEncoder.secfp_from_mol(
        mol, length=length, radius=radius, rings=rings, kekulize=kekulize
    )
Example #2
Source File: encoder.py From mhfp with MIT License | 6 votes |
def shingling_from_smiles(
    in_smiles, radius=3, rings=True, kekulize=True, min_radius=1, sanitize=False
):
    """Creates a molecular shingling from a SMILES string.

    The SMILES is parsed with RDKit and the resulting molecule is handed to
    MHFPEncoder.shingling_from_mol together with the shingling options.

    Arguments:
        in_smiles {string} -- A valid SMILES string
        radius {int} -- The MHFP radius (a radius of 3 corresponds to MHFP6)
            (default: {3})
        rings {boolean} -- Whether or not to include rings in the shingling
            (default: {True})
        kekulize {boolean} -- Whether or not to kekulize the extracted SMILES
            (default: {True})
        min_radius {int} -- The minimum radius that is used to extract n-grams
            (default: {1})
        sanitize {boolean} -- Whether or not to sanitize the SMILES when
            parsing it using RDKit (default: {False})

    Returns:
        list -- The molecular shingling.
    """
    mol = AllChem.MolFromSmiles(in_smiles, sanitize=sanitize)
    return MHFPEncoder.shingling_from_mol(
        mol,
        rings=rings,
        radius=radius,
        # Forward the caller's choice; this used to be hard-coded to True,
        # which silently ignored kekulize=False.
        kekulize=kekulize,
        min_radius=min_radius,
    )
Example #3
Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0 | 6 votes |
def NP_score(smile):
    """Score a SMILES string against NP_model and map the result into [0, 1].

    The score is the sum of the NP_model contributions of the molecule's
    radius-2 Morgan fingerprint bits, divided by the atom count, dampened
    outside [-4, 4], then passed through remap(score, -3, 1) and clipped.
    (NP presumably stands for "natural product" -- confirm against the
    origin of NP_model.)
    """
    mol = Chem.MolFromSmiles(smile)
    fingerprint = Chem.GetMorganFingerprint(mol, 2)

    # Accumulate per-bit contributions; bits missing from the model add 0.
    total = 0.
    for bit in fingerprint.GetNonzeroElements():
        total += NP_model.get(bit, 0)
    score = total / float(mol.GetNumAtoms())

    # Log-compress anything beyond +/-4 so exotic molecules cannot make the
    # score explode.
    if score > 4:
        score = 4. + math.log10(score - 4. + 1.)
    elif score < -4:
        score = -4. - math.log10(-4. - score + 1.)

    return np.clip(remap(score, -3, 1), 0.0, 1.0)
Example #4
Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0 | 6 votes |
def batch_mixed_diversity(smiles, set_smiles):
    """Score each generated SMILES by diversity against two reference pools.

    For every entry of ``smiles`` that passes verify_sequence, the result is
    diversity(s, set_fps) + diversity(s, gen_fps), where set_fps comes from a
    random sample of 100 ``set_smiles`` and gen_fps from a random sample of
    500 ``smiles``. Entries that fail verify_sequence score 0.0.

    Previously the second fingerprint list overwrote the first, the sample of
    generated SMILES was drawn but never used, and the same list was scored
    twice, so the reference set had no influence on the result.

    Arguments:
        smiles -- generated SMILES strings to score (at least 500, as
            required by random.sample)
        set_smiles -- reference SMILES strings (at least 100)

    Returns:
        list -- one score per entry of ``smiles``, in order.
    """
    # Reference pool drawn from the provided set.
    set_sample = random.sample(set_smiles, 100)
    set_mols = [Chem.MolFromSmiles(s) for s in set_sample]
    set_fps = [
        Chem.GetMorganFingerprintAsBitVect(m, 4, nBits=2048)
        for m in set_mols
        if m is not None
    ]

    # Reference pool drawn from the generated batch itself. Generated strings
    # may be unparsable (MolFromSmiles returns None), so those are skipped
    # rather than passed on to the fingerprint call.
    gen_sample = random.sample(smiles, 500)
    gen_mols = [Chem.MolFromSmiles(s) for s in gen_sample]
    gen_fps = [
        Chem.GetMorganFingerprintAsBitVect(m, 4, nBits=2048)
        for m in gen_mols
        if m is not None
    ]

    return [
        diversity(s, set_fps) + diversity(s, gen_fps)
        if verify_sequence(s) else 0.0
        for s in smiles
    ]
Example #5
Source File: mol_utils.py From chemical_vae with Apache License 2.0 | 5 votes |
def canon_smiles(smi):
    """Return the canonical, isomeric SMILES string RDKit writes for ``smi``."""
    mol = Chem.MolFromSmiles(smi)
    return Chem.MolToSmiles(mol, isomericSmiles=True, canonical=True)
Example #6
Source File: mol_utils.py From chemical_vae with Apache License 2.0 | 5 votes |
def CheckSmiFeasible(smi):
    """Report whether a SMILES string survives a parse/convert round trip.

    Returns True when get_molecule_smi(Chem.MolFromSmiles(smi)) completes
    without raising, and False when it raises any ordinary exception.
    """
    try:
        get_molecule_smi(Chem.MolFromSmiles(smi))
    except Exception:
        # Any failure means the string is not usable. Catching Exception
        # instead of a bare except lets KeyboardInterrupt and SystemExit
        # propagate.
        return False
    return True
Example #7
Source File: mol_utils.py From chemical_vae with Apache License 2.0 | 5 votes |
def verify_smiles(smile):
    """Tell whether ``smile`` is a non-empty, non-null, RDKit-parsable SMILES.

    The checks run in order and stop at the first failure, so RDKit is only
    consulted for values that are neither the empty string nor null.
    """
    if smile == '':
        return False
    if not pd.notnull(smile):
        return False
    return Chem.MolFromSmiles(smile) is not None
Example #8
Source File: mol_utils.py From chemical_vae with Apache License 2.0 | 5 votes |
def smiles_to_mol(smiles):
    """Parse ``smiles`` with RDKit, returning None instead of raising.

    Returns whatever Chem.MolFromSmiles returns; if that call raises an
    ordinary exception, None is returned instead.
    """
    try:
        return Chem.MolFromSmiles(smiles)
    except Exception:
        # Best-effort conversion: report failure as None. Exception (not a
        # bare except) keeps KeyboardInterrupt/SystemExit propagating.
        return None
Example #9
Source File: 2_to_fingerprint.py From mhfp with MIT License | 5 votes |
def convert(subset):
    """Write three fingerprint files for one ChEMBL subset.

    Reads the first space-separated column of
    /cluster/chembl/chembl.<subset>.smi and, for every SMILES RDKit can
    parse, appends one comma-separated line to each of:

    * chembl.<subset>.mhfp6   -- mh.encode_mol(mol)
    * chembl.<subset>.mhecfp4 -- mh.from_sparse_array over the nonzero
      elements of the radius-2 Morgan fingerprint
    * chembl.<subset>.ecfp4   -- the 2048-bit radius-2 Morgan bit vector

    All three files are produced in a single pass so each SMILES is parsed
    once instead of three times; the file contents are unchanged.
    """
    prefix = '/cluster/chembl/chembl.' + str(subset)
    actives = pd.read_csv(prefix + '.smi', sep=' ', usecols=[0], header=None)
    mh = MHFPEncoder()

    with open(prefix + '.mhfp6', 'w') as mhfp6_file, \
            open(prefix + '.mhecfp4', 'w') as mhecfp4_file, \
            open(prefix + '.ecfp4', 'w') as ecfp4_file:
        for _, row in actives.iterrows():
            mol = AllChem.MolFromSmiles(row[0])
            if not mol:
                # Unparsable SMILES are skipped in every output file.
                continue

            mhfp6 = mh.encode_mol(mol)
            mhecfp4 = mh.from_sparse_array(
                [*AllChem.GetMorganFingerprint(mol, 2).GetNonzeroElements()]
            )
            ecfp4 = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048)

            mhfp6_file.write(','.join(map(str, mhfp6)) + '\n')
            mhecfp4_file.write(','.join(map(str, mhecfp4)) + '\n')
            ecfp4_file.write(','.join(map(str, ecfp4)) + '\n')
Example #10
Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0 | 5 votes |
def substructure_match(smile, train_smiles=None, sub_mol=None):
    """Return 1 if the molecule parsed from ``smile`` contains ``sub_mol``, else 0.

    ``train_smiles`` is accepted but not used.
    """
    parsed = Chem.MolFromSmiles(smile)
    return int(parsed.HasSubstructMatch(sub_mol))

#====== NP-likeliness
Example #11
Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0 | 5 votes |
def druglikeliness(smile, train_smiles):
    """Return qed() of the molecule parsed from ``smile``, or 0.0 on failure.

    ``train_smiles`` is accepted but not used. (qed is presumably RDKit's
    QED drug-likeness estimate -- confirm against the file's imports.)
    """
    try:
        return qed(Chem.MolFromSmiles(smile))
    except Exception:
        # Unparsable or otherwise unscorable molecules get the lowest score.
        # Exception (not a bare except) keeps KeyboardInterrupt/SystemExit
        # propagating; the old unreachable trailing return is gone.
        return 0.0
Example #12
Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0 | 5 votes |
def diversity(smile, fps):
    """Score how far ``smile`` lies from the fingerprints in ``fps``.

    Computes the mean Tanimoto distance between the molecule's 2048-bit
    radius-4 Morgan fingerprint and every fingerprint in ``fps``, passes it
    through remap(mean, 0.9, 0.945) and clips the result to [0, 1].
    """
    low_rand_dst = 0.9
    mean_div_dst = 0.945

    query_mol = Chem.MolFromSmiles(smile)
    query_fp = Chem.GetMorganFingerprintAsBitVect(query_mol, 4, nBits=2048)

    # returnDistance=True yields distances rather than similarities.
    distances = DataStructs.BulkTanimotoSimilarity(
        query_fp, fps, returnDistance=True)
    mean_dist = np.mean(np.array(distances))

    return np.clip(remap(mean_dist, low_rand_dst, mean_div_dst), 0.0, 1.0)

#==============
Example #13
Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0 | 5 votes |
def batch_diversity(smiles, set_smiles):
    """Score every entry of ``smiles`` against a random reference sample.

    Draws 100 random entries from ``set_smiles``, fingerprints them, and
    returns diversity(s, fps) for each ``s`` that passes verify_sequence
    (0.0 for the others), in input order.
    """
    reference = random.sample(set_smiles, 100)
    reference_mols = [Chem.MolFromSmiles(ref) for ref in reference]

    fps = []
    for mol in reference_mols:
        fps.append(Chem.GetMorganFingerprintAsBitVect(mol, 4, nBits=2048))

    scores = []
    for s in smiles:
        if verify_sequence(s):
            scores.append(diversity(s, fps))
        else:
            scores.append(0.0)
    return scores
Example #14
Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0 | 5 votes |
def verify_sequence(smile):
    """Tell whether ``smile`` is non-empty and parses to more than one atom."""
    mol = Chem.MolFromSmiles(smile)
    if smile == '' or mol is None:
        return False
    return mol.GetNumAtoms() > 1

# def build_vocab(smiles, pad_char='_', start_char='^'):
#     i = 1
#     char_dict, ord_dict = {start_char: 0}, {0: start_char}
#     for smile in smiles:
#         for c in smile:
#             if c not in char_dict:
#                 char_dict[c] = i
#                 ord_dict[i] = c
#                 i += 1
#     char_dict[pad_char], ord_dict[i] = i, pad_char
#     return char_dict, ord_dict

# def pad(smile, n, pad_char='_'):
#     if n < len(smile):
#         return smile
#     return smile + pad_char * (n - len(smile))

# def unpad(smile, pad_char='_'): return smile.rstrip(pad_char)

# def encode(smile, max_len, char_dict): return [
#     char_dict[c] for c in pad(smile, max_len)]

# def decode(ords, ord_dict): return unpad(
#     ''.join([ord_dict[o] for o in ords]))
Example #15
Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0 | 5 votes |
def canon_smile(smile):
    """Round-trip ``smile`` through MolFromSmiles and MolToSmiles."""
    mol = MolFromSmiles(smile)
    return MolToSmiles(mol)
Example #16
Source File: tests.py From django-rdkit with BSD 3-Clause "New" or "Revised" License | 5 votes |
def setUp(self):
    """Create two CtabModel rows: one real mol block and one junk string."""
    furan = Chem.MolFromSmiles('c1cocc1')
    mol_block = Chem.MolToMolBlock(furan)
    CtabModel.objects.create(ctab=mol_block)
    CtabModel.objects.create(ctab='rubbish')
Example #17
Source File: tests.py From django-rdkit with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_pkl_io(self):
    """Fingerprints stored in BfpModel come back with the same on-bits."""
    stored = {}
    for smiles in SMILES_SAMPLE:
        fingerprint = Chem.GetMorganFingerprintAsBitVect(
            Chem.MolFromSmiles(smiles), 2, 512)
        record = BfpModel.objects.create(bfp=fingerprint)
        stored[record.pk] = fingerprint

    for record in BfpModel.objects.all():
        self.assertIn(record.pk, stored)
        expected_bits = list(stored[record.pk].GetOnBits())
        loaded_bits = list(record.bfp.GetOnBits())
        self.assertEqual(expected_bits, loaded_bits)
Example #18
Source File: tests.py From django-rdkit with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_issubstruct_lookup(self):
    """The issubstruct lookup accepts SMILES strings and RDKit molecules."""

    def hits(query):
        # Number of stored molecules matching the issubstruct lookup.
        return MoleculeModel.objects.filter(molecule__issubstruct=query).count()

    self.assertEqual(hits('CCN1c2ccccc2Sc2ccccc21'), 2)
    self.assertEqual(hits('CC[N+]([O-])(CC)CCCN1c2ccccc2S(=O)c2ccccc21'), 4)
    self.assertEqual(
        hits(Chem.MolFromSmiles('CC[N+]([O-])(CC)CCCN1c2ccccc2S(=O)c2ccccc21')),
        4,
    )
Example #19
Source File: tests.py From django-rdkit with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_hassubstruct_lookup(self):
    """The hassubstruct lookup works for strings, DB expressions and mols.

    Covers plain SMILES strings, MOL_FROM_SMILES / QMOL database
    expressions, an OR of two Q objects, and an RDKit molecule built on the
    Python side.
    """
    objs = MoleculeModel.objects.filter(molecule__hassubstruct='C1=C(C)C=CC=C1')
    self.assertEqual(objs.count(), 61)

    objs = MoleculeModel.objects.filter(
        molecule__hassubstruct=MOL_FROM_SMILES(Value('C1=C(C)C=CC=C1')))
    self.assertEqual(objs.count(), 61)

    objs = MoleculeModel.objects.filter(molecule__hassubstruct='C1=CC=CC=C1')
    cnt1 = objs.count()
    self.assertEqual(cnt1, 70)

    objs = MoleculeModel.objects.filter(molecule__hassubstruct='C1=CN=CC=C1')
    cnt2 = objs.count()
    self.assertEqual(cnt2, 7)

    objs = MoleculeModel.objects.filter(
        Q(molecule__hassubstruct='C1=CC=CC=C1') |
        Q(molecule__hassubstruct='C1=CN=CC=C1'),
    )
    cnt3 = objs.count()
    self.assertEqual(cnt3, 73)
    # The union can never exceed the sum of its two parts.
    self.assertTrue(cnt3 <= cnt1 + cnt2)

    # A query molecule matching either ring must give the same union.
    qmol = QMOL(Value('c1[c,n]cccc1'))
    objs = MoleculeModel.objects.filter(molecule__hassubstruct=qmol)
    self.assertEqual(objs.count(), cnt3)

    # An RDKit molecule must match the same rows as the equivalent SMILES
    # string. This used to re-assert cnt2, leaving cnt4 unchecked.
    objs = MoleculeModel.objects.filter(
        molecule__hassubstruct=Chem.MolFromSmiles('C1=CN=CC=C1'))
    cnt4 = objs.count()
    self.assertEqual(cnt4, cnt2)
Example #20
Source File: tests.py From django-rdkit with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_exact_lookup(self):
    """Exact lookup finds one row for strings, mols and DB expressions."""

    def hits(query):
        # Number of stored molecules exactly matching the query.
        return MoleculeModel.objects.filter(molecule=query).count()

    self.assertEqual(hits('COC(c1ccccc1)c1ccccc1'), 1)
    self.assertEqual(hits('Nc1ccc(Cl)nc1'), 1)
    self.assertEqual(hits(Chem.MolFromSmiles('Nc1ccc(Cl)nc1')), 1)
    self.assertEqual(hits(MOL_FROM_SMILES(Value('Nc1ccc(Cl)nc1'))), 1)
Example #21
Source File: neural_fp.py From conv_qsar_fast with MIT License | 5 votes |
def sizeAttributeVectors(molecular_attributes = False):
    """Return (node attribute count, edge attribute count) for a probe graph.

    The lengths are measured on the first node and first edge of the graph
    molToGraph builds for the two-carbon molecule 'CC'.
    """
    probe = AllChem.MolFromSmiles('CC')
    graph = molToGraph(probe, molecular_attributes = molecular_attributes)
    first_node = graph.nodes[0]
    first_edge = graph.edges[0]
    node_len = len(first_node.attributes)
    edge_len = len(first_edge.attributes)
    return node_len, edge_len
Example #22
Source File: neural_fp.py From conv_qsar_fast with MIT License | 5 votes |
def sizeAttributeVector(molecular_attributes = False):
    """Return the combined node + edge attribute count for a probe graph.

    The lengths are measured on the first node and first edge of the graph
    molToGraph builds for the two-carbon molecule 'CC'.
    """
    probe = AllChem.MolFromSmiles('CC')
    graph = molToGraph(probe, molecular_attributes = molecular_attributes)
    first_node = graph.nodes[0]
    first_edge = graph.edges[0]
    node_len = len(first_node.attributes)
    edge_len = len(first_edge.attributes)
    return node_len + edge_len
Example #23
Source File: load_lowe_examples_into_db_details.py From ochem_predict_nn with MIT License | 4 votes |
def mol_to_dic(node, withAmounts = False):
    '''Build a dictionary describing the molecule held in an XML node.

    The result always has 'name', 'smiles' and 'inchi'. The molecule is read
    from the node's 'cml:inchi' identifier when present, otherwise from
    'cml:smiles'. With withAmounts set, 'yield' and 'amount(g)' are added
    when the matching <amount> units are present.

    Raises ValueError when the node carries neither identifier or when the
    identifier cannot be parsed into a molecule.
    '''
    dic = {}

    # Get name
    name_node = node.getElementsByTagName('name')[0]
    dic['name'] = str(name_node.firstChild.nodeValue)

    # If exact entity match, more data is available
    #print(node.toprettyxml())
    #entityType = node.getElementsByTagName('dl:entityType')[0].firstChild.nodeValue
    #if entityType == 'exact' or entityType == 'definiteReference':

    # Map each identifier's dictRef to its value.
    identifiers = {}
    for child in node.getElementsByTagName('identifier'):
        ref = child.attributes.getNamedItem('dictRef').value
        identifiers[ref] = child.attributes.getNamedItem('value').value

    # InChI takes precedence over SMILES when both are present.
    if 'cml:inchi' in identifiers:
        mol = AllChem.MolFromInchi(str(identifiers['cml:inchi']))
    elif 'cml:smiles' in identifiers:
        mol = AllChem.MolFromSmiles(str(identifiers['cml:smiles']))
    else:
        print('identifiers: {}'.format(identifiers.keys()))
        raise ValueError('No molecular identifier for {}'.format(dic['name']))

    if not mol:
        raise ValueError('Couldnt parse molecule: {}'.format(identifiers))

    Chem.SanitizeMol(mol)
    dic['smiles'] = AllChem.MolToSmiles(mol, isomericSmiles=True)
    dic['inchi'] = AllChem.MolToInchi(mol)

    # elif entityType == 'chemicalClass':
    #     pass # name is all we get
    # else:
    #     raise ValueError('Unknown entityType for molecule: {}'.format(entityType))

    # Quantity?
    if not withAmounts:
        return dic

    # Map each amount's units to its text value.
    amounts = {}
    for child in node.getElementsByTagName('amount'):
        units = child.attributes.getNamedItem('units').value
        amounts[units] = child.firstChild.nodeValue

    if 'unit:percentYield' in amounts:
        dic['yield'] = float(amounts['unit:percentYield'])
    if 'unit:g' in amounts:
        dic['amount(g)'] = float(amounts['unit:g'])
    return dic