Python rdkit.Chem.MolFromSmiles() Examples
The following are 30 code examples of rdkit.Chem.MolFromSmiles().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module rdkit.Chem, or try the search function.
Example #1
Source File: properties.py From hgraph2graph with MIT License | 8 votes |
def similarity(a, b):
    """Return the Tanimoto similarity of two SMILES strings.

    Each molecule is fingerprinted as a radius-2, 2048-bit Morgan bit
    vector with chirality ignored. The result is 0.0 when either argument
    is ``None`` or when RDKit fails to parse either SMILES.
    """
    if a is None or b is None:
        return 0.0

    first_mol = Chem.MolFromSmiles(a)
    second_mol = Chem.MolFromSmiles(b)
    if first_mol is None or second_mol is None:
        return 0.0

    fingerprints = [
        AllChem.GetMorganFingerprintAsBitVect(
            parsed, 2, nBits=2048, useChirality=False)
        for parsed in (first_mol, second_mol)
    ]
    return DataStructs.TanimotoSimilarity(*fingerprints)
Example #2
Source File: dataset.py From hgraph2graph with MIT License | 7 votes |
def __getitem__(self, idx):
    """Tensorize the vocabulary-safe rooted SMILES of entry ``idx``.

    The molecule at ``self.batches[idx]`` is re-written as one
    non-isomeric SMILES per leaf atom (as given by ``get_leaves``). Only
    the variants whose tree-node labels are all present in
    ``self.vocab.vmap`` are kept. Returns ``None`` if nothing survives.
    """
    mol = Chem.MolFromSmiles(self.batches[idx])
    rooted_variants = {
        Chem.MolToSmiles(mol, rootedAtAtom=leaf, isomericSmiles=False)
        for leaf in get_leaves(mol)
    }

    safe_list = []
    # Sorted so that the output order is reproducible across runs.
    for candidate in sorted(rooted_variants):
        hmol = MolGraph(candidate)
        # A list (not a generator) so every node is inspected, as before.
        label_known = [
            attr['label'] in self.vocab.vmap
            for _, attr in hmol.mol_tree.nodes(data=True)
        ]
        if all(label_known):
            safe_list.append(candidate)

    if not safe_list:
        return None
    return MolGraph.tensorize(safe_list, self.vocab, self.avocab)
Example #3
Source File: utilsFP.py From CheTo with BSD 3-Clause "New" or "Revised" License | 6 votes |
def generateAtomInvariant(mol):
    """
    Return one 32-bit invariant per atom of ``mol``.

    Each invariant is the hash (masked to 32 bits) of the tuple
    (atomic number, total degree, total H count, ring membership,
    aromaticity) of the atom.

    >>> generateAtomInvariant(Chem.MolFromSmiles("Cc1ncccc1"))
    [341294046, 3184205312, 522345510, 1545984525, 1545984525, 1545984525, 1545984525]
    """
    invariants = [0] * mol.GetNumAtoms()
    for position, atom in enumerate(mol.GetAtoms()):
        descriptors = (
            atom.GetAtomicNum(),
            atom.GetTotalDegree(),
            atom.GetTotalNumHs(),
            atom.IsInRing(),
            atom.GetIsAromatic(),
        )
        invariants[position] = hash(descriptors) & 0xffffffff
    return invariants

#------------------------------------
#
#  doctest boilerplate
#
Example #4
Source File: test_rdkit_grid_features.py From deepchem with MIT License | 6 votes |
def setUp(self):
    """Prepare the fixtures: a 4-ring with 2D coordinates, a protein and a ligand.

    The protein and ligand files are looked up next to this test module
    and loaded with ``rgf.load_molecule`` (no added hydrogens, no charge
    calculation, sanitization on).
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))

    # simple flat ring
    from rdkit.Chem import MolFromSmiles
    self.cycle4 = MolFromSmiles('C1CCC1')
    self.cycle4.Compute2DCoords()

    # load and sanitize two real molecules
    load_options = dict(add_hydrogens=False, calc_charges=False, sanitize=True)
    protein_path = os.path.join(current_dir, '3ws9_protein_fixer_rdkit.pdb')
    _, self.prot = rgf.load_molecule(protein_path, **load_options)

    ligand_path = os.path.join(current_dir, '3ws9_ligand.sdf')
    _, self.lig = rgf.load_molecule(ligand_path, **load_options)
Example #5
Source File: chemutils.py From hgraph2graph with MIT License | 6 votes |
def copy_edit_mol(mol):
    """Return an editable ``RWMol`` holding copies of ``mol``'s atoms and bonds.

    Atoms are duplicated with ``copy_atom``; bonds are re-created between
    the same atom indices with the same bond type.
    """
    editable = Chem.RWMol(Chem.MolFromSmiles(''))

    for source_atom in mol.GetAtoms():
        editable.AddAtom(copy_atom(source_atom))

    for source_bond in mol.GetBonds():
        begin_idx = source_bond.GetBeginAtom().GetIdx()
        end_idx = source_bond.GetEndAtom().GetIdx()
        editable.AddBond(begin_idx, end_idx, source_bond.GetBondType())

    return editable
Example #6
Source File: chemTopicModel.py From CheTo with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _generateMolFrags(datachunk, vocabulary, fragmentMethod, fragIdx=None): if fragIdx is None and fragmentMethod == 'Brics': return result={} for idx, smi in datachunk: mol = Chem.MolFromSmiles(str(smi)) if mol == None: continue fp,_=_generateFPs(mol,fragmentMethod=fragmentMethod) if fp is None: continue tmp={} for k,v in fp.items(): if k not in vocabulary: continue # save memory: for BRICS use index instead of long complicated SMILES if fragmentMethod == 'Brics': tmp[fragIdx[k]]=v else: tmp[k]=v result[idx]=tmp return result ########### chemical topic modeling class ###################
Example #7
Source File: dataset_utils.py From rl_graph_generation with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __getitem__(self, item):
    """
    Returns an rdkit mol object parsed from the 'smiles' column entry
    at position ``item`` of ``self.df``.
    :param item:
    :return:
    """
    return Chem.MolFromSmiles(self.df['smiles'][item])

# # TESTS
# path = 'gdb13.rand1M.smi.gz'
# dataset = gdb_dataset(path)
#
# print(len(dataset))
# mol,_ = dataset[0]
# graph = mol_to_nx(mol)
# graph_sub = graph.subgraph([0,3,5,7,9])
# graph_sub_new = nx.convert_node_labels_to_integers(graph_sub,label_attribute='old')
# graph_sub_node = graph_sub.nodes()
# graph_sub_new_node = graph_sub_new.nodes()
# matrix = nx.adjacency_matrix(graph_sub)
# np_matrix = matrix.toarray()
# print(np_matrix)
# print('end')
Example #8
Source File: align.py From hgraph2graph with MIT License | 6 votes |
def align(xy_tuple):
    """Re-root a pair of SMILES so that their leading halves are most alike.

    Every leaf atom (per ``get_leaves``) of each molecule is tried as the
    SMILES root. The pair of roots minimising the Levenshtein distance
    between the first ``min(len) // 2`` characters of the two strings wins;
    ties keep the first pair encountered.

    :param xy_tuple: ``(x, y)`` pair of SMILES strings.
    :return: tuple of the two non-isomeric SMILES rooted at the chosen atoms
        (atom 0 for both when either molecule has no leaves).
    """
    x, y = xy_tuple
    xmol, ymol = Chem.MolFromSmiles(x), Chem.MolFromSmiles(y)
    # Round-trip x through a non-isomeric SMILES before picking leaves.
    x = Chem.MolToSmiles(xmol, isomericSmiles=False)
    xmol = Chem.MolFromSmiles(x)

    def rooted(mol, root):
        return Chem.MolToSmiles(mol, rootedAtAtom=root, isomericSmiles=False)

    # Each rooted SMILES is generated once per leaf rather than once per
    # (x leaf, y leaf) pair.
    x_rooted = [(i, rooted(xmol, i)) for i in get_leaves(xmol)]
    y_rooted = [(j, rooted(ymol, j)) for j in get_leaves(ymol)]

    best_i, best_j = 0, 0
    best = 1000000
    for i, new_x in x_rooted:
        for j, new_y in y_rooted:
            le = min(len(new_x), len(new_y)) // 2
            dist = Levenshtein.distance(new_x[:le], new_y[:le])
            if dist < best:
                best_i, best_j = i, j
                best = dist

    return rooted(xmol, best_i), rooted(ymol, best_j)
Example #9
Source File: data_loader.py From PADME with MIT License | 6 votes |
def featurize_smiles_np(arr, featurizer, log_every_N=1000, verbose=True):
    """Featurize the SMILES strings held in a numpy array.

    Every element is parsed with RDKit and, when parsing succeeds, its
    atoms are renumbered into canonical rank order before being handed to
    ``featurizer.featurize``. Empty feature arrays are discarded; the
    remainder is squeezed and returned as a flat 1-D array.
    """
    per_sample = []
    for position, smiles in enumerate(arr.tolist()):
        mol = Chem.MolFromSmiles(smiles)
        if mol:
            canonical_order = rdmolfiles.CanonicalRankAtoms(mol)
            mol = rdmolops.RenumberAtoms(mol, canonical_order)
        if position % log_every_N == 0:
            log("Featurizing sample %d" % position, verbose)
        per_sample.append(featurizer.featurize([mol]))

    # Keep only the samples that produced a non-empty feature array.
    kept = [feat for feat in per_sample if feat.size > 0]
    stacked = np.squeeze(np.array(kept))
    return stacked.reshape(-1)
Example #10
Source File: utilsFP.py From CheTo with BSD 3-Clause "New" or "Revised" License | 6 votes |
def getSubstructSmi(mol,env,propsToSmiles=True):
    """
    Return the SMILES of the substructure of ``mol`` spanned by the bonds in ``env``.

    An empty ``env`` yields ''. With ``propsToSmiles`` the atoms are
    additionally annotated through ``writePropsToSmiles``.

    >>> getSubstructSmi(Chem.MolFromSmiles('Cc1ncccc1'),((0,1,2)))
    '[cH;R;D2]:[n;R;D2]:[c;R;D3]-[CH3;R0;D1]'
    """
    import ast

    if not len(env):
        return ''

    atomsToUse = set()
    for b in env:
        bond = mol.GetBondWithIdx(b)  # looked up once per bond index
        atomsToUse.add(bond.GetBeginAtomIdx())
        atomsToUse.add(bond.GetEndAtomIdx())

    # no isomeric smiles since we don't include that in the fingerprints
    smi = Chem.MolFragmentToSmiles(mol,atomsToUse,isomericSmiles=False,
                                   bondsToUse=env,allHsExplicit=True,
                                   allBondsExplicit=True)
    if propsToSmiles:
        # The property is a textual list of atom indices; parse it as a
        # literal instead of executing it with eval().
        order = ast.literal_eval(mol.GetProp("_smilesAtomOutputOrder"))
        smi = writePropsToSmiles(mol,smi,order)
    return smi
Example #11
Source File: drd2_scorer.py From hgraph2graph with MIT License | 6 votes |
def get_score(smile):
    """Return the classifier's class-1 probability for a SMILES string.

    The module-level ``clf_model`` is loaded on first use via
    ``load_model()``. Returns 0.0 when RDKit cannot parse ``smile``.
    """
    if clf_model is None:
        load_model()

    mol = Chem.MolFromSmiles(smile)
    if not mol:
        return 0.0

    fp = fingerprints_from_mol(mol)
    score = clf_model.predict_proba(fp)[:, 1]
    # Extract the single element explicitly: float() on a 1-D array is
    # deprecated in recent NumPy. .item() still fails loudly if more than
    # one row comes back.
    # NOTE(review): assumes predict_proba returns an ndarray-like with
    # exactly one row for one molecule — confirm against fingerprints_from_mol.
    return float(score.item())
Example #12
Source File: dataset.py From hgraph2graph with MIT License | 6 votes |
def __getitem__(self, idx):
    """Tensorize the vocabulary-safe rooted SMILES of entry ``idx``.

    The molecule at ``self.batches[idx]`` is re-written as one
    non-isomeric SMILES per leaf atom (as given by ``get_leaves``). Only
    the variants whose tree-node labels are all present in
    ``self.vocab.vmap`` are kept. Returns ``None`` if nothing survives.
    """
    mol = Chem.MolFromSmiles(self.batches[idx])
    rooted_variants = {
        Chem.MolToSmiles(mol, rootedAtAtom=leaf, isomericSmiles=False)
        for leaf in get_leaves(mol)
    }

    safe_list = []
    # Sorted so that the output order is reproducible across runs.
    for candidate in sorted(rooted_variants):
        hmol = MolGraph(candidate)
        # A list (not a generator) so every node is inspected, as before.
        label_known = [
            attr['label'] in self.vocab.vmap
            for _, attr in hmol.mol_tree.nodes(data=True)
        ]
        if all(label_known):
            safe_list.append(candidate)

    if not safe_list:
        return None
    return MolGraph.tensorize(safe_list, self.vocab, self.avocab)
Example #13
Source File: chemutils.py From hgraph2graph with MIT License | 6 votes |
def get_clique_mol(mol, atoms):
    """Extract the fragment of ``mol`` made of ``atoms`` as a molecule.

    The fragment is written as a kekulized SMILES, re-parsed without
    sanitization, copied through ``copy_edit_mol`` and finally passed to
    ``sanitize``, whose result is returned.
    """
    fragment_smiles = Chem.MolFragmentToSmiles(mol, atoms, kekuleSmiles=True)
    raw_fragment = Chem.MolFromSmiles(fragment_smiles, sanitize=False)
    rebuilt = copy_edit_mol(raw_fragment).GetMol()
    return sanitize(rebuilt)
Example #14
Source File: test_tensorflowEncoders.py From deepchem with MIT License | 6 votes |
def test_fit(self):
    """Push a small SMILES set through the ZINC encoder and decoder.

    The molecules are one-hot featurized, encoded, decoded and converted
    back to SMILES; the test only checks that the number of decoded
    strings matches the number of inputs.
    """
    tf_enc = TensorflowMoleculeEncoder.zinc_encoder()

    smiles = [
        "Cn1cnc2c1c(=O)n(C)c(=O)n2C",
        "O=C(O)[C@@H]1/C(=C/CO)O[C@@H]2CC(=O)N21",
        "Cn1c2nncnc2c(=O)n(C)c1=O",
        "Cn1cnc2c1c(=O)[nH]c(=O)n2C",
        "NC(=O)c1ncc[nH]c1=O",
        "O=C1OCc2c1[nH]c(=O)[nH]c2=O",
        "Cn1c(N)c(N)c(=O)n(C)c1=O",
        "CNc1nc2c([nH]1)c(=O)[nH]c(=O)n2C",
        "CC(=O)N1CN(C(C)=O)[C@@H](O)[C@@H]1O",
        "CC(=O)N1CN(C(C)=O)[C@H](O)[C@H]1O",
        "Cc1[nH]c(=O)[nH]c(=O)c1CO",
        "O=C1NCCCc2c1no[n+]2[O-]",
        "Cc1nc(C(N)=O)c(N)n1CCO",
        "O=c1[nH]cc(N2CCOCC2)c(=O)[nH]1",
    ]

    featurizer = dc.feat.one_hot.OneHotFeaturizer(zinc_charset, 120)
    mols = list(map(Chem.MolFromSmiles, smiles))
    features = featurizer.featurize(mols)

    # The features serve as both inputs and targets of the dataset.
    dataset = DiskDataset.from_numpy(features, features)
    encoded = tf_enc.predict_on_batch(dataset.X)

    tf_de = TensorflowMoleculeDecoder.zinc_decoder()
    one_hot_decoded = tf_de.predict_on_batch(encoded)
    decoded_smiles = featurizer.untransform(one_hot_decoded)

    assert len(decoded_smiles) == len(smiles)
Example #15
Source File: mpnn.py From deepchem with MIT License | 6 votes |
def construct_multigraph(smile):
    """Build adjacency and node-feature dictionaries for a SMILES string.

    :param smile: SMILES string of the molecule.
    :return: ``(g, h)`` where ``h[i]`` is the feature tensor of atom ``i``
        viewed as ``(1, 75)`` and ``g[i]`` is a list of
        ``(edge_feature_tensor, j)`` pairs, one per atom ``j`` bonded to
        ``i``; each edge tensor is viewed as ``(1, 6)``. Atoms without
        bonds have no entry in ``g``.
    """
    g = OrderedDict()
    h = OrderedDict()

    molecule = Chem.MolFromSmiles(smile)
    num_atoms = molecule.GetNumAtoms()
    # range() instead of the Python-2-only xrange().
    for i in range(num_atoms):
        atom_i = molecule.GetAtomWithIdx(i)
        h[i] = Variable(
            torch.FloatTensor(
                dc.feat.graph_features.atom_features(atom_i))).view(1, 75)
        for j in range(num_atoms):
            e_ij = molecule.GetBondBetweenAtoms(i, j)
            if e_ij is None:
                continue
            # Materialise the 0/1 features as a list: torch.FloatTensor
            # cannot consume the lazy map object Python 3 returns.
            edge_feats = [
                1 if x == True else 0  # noqa: E712 - keeps the original test
                for x in dc.feat.graph_features.bond_features(e_ij)
            ]  # ADDED edge feat
            e_ij = Variable(torch.FloatTensor(edge_feats).view(1, 6))
            # NOTE(review): every bond of atom i is recorded here. The
            # flattened source leaves the nesting of the original append
            # ambiguous — confirm this matches the intended graph.
            g.setdefault(i, []).append((e_ij, j))
    return g, h
Example #16
Source File: eval_mol2vec_results.py From deepchem with MIT License | 6 votes |
def main() :
    """Print the summed word vectors of glycine's radius-0 Morgan identifiers.

    Loads word vectors from "vec.txt", fingerprints glycine and adds up
    the vector of every identifier into a 200-dimensional embedding. Any
    failure is printed; nothing further is printed in that case.
    """
    model = models.KeyedVectors.load_word2vec_format("vec.txt")
    embeddings = list()

    # Using canonical smiles for glycine, as in original research paper
    mol = Chem.MolFromSmiles("C(C(=O)O)N")
    try:
        info = {}
        rdMolDescriptors.GetMorganFingerprint(mol, 0, bitInfo=info)
        totalvec = np.zeros(200)
        for k in list(info.keys()):
            wordvec = model.wv[str(k)]
            totalvec = np.add(totalvec, wordvec)
        embeddings.append(totalvec)
    except Exception as e:
        print(e)

    # Only index the list when an embedding was produced; otherwise the
    # error printed above would be followed by an unrelated IndexError.
    if embeddings:
        print(embeddings[0])
Example #17
Source File: data_loader.py From PADME with MIT License | 5 votes |
def featurize_smiles_df(df, featurizer, field, log_every_N=1000, verbose=True):
    """Featurize individual compounds in dataframe.

    Given a featurizer that operates on individual chemical compounds
    or macromolecules, compute & add features for that compound to the
    features dataframe

    While featurizing, the process-level stderr descriptor is redirected
    (appended) to './logs/error.log'. It is restored afterwards, also when
    featurization raises.

    :param df: dataframe holding the SMILES strings in column ``field``.
    :param featurizer: object whose ``featurize([mol], smiles=...)`` is called
        once per row.
    :param field: name of the SMILES column.
    :param log_every_N: a progress message is logged every N samples.
    :param verbose: forwarded to ``log``.
    :return: ``(features, valid_inds)`` — array of the non-empty feature
        arrays and a boolean mask over the input rows marking them.
    """
    sample_elems = df[field].tolist()
    features = []

    stderr_fileno = sys.stderr.fileno()
    stderr_save = os.dup(stderr_fileno)
    try:
        with open('./logs/error.log', 'a') as stderr_fd:
            os.dup2(stderr_fd.fileno(), stderr_fileno)
            try:
                for ind, elem in enumerate(sample_elems):
                    mol = Chem.MolFromSmiles(elem)
                    # TODO (ytz) this is a bandage solution to reorder the atoms so
                    # that they're always in the same canonical order. Presumably this
                    # should be correctly implemented in the future for graph mols.
                    if mol:
                        new_order = rdmolfiles.CanonicalRankAtoms(mol)
                        mol = rdmolops.RenumberAtoms(mol, new_order)
                    if ind % log_every_N == 0:
                        log("Featurizing sample %d" % ind, verbose)
                    features.append(featurizer.featurize([mol], smiles=elem))
            finally:
                # Always point stderr back at its original target.
                os.dup2(stderr_save, stderr_fileno)
    finally:
        # The duplicated descriptor is ours to release (it leaked before).
        os.close(stderr_save)

    valid_inds = np.array(
        [1 if elt.size > 0 else 0 for elt in features], dtype=bool)
    features = [elt for (is_valid, elt) in zip(valid_inds, features) if is_valid]
    #return np.squeeze(np.array(features), axis=1), valid_inds
    return np.array(features), valid_inds
Example #18
Source File: utilsFP.py From CheTo with BSD 3-Clause "New" or "Revised" License | 5 votes |
def getMorganEnvironment(mol, bitInfo, fp=None, minRad=0):
    """
    Map every fingerprint bit to the bond environments that set it.

    Environments with a radius below ``minRad`` are skipped; when ``fp`` is
    given, the bits of skipped environments are additionally cleared in it
    (``fp`` is modified in place).

    >>> m = Chem.MolFromSmiles('CC(O)C')
    >>> bi = {}
    >>> fp = AllChem.GetMorganFingerprintAsBitVect(m,2,2048,bitInfo=bi)
    >>> getMorganEnvironment(m,bi)
    defaultdict(<class 'list'>, {1057: [[], []], 227: [[1]], 709: [[0, 1, 2]], 1: [[]], 283: [[0], [2]], 807: [[]]})
    >>> getMorganEnvironment(m,bi,minRad=1)
    defaultdict(<class 'list'>, {283: [[0], [2]], 227: [[1]], 709: [[0, 1, 2]]})
    >>> list(fp.GetOnBits())
    [1, 227, 283, 709, 807, 1057]
    >>> getMorganEnvironment(m,bi,minRad=1,fp=fp)
    defaultdict(<class 'list'>, {283: [[0], [2]], 227: [[1]], 709: [[0, 1, 2]]})
    >>> list(fp.GetOnBits())
    [227, 283, 709]
    """
    bitPaths = defaultdict(list)
    # Identity test, evaluated once: `fp != None` would go through the
    # bit vector's overloaded comparison operator.
    clear_bits = fp is not None
    for bit, info in bitInfo.items():
        for atomID, radius in info:
            if radius < minRad:
                if clear_bits:
                    fp[bit] = 0
                continue
            env = Chem.FindAtomEnvironmentOfRadiusN(mol, radius, atomID)
            bitPaths[bit].append(list(env))
    return bitPaths
Example #19
Source File: utilsFP.py From CheTo with BSD 3-Clause "New" or "Revised" License | 5 votes |
def writePropsToSmiles(mol,smi,order):
    """
    Annotate the atoms of ``smi`` with ring-membership and degree information.

    ``order`` lists, for each atom position in ``smi`` (counted from 1),
    the index of the corresponding atom in ``mol``. Atoms with atomic
    number 0 are left untouched.

    >>> writePropsToSmiles(Chem.MolFromSmiles('Cc1ncccc1'),'[cH]:[n]:[c]-[CH3]',(3,2,1,0))
    '[cH;R;D2]:[n;R;D2]:[c;R;D3]-[CH3;R0;D1]'
    """
    annotated = copy.deepcopy(smi)
    for position, atom_idx in enumerate(order, 1):
        atom = mol.GetAtomWithIdx(atom_idx)
        if atom.GetAtomicNum():
            outside_ring = not atom.IsInRing()
            annotated = _includeRingMembership(annotated, position,
                                               noRingAtom = outside_ring)
            annotated = _includeDegree(annotated, position, atom.GetDegree())
    return annotated
Example #20
Source File: jtprop_vae.py From icml18-jtnn with MIT License | 5 votes |
def decode(self, tree_vec, mol_vec, prob_decode): pred_root,pred_nodes = self.decoder.decode(tree_vec, prob_decode) #Mark nid & is_leaf & atommap for i,node in enumerate(pred_nodes): node.nid = i + 1 node.is_leaf = (len(node.neighbors) == 1) if len(node.neighbors) > 1: set_atommap(node.mol, node.nid) tree_mess = self.jtnn([pred_root])[0] cur_mol = copy_edit_mol(pred_root.mol) global_amap = [{}] + [{} for node in pred_nodes] global_amap[1] = {atom.GetIdx():atom.GetIdx() for atom in cur_mol.GetAtoms()} cur_mol = self.dfs_assemble(tree_mess, mol_vec, pred_nodes, cur_mol, global_amap, [], pred_root, None, prob_decode) if cur_mol is None: return None cur_mol = cur_mol.GetMol() set_atommap(cur_mol) cur_mol = Chem.MolFromSmiles(Chem.MolToSmiles(cur_mol)) if cur_mol is None: return None smiles2D = Chem.MolToSmiles(cur_mol) stereo_cands = decode_stereo(smiles2D) if len(stereo_cands) == 1: return stereo_cands[0] stereo_vecs = self.mpn(mol2graph(stereo_cands)) stereo_vecs = self.G_mean(stereo_vecs) scores = nn.CosineSimilarity()(stereo_vecs, mol_vec) _,max_id = scores.max(dim=0) return stereo_cands[max_id.data[0]]
Example #21
Source File: mol_tree.py From icml18-jtnn with MIT License | 5 votes |
def get_slots(smiles):
    """Describe every atom of ``smiles`` as a (symbol, charge, H count) tuple.

    The tuples appear in the atom order of the parsed molecule.
    """
    mol = Chem.MolFromSmiles(smiles)
    slots = []
    for atom in mol.GetAtoms():
        slot = (atom.GetSymbol(), atom.GetFormalCharge(), atom.GetTotalNumHs())
        slots.append(slot)
    return slots
Example #22
Source File: drawFPBits.py From CheTo with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _drawBricsFrag(smi,molSize=(150,150),kekulize=True,baseRad=0.05,svg=True,**kwargs):
    """Draw a fragment pattern and return the drawing text.

    The ring/degree annotations are stripped from ``smi`` before it is
    parsed. The drawing is produced with an SVG drawer by default, or a
    Cairo drawer when ``svg`` is falsy. Extra keyword arguments are passed
    on to ``DrawMolecule``.
    """
    # delete smarts specific syntax from the pattern
    plain_smiles = re.sub(r"\;R\d?\;D\d+", "", smi)
    mol = Chem.MolFromSmiles(plain_smiles, sanitize=True)
    prepared = rdMolDraw2D.PrepareMolForDrawing(mol, kekulize=kekulize)

    # Drawing
    canvas = rdMolDraw2D.MolDraw2DSVG(molSize[0],molSize[1])
    if svg:
        pass
    else:
        canvas = rdMolDraw2D.MolDraw2DCairo(molSize[0],molSize[1])
    canvas.DrawMolecule(prepared,**kwargs)
    canvas.FinishDrawing()
    return canvas.GetDrawingText()
Example #23
Source File: chemutils.py From dgl with Apache License 2.0 | 5 votes |
def get_clique_mol(mol, atoms):
    """Extract the fragment of ``mol`` made of ``atoms`` as a molecule.

    The fragment is written as a kekulized SMILES, re-parsed without
    sanitization, copied through ``copy_edit_mol`` and finally passed to
    ``sanitize``, whose result is returned.
    """
    fragment_smiles = Chem.MolFragmentToSmiles(mol, atoms, kekuleSmiles=True)
    raw_fragment = Chem.MolFromSmiles(fragment_smiles, sanitize=False)
    rebuilt = copy_edit_mol(raw_fragment).GetMol()
    # We assume this is not None
    return sanitize(rebuilt)
Example #24
Source File: chemutils.py From dgl with Apache License 2.0 | 5 votes |
def copy_edit_mol(mol):
    """Return an editable ``RWMol`` holding copies of ``mol``'s atoms and bonds.

    Atoms are duplicated with ``copy_atom``; bonds are re-created between
    the same atom indices with the same bond type.
    """
    editable = Chem.RWMol(Chem.MolFromSmiles(''))

    for source_atom in mol.GetAtoms():
        editable.AddAtom(copy_atom(source_atom))

    for source_bond in mol.GetBonds():
        begin_idx = source_bond.GetBeginAtom().GetIdx()
        end_idx = source_bond.GetEndAtom().GetIdx()
        editable.AddBond(begin_idx, end_idx, source_bond.GetBondType())

    return editable
Example #25
Source File: chemutils.py From dgl with Apache License 2.0 | 5 votes |
def get_mol(smiles):
    """Parse ``smiles`` and kekulize the result in place.

    Returns the kekulized molecule, or ``None`` when parsing fails.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is not None:
        Chem.Kekulize(parsed)
    return parsed
Example #26
Source File: mol_tree.py From dgl with Apache License 2.0 | 5 votes |
def get_slots(smiles):
    """Describe every atom of ``smiles`` as a (symbol, charge, H count) tuple.

    The tuples appear in the atom order of the parsed molecule.
    """
    mol = Chem.MolFromSmiles(smiles)
    slots = []
    for atom in mol.GetAtoms():
        slot = (atom.GetSymbol(), atom.GetFormalCharge(), atom.GetTotalNumHs())
        slots.append(slot)
    return slots
Example #27
Source File: mol_tree_nx.py From dgl with Apache License 2.0 | 5 votes |
def _recover_node(self, i, original_mol):
    """Compute and store the label of node ``i`` together with its successors.

    Atom map numbers on ``original_mol`` are temporarily set to the node
    ids of the non-leaf cliques involved, the union of the cliques is
    extracted as a molecule, and its SMILES is stored in
    ``node['label']`` (with the molecule from ``get_mol`` in
    ``node['label_mol']``). The map numbers of all touched atoms are
    reset to 0 before returning.

    Returns the label SMILES.
    """
    node = self.nodes_dict[i]

    clique = []
    clique.extend(node['clique'])
    if not node['is_leaf']:
        for cidx in node['clique']:
            original_mol.GetAtomWithIdx(cidx).SetAtomMapNum(node['nid'])

    for j in self.successors(i).numpy():
        nei_node = self.nodes_dict[j]
        clique.extend(nei_node['clique'])
        if nei_node['is_leaf']:  # Leaf node, no need to mark
            continue
        for cidx in nei_node['clique']:
            # allow singleton node override the atom mapping
            if cidx not in node['clique'] or len(nei_node['clique']) == 1:
                atom = original_mol.GetAtomWithIdx(cidx)
                atom.SetAtomMapNum(nei_node['nid'])

    # Drop atom indices shared between the node and its successors.
    clique = list(set(clique))
    label_mol = get_clique_mol(original_mol, clique)
    # Round-trip through MolFromSmiles/MolToSmiles before storing the label.
    node['label'] = Chem.MolToSmiles(Chem.MolFromSmiles(get_smiles(label_mol)))
    node['label_mol'] = get_mol(node['label'])

    # Undo the temporary atom-map marks on the caller's molecule.
    for cidx in clique:
        original_mol.GetAtomWithIdx(cidx).SetAtomMapNum(0)

    return node['label']
Example #28
Source File: dataset_utils.py From rl_graph_generation with BSD 3-Clause "New" or "Revised" License | 5 votes |
def sort_dataset(in_path, out_path):
    """
    Sorts the dataset of smiles from input path by molecular complexity as
    proxied by the BertzCT index, and outputs the new sorted dataset
    :param in_path: location passed to ``load_dataset``
    :param out_path: destination of the sorted 'smiles' column (CSV, no index)
    :return:
    """
    frame = load_dataset(in_path)
    frame['BertzCT'] = frame.smiles.apply(
        lambda smiles: GraphDescriptors.BertzCT(Chem.MolFromSmiles(smiles)))
    by_complexity = frame.sort_values(by=['BertzCT'])
    by_complexity['smiles'].to_csv(out_path, index=False)
Example #29
Source File: canonicalization.py From ochem_predict_nn with MIT License | 5 votes |
def fix_smiles(self, old_smiles, removeMap = True):
    '''
    For a given SMILES string, this function "fixes" common mistakes
    found in the Lowe parsed database:
    - N=c[nH] structures are turned into the normal [NH]-c[n] forms
    - iminols are turned into amides/carbamates

    It applies the reactions in self.rxns until the SMILES string doesn't change

    :param old_smiles: SMILES string to fix.
    :param removeMap: when true, the 'molAtomMapNumber' property is cleared
        on every atom before the reactions are applied.
    :return: the fixed SMILES, or ``old_smiles`` unchanged when it cannot
        be parsed.
    '''
    mol = Chem.MolFromSmiles(old_smiles)
    # Check the parse result before touching atoms: the atom-map clearing
    # used to run first and failed with AttributeError on a None molecule.
    if not mol:
        return old_smiles

    if removeMap:
        for atom in mol.GetAtoms():
            atom.ClearProp('molAtomMapNumber')

    new_smiles = Chem.MolToSmiles(mol, isomericSmiles = USE_STEREOCHEMISTRY)
    old_smiles = ''
    # Re-apply the reactions until a full pass leaves the SMILES unchanged.
    while new_smiles != old_smiles:
        old_smiles = new_smiles
        for rxn in self.rxns:
            outcomes = rxn.RunReactants((mol,))
            if not outcomes:
                continue
            # Continue from the first product of the first outcome.
            mol = outcomes[0][0]
            Chem.SanitizeMol(mol)
            new_smiles = Chem.MolToSmiles(mol, isomericSmiles = USE_STEREOCHEMISTRY)

    return new_smiles
Example #30
Source File: donkey.py From deepchem with MIT License | 5 votes |
def generate_scaffold(smiles, include_chirality=False):
    """Compute the Bemis-Murcko scaffold for a SMILES string.

    The SMILES is parsed with RDKit and handed to a ``ScaffoldGenerator``
    built with the requested ``include_chirality`` setting.
    """
    parsed = Chem.MolFromSmiles(smiles)
    generator = ScaffoldGenerator(include_chirality=include_chirality)
    return generator.get_scaffold(parsed)