Python rdkit.Chem.MolToSmiles() Examples
The following are 30 code examples of rdkit.Chem.MolToSmiles().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module rdkit.Chem, or try the search function.
Example #1
Source File: dataset.py From hgraph2graph with MIT License | 7 votes |
def __getitem__(self, idx):
    """Tensorize the rooted SMILES variants of batch entry ``idx``.

    The molecule parsed from ``self.batches[idx]`` is written out once per
    atom returned by ``get_leaves``, each time rooted at that atom and
    without stereo information.  A variant is kept only when every node
    label of its ``MolGraph`` tree is present in ``self.vocab.vmap``.

    Returns the result of ``MolGraph.tensorize`` on the kept SMILES, or
    ``None`` when nothing is kept.
    """
    mol = Chem.MolFromSmiles(self.batches[idx])
    rooted_variants = {
        Chem.MolToSmiles(mol, rootedAtAtom=leaf, isomericSmiles=False)
        for leaf in get_leaves(mol)
    }

    safe_list = []
    # Sorted to ensure reproducibility (set iteration order is arbitrary).
    for smi in sorted(rooted_variants):
        tree = MolGraph(smi).mol_tree
        labels = [attr['label'] for _, attr in tree.nodes(data=True)]
        if all(label in self.vocab.vmap for label in labels):
            safe_list.append(smi)

    if not safe_list:
        return None
    return MolGraph.tensorize(safe_list, self.vocab, self.avocab)
Example #2
Source File: chemutils.py From icml18-jtnn with MIT License | 6 votes |
def decode_test():
    """Round-trip SMILES read from stdin through tree decomposition and reassembly.

    The first whitespace-separated token of every stdin line is treated as a
    SMILES string.  It is decomposed into a ``MolTree``, reassembled with
    ``dfs_assemble`` and compared against the canonical SMILES of the input.
    Each mismatch is printed as ``<gold> <decoded>``; after every line the
    running ``<mismatches> <lines processed>`` pair is printed.

    The prints use ``%`` formatting inside ``print(...)`` so the output is
    identical under Python 2 and Python 3 (the former bare ``print``
    statements were a syntax error on Python 3).
    """
    wrong = 0
    for tot, line in enumerate(sys.stdin):
        s = line.split()[0]
        tree = MolTree(s)
        tree.recover()

        cur_mol = copy_edit_mol(tree.nodes[0].mol)
        # Slot 0 is an unused placeholder; node ids are 1-based, and the root
        # (slot 1) maps each of its atoms onto itself.
        global_amap = [{}] + [{} for node in tree.nodes]
        global_amap[1] = {atom.GetIdx(): atom.GetIdx() for atom in cur_mol.GetAtoms()}

        dfs_assemble(cur_mol, global_amap, [], tree.nodes[0], None)

        cur_mol = cur_mol.GetMol()
        cur_mol = Chem.MolFromSmiles(Chem.MolToSmiles(cur_mol))
        set_atommap(cur_mol)
        dec_smiles = Chem.MolToSmiles(cur_mol)

        gold_smiles = Chem.MolToSmiles(Chem.MolFromSmiles(s))
        if gold_smiles != dec_smiles:
            print("%s %s" % (gold_smiles, dec_smiles))
            wrong += 1
        print("%s %s" % (wrong, tot + 1))
Example #3
Source File: rdkit_grid_featurizer.py From deepchem with MIT License | 6 votes |
def compute_all_ecfp(mol, indices=None, degree=2):
    """Describe the environment of each atom as a SMILES fragment.

    For every selected atom, the atom environment of radius ``degree`` is
    extracted as a sub-molecule and written out as SMILES.

    Parameters
    ----------
    mol: rdkit Mol
      Molecule whose atoms are described.
    indices: container of int, optional
      If given, only atoms whose index is in this container are processed.
    degree: int
      Radius passed to ``Chem.FindAtomEnvironmentOfRadiusN``.

    Returns
    -------
    dict
      Maps atom index to the string ``"<atomic number>,<fragment SMILES>"``.
      Note that the fragment is NOT hashed to an int here, despite what
      earlier documentation of this function stated.
    """
    from rdkit import Chem

    ecfp_dict = {}
    for i in range(mol.GetNumAtoms()):
        if indices is not None and i not in indices:
            continue
        env = Chem.FindAtomEnvironmentOfRadiusN(mol, degree, i, useHs=True)
        submol = Chem.PathToSubmol(mol, env)
        smile = Chem.MolToSmiles(submol)
        # Direct lookup avoids rebuilding the atom sequence on every iteration.
        atomic_num = mol.GetAtomWithIdx(i).GetAtomicNum()
        ecfp_dict[i] = "%s,%s" % (atomic_num, smile)
    return ecfp_dict
Example #4
Source File: goal_directed_generation.py From guacamol_baselines with MIT License | 6 votes |
def next_state(self):
    """Create a child ``State`` by adding an atom to this state's molecule.

    ``add_atom`` is retried up to 100 times until the resulting SMILES
    differs from ``self.smiles``; the last attempt is used either way.  The
    child gets ``max_atoms`` set to ``self.turn - 1`` and otherwise copies
    this state's scoring function, limits, stats and seed.
    """
    max_attempts = 100
    smiles = self.smiles
    # TODO: this seems dodgy...
    for _ in range(max_attempts):
        mol = add_atom(self.mol, self.stats)
        smiles = Chem.MolToSmiles(mol)
        if smiles != self.smiles:
            break

    return State(
        scoring_function=self.scoring_function,
        mol=mol,
        smiles=smiles,
        max_atoms=self.turn - 1,
        max_children=self.max_children,
        stats=self.stats,
        seed=self.seed,
    )
Example #5
Source File: chemutils.py From hgraph2graph with MIT License | 6 votes |
def get_inter_label(mol, atoms, inter_atoms):
    """Extract the clique sub-molecule for ``atoms`` and label its anchor atoms.

    Returns ``(new_mol, labels)`` where ``labels`` is a list of
    ``(atom index, SMILES)`` pairs.  For a bond-less clique the single pair
    uses the first element of ``inter_atoms`` and the SMILES of the clique
    with all atom-map numbers cleared.  Otherwise there is one pair per atom
    that is in ``inter_atoms`` and passes ``is_anchor``; afterwards atoms in
    ``inter_atoms`` get map number 1 and all others 0.
    """
    new_mol = get_clique_mol(mol, atoms)

    if new_mol.GetNumBonds() == 0:
        inter_atom = list(inter_atoms)[0]
        for atom in new_mol.GetAtoms():
            atom.SetAtomMapNum(0)
        return new_mol, [(inter_atom, Chem.MolToSmiles(new_mol))]

    # Anchor SMILES are collected before the map numbers are rewritten below.
    inter_label = []
    for atom in new_mol.GetAtoms():
        idx = idxfunc(atom)
        if idx in inter_atoms and is_anchor(atom, inter_atoms):
            inter_label.append((idx, get_anchor_smiles(new_mol, idx)))

    for atom in new_mol.GetAtoms():
        if idxfunc(atom) in inter_atoms:
            atom.SetAtomMapNum(1)
        else:
            atom.SetAtomMapNum(0)
    return new_mol, inter_label
Example #6
Source File: normalize.py From MolVS with MIT License | 6 votes |
def _apply_transform(self, mol, rule):
    """Repeatedly apply normalization transform to molecule until no changes occur.

    It is possible for multiple products to be produced when a rule is applied. The rule is applied repeatedly to
    each of the products, until no further changes occur or after 20 attempts. If there are multiple unique products
    after the final application, the first product (sorted alphabetically by SMILES) is chosen.

    :param mol: The molecule the rule is applied to.
    :param rule: Reaction-like object providing ``RunReactants``.
    :returns: The first resulting product, or None if the rule was not applicable on the first pass.
    """
    max_attempts = 20
    mols = [mol]
    for n in six.moves.range(max_attempts):
        products = {}
        for current in mols:
            for product in [x[0] for x in rule.RunReactants((current,))]:
                # Keep only products that sanitize cleanly; keying by SMILES de-duplicates them.
                if Chem.SanitizeMol(product, catchErrors=True) == 0:
                    products[Chem.MolToSmiles(product, isomericSmiles=True)] = product
        if products:
            mols = [products[s] for s in sorted(products)]
        else:
            # If n == 0, the rule was not applicable and we return None
            return mols[0] if n > 0 else None
    # Attempt limit reached while the rule was still producing changes: return the first product, as documented,
    # instead of falling through and implicitly returning None.
    return mols[0]
Example #7
Source File: dataset.py From hgraph2graph with MIT License | 6 votes |
def __getitem__(self, idx):
    """Return the tensorized, vocabulary-safe rooted SMILES for entry ``idx``.

    ``self.batches[idx]`` is parsed and re-serialised (without stereo
    information) rooted at each atom returned by ``get_leaves``.  Only
    serialisations whose ``MolGraph`` tree uses labels found in
    ``self.vocab.vmap`` are passed to ``MolGraph.tensorize``; if there are
    none, ``None`` is returned.
    """
    def uses_known_labels(smi):
        # Visit every node (no early exit), mirroring a plain flag-setting loop.
        hmol = MolGraph(smi)
        flags = [attr['label'] in self.vocab.vmap
                 for _node, attr in hmol.mol_tree.nodes(data=True)]
        return False not in flags

    mol = Chem.MolFromSmiles(self.batches[idx])
    leaves = get_leaves(mol)
    candidates = set()
    for leaf in leaves:
        candidates.add(Chem.MolToSmiles(mol, rootedAtAtom=leaf, isomericSmiles=False))

    # Sort to ensure reproducibility.
    safe_list = [smi for smi in sorted(candidates) if uses_known_labels(smi)]

    if safe_list:
        return MolGraph.tensorize(safe_list, self.vocab, self.avocab)
    return None
Example #8
Source File: align.py From hgraph2graph with MIT License | 6 votes |
def align(xy_tuple):
    """Pick rootings of two molecules whose SMILES prefixes are most similar.

    ``xy_tuple`` is a pair of SMILES strings.  Each molecule is written out
    rooted at every atom returned by ``get_leaves`` (without stereo
    information).  For each pair of rootings, the first half of the shorter
    string's length is compared with Levenshtein distance, and the first pair
    with the smallest distance wins.

    Returns the two SMILES strings rooted at the winning atoms.  If either
    molecule has no leaves, both are rooted at atom 0.
    """
    def rooted(mol, atom_idx):
        return Chem.MolToSmiles(mol, rootedAtAtom=atom_idx, isomericSmiles=False)

    x, y = xy_tuple
    xmol, ymol = Chem.MolFromSmiles(x), Chem.MolFromSmiles(y)
    # Round-trip x through non-isomeric SMILES before looking for leaves.
    x = Chem.MolToSmiles(xmol, isomericSmiles=False)
    xmol = Chem.MolFromSmiles(x)

    # Compute each rooted SMILES once, rather than once per (i, j) pair.
    x_rooted = [(i, rooted(xmol, i)) for i in get_leaves(xmol)]
    y_rooted = [(j, rooted(ymol, j)) for j in get_leaves(ymol)]

    best_i, best_j = 0, 0
    best = 1000000
    for i, new_x in x_rooted:
        for j, new_y in y_rooted:
            le = min(len(new_x), len(new_y)) // 2
            dist = Levenshtein.distance(new_x[:le], new_y[:le])
            if dist < best:
                best_i, best_j = i, j
                best = dist

    return rooted(xmol, best_i), rooted(ymol, best_j)
Example #9
Source File: smiles_featurizers.py From deepchem with MIT License | 6 votes |
def _featurize(self, mol):
    """Turn a molecule into a padded SMILES token sequence.

    The molecule's SMILES is split into characters, right-padded with
    ``PAD_TOKEN`` up to ``self.max_len``, then wrapped with ``self.pad_len``
    extra pad tokens on each side and passed through ``self.to_seq``.
    Returns an empty list when the SMILES is longer than ``self.max_len``.
    """
    from rdkit import Chem

    smile = Chem.MolToSmiles(mol)
    missing = self.max_len - len(smile)
    if missing < 0:
        return list()

    # Extend shorter strings with padding (a zero count adds nothing).
    tokens = list(smile) + [PAD_TOKEN] * missing

    # Padding before and after
    edge = [PAD_TOKEN] * self.pad_len
    return self.to_seq(edge + tokens + edge)
Example #10
Source File: one_hot.py From deepchem with MIT License | 6 votes |
def featurize(self, mols, verbose=True, log_every_n=1000):
    """One-hot encode the SMILES of each molecule.

    Parameters
    ----------
    mols: obj
      List of rdkit Molecule Objects
    verbose: bool
      How much logging (not used by this implementation)
    log_every_n:
      How often to log (not used by this implementation)

    Returns
    -------
    obj
      numpy array of features
    """
    from rdkit import Chem

    smiles = []
    for mol in mols:
        smiles.append(Chem.MolToSmiles(mol))

    # Build the character set lazily from the first batch seen.
    if self.charset is None:
        self.charset = self._create_charset(smiles)

    encoded = []
    for smile in smiles:
        encoded.append(self.one_hot_encoded(smile))
    return np.array(encoded)
Example #11
Source File: lowe_interactive_predict.py From ochem_predict_nn with MIT License | 6 votes |
def score_candidates(reactants, candidate_list, xs):
    """Score candidates with the module-level model and save a ranked table.

    Prompts on stdin for a file name, then writes a tab-separated ``.dat``
    file under ``FROOT`` containing, for each candidate, its SMILES
    (``candidate[0]``), its edit (``candidate[1]``), the predicted value and
    its rank (1 = highest prediction).

    :param reactants: rdkit Mol of the reactants, written as SMILES in the header line.
    :param candidate_list: sequence of candidates, indexable as described above.
    :param xs: model input passed to ``model.predict``.
    """
    # ``raw_input`` only exists on Python 2; fall back to ``input`` on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    pred = model.predict(xs, batch_size = 20)[0]
    rank = ss.rankdata(pred)

    fname = read_line('Enter file name to save to: ') + '.dat'
    out_path = os.path.join(FROOT, fname)
    with open(out_path, 'w') as fid:
        fid.write('FOR REACTANTS {}\n'.format(Chem.MolToSmiles(reactants)))
        fid.write('Candidate product\tCandidate edit\tProbability\tRank\n')
        for (c, candidate) in enumerate(candidate_list):
            candidate_smile = candidate[0]
            candidate_edit = candidate[1]
            # rankdata ranks ascending; invert so the largest prediction is rank 1.
            fid.write('{}\t{}\t{}\t{}\n'.format(
                candidate_smile, candidate_edit, pred[c], 1 + len(pred) - rank[c]
            ))
    print('Wrote to file {}'.format(out_path))
Example #12
Source File: clean_uspto.py From GLN with MIT License | 6 votes |
def get_rxn_smiles(prod, reactants):
    """Build a ``reactants>>product`` reaction SMILES, dropping unused reactants.

    A reactant is kept only when at least one of its atoms carries a
    ``molAtomMapNumber`` that also appears in the product SMILES.  Map
    numbers on reactant atoms that do not appear in the product are cleared
    (this mutates the reactant molecules).  ``None`` entries in
    ``reactants`` are skipped.

    :param prod: rdkit Mol of the product.
    :param reactants: iterable of rdkit Mol objects (or None).
    :returns: string ``"<r1>.<r2>...>><product>"``.
    """
    prod_smi = Chem.MolToSmiles(prod, True)

    # Get rid of reactants when they don't contribute to this prod.
    # Atom-map numbers appear as ":<digits>]" inside bracket atoms.  Raw
    # string, and no stray '[' in the character class.
    prod_maps = set(re.findall(r':([0-9]+)\]', prod_smi))
    reactants_smi_list = []
    for mol in reactants:
        if mol is None:
            continue
        used = False
        for a in mol.GetAtoms():
            if a.HasProp('molAtomMapNumber'):
                if a.GetProp('molAtomMapNumber') in prod_maps:
                    used = True
                else:
                    a.ClearProp('molAtomMapNumber')
        if used:
            reactants_smi_list.append(Chem.MolToSmiles(mol, True))

    reactants_smi = '.'.join(reactants_smi_list)
    return '{}>>{}'.format(reactants_smi, prod_smi)
Example #13
Source File: chem.py From reinvent-randomized with MIT License | 6 votes |
def randomize_smiles(mol, random_type="restricted"):
    """
    Produces a non-canonical SMILES string for a molecule.
    :param mol: A Mol object
    :param random_type: Randomization mode. "unrestricted" delegates the randomization to
                        MolToSmiles (doRandom=True); "restricted" shuffles the atom order and
                        writes the renumbered molecule without canonicalization.
    :return : A SMILES string of the same molecule, or None if the molecule is falsy.
    :raises ValueError: If random_type is neither of the two modes.
    """
    if not mol:
        return None

    if random_type == "restricted":
        shuffled_order = list(range(mol.GetNumAtoms()))
        random.shuffle(shuffled_order)
        renumbered = rkc.RenumberAtoms(mol, newOrder=shuffled_order)
        return rkc.MolToSmiles(renumbered, canonical=False, isomericSmiles=False)
    elif random_type == "unrestricted":
        return rkc.MolToSmiles(mol, canonical=False, doRandom=True, isomericSmiles=False)

    raise ValueError("Type '{}' is not valid".format(random_type))
Example #14
Source File: chemutils.py From dgl with Apache License 2.0 | 6 votes |
def decode_stereo(smiles2D):
    """Enumerate isomeric SMILES for the stereoisomers of ``smiles2D``.

    Every stereoisomer is round-tripped through isomeric SMILES and then
    written out again.  If the first isomer has nitrogen atoms with a chiral
    tag, those tags are cleared on every isomer and the resulting SMILES are
    appended as additional entries.

    Returns the list of SMILES strings (it may contain duplicates).
    """
    parent = Chem.MolFromSmiles(smiles2D)

    dec_isomers = []
    for isomer in list(EnumerateStereoisomers(parent)):
        roundtrip = Chem.MolToSmiles(isomer, isomericSmiles=True)
        dec_isomers.append(Chem.MolFromSmiles(roundtrip))

    smiles3D = [Chem.MolToSmiles(isomer, isomericSmiles=True) for isomer in dec_isomers]

    chiralN = []
    for atom in dec_isomers[0].GetAtoms():
        if int(atom.GetChiralTag()) > 0 and atom.GetSymbol() == "N":
            chiralN.append(atom.GetIdx())

    if chiralN:
        unspecified = Chem.rdchem.ChiralType.CHI_UNSPECIFIED
        for isomer in dec_isomers:
            for idx in chiralN:
                isomer.GetAtomWithIdx(idx).SetChiralTag(unspecified)
            smiles3D.append(Chem.MolToSmiles(isomer, isomericSmiles=True))

    return smiles3D
Example #15
Source File: save.py From PADME with MIT License | 6 votes |
def load_sdf_files(input_files, clean_mols):
    """Load SDF files, with their companion CSV task files, into dataframes.

    For each path in ``input_files``, tasks are read from ``<path>.csv`` and
    structures from the SDF file itself.  Unreadable molecules (``None``)
    are skipped.  ``clean_mols`` is passed as the second positional argument
    of ``Chem.SDMolSupplier``.

    Returns a list with one dataframe per input file, holding the columns
    ``mol_id``, ``smiles`` and ``mol`` followed by the task columns.
    """
    dataframes = []
    for input_file in input_files:
        # Tasks are stored in .sdf.csv file
        raw_df = next(load_csv_files([input_file + ".csv"], shard_size=None))

        # Structures are stored in .sdf file
        print("Reading structures from %s." % input_file)
        suppl = Chem.SDMolSupplier(str(input_file), clean_mols, False, False)
        df_rows = [
            [ind, Chem.MolToSmiles(mol), mol]
            for ind, mol in enumerate(suppl)
            if mol is not None
        ]
        mol_df = pd.DataFrame(df_rows, columns=('mol_id', 'smiles', 'mol'))

        # NOTE(review): the join is on the dataframe index, which for mol_df is
        # the row position after skipping unreadable molecules, not mol_id.
        # Verify that this lines up with raw_df when molecules are skipped.
        merged = pd.concat([mol_df, raw_df], axis=1, join='inner')
        dataframes.append(merged)
    return dataframes
Example #16
Source File: crossover.py From guacamol_baselines with MIT License | 5 votes |
def crossover(parent_A, parent_B):
    """Try to produce a child molecule that differs from both parents.

    Both parents are kekulized in place (best effort).  Up to 10 attempts
    are made; each picks non-ring or ring crossover with equal probability.
    The first non-None child whose SMILES is not one of the parents'
    original SMILES is returned, otherwise ``None``.
    """
    parent_smiles = [Chem.MolToSmiles(parent_A), Chem.MolToSmiles(parent_B)]

    try:
        Chem.Kekulize(parent_A, clearAromaticFlags=True)
        Chem.Kekulize(parent_B, clearAromaticFlags=True)
    except ValueError:
        # Kekulization failure is tolerated; continue with the parents as they are.
        pass

    for _ in range(10):
        # <= 0.5 selects non-ring crossover, otherwise ring crossover.
        recombine = crossover_non_ring if random.random() <= 0.5 else crossover_ring
        new_mol = recombine(parent_A, parent_B)
        if new_mol is None:
            continue
        new_smiles = Chem.MolToSmiles(new_mol)
        if new_smiles is not None and new_smiles not in parent_smiles:
            return new_mol

    return None
Example #17
Source File: chem.py From reinvent-randomized with MIT License | 5 votes |
def to_smiles(mol):
    """
    Writes a Mol object as a SMILES string without stereo information.
    :param mol: Mol object.
    :return: A SMILES string.
    """
    smiles = rkc.MolToSmiles(mol, isomericSmiles=False)
    return smiles
Example #18
Source File: struct_utils.py From AMPL with MIT License | 5 votes |
def base_smiles_from_inchi(inchi_str, useIsomericSmiles=True, removeCharges=False, workers=1):
    """
    Generate a standardized SMILES string from an InChi string, using the molecule returned by
    base_mol_from_inchi. If useIsomericSmiles is True (the default), stereochemistry info is retained
    in the generated SMILES string. removeCharges is forwarded to base_mol_from_inchi.

    inchi_str may also be a list, in which case a list of SMILES strings is returned. If workers > 1
    and inchi_str is a list, the list is split into batches of 200 and mapped over a
    multiprocessing pool with that many workers.

    An empty string is returned for any input for which base_mol_from_inchi yields None.
    """
    if not isinstance(inchi_str, list):
        # Actual standardization code; everything below is list handling and multiprocessing.
        std_mol = base_mol_from_inchi(inchi_str, useIsomericSmiles, removeCharges)
        if std_mol is None:
            return ""
        return Chem.MolToSmiles(std_mol, isomericSmiles=useIsomericSmiles)

    from functools import partial
    func = partial(base_smiles_from_inchi, useIsomericSmiles=useIsomericSmiles, removeCharges=removeCharges)

    if not workers > 1:
        return [func(inchi) for inchi in inchi_str]

    from multiprocessing import pool
    batchsize = 200
    batches = []
    for start in range(0, len(inchi_str), batchsize):
        batches.append(inchi_str[start:start + batchsize])
    with pool.Pool(workers) as p:
        per_batch = p.map(func, batches)

    # Flatten the per-batch result lists.
    base_smiles = []
    for batch_result in per_batch:
        base_smiles.extend(batch_result)
    return base_smiles
Example #19
Source File: resonance.py From MolVS with MIT License | 5 votes |
def enumerate_resonance_smiles(smiles):
    """Enumerate resonance forms of a molecule and return them as isomeric SMILES.

    :param smiles: A SMILES string.
    :returns: A set with one SMILES string per enumerated resonance form.
    :rtype: set of strings.
    """
    # MolFromSmiles sanitizes by default, so no explicit SanitizeMol call is made.
    mol = Chem.MolFromSmiles(smiles)
    enumerator = ResonanceEnumerator()
    unique_smiles = set()
    for mesomer in enumerator.enumerate(mol):
        unique_smiles.add(Chem.MolToSmiles(mesomer, isomericSmiles=True))
    return unique_smiles
Example #20
Source File: struct_utils.py From AMPL with MIT License | 5 votes |
def base_smiles_from_smiles(orig_smiles, useIsomericSmiles=True, removeCharges=False, workers=1):
    """
    Generate a standardized SMILES string from a SMILES string, using the molecule returned by
    base_mol_from_smiles. useIsomericSmiles controls whether stereochemistry is kept in the output;
    removeCharges is forwarded to base_mol_from_smiles.

    orig_smiles may also be a list, in which case a list of SMILES strings is returned; with
    workers > 1 the list is processed in batches of 200 on a multiprocessing pool.

    An empty string is returned for any input for which base_mol_from_smiles yields None.
    """
    if isinstance(orig_smiles, list):
        from functools import partial
        standardize = partial(base_smiles_from_smiles, useIsomericSmiles=useIsomericSmiles, removeCharges=removeCharges)
        if workers > 1:
            from multiprocessing import pool
            batchsize = 200
            starts = range(0, len(orig_smiles), batchsize)
            batches = [orig_smiles[i:i + batchsize] for i in starts]
            with pool.Pool(workers) as p:
                nested = p.map(standardize, batches)
            # Flatten results
            return [smi for batch in nested for smi in batch]
        return [standardize(smi) for smi in orig_smiles]

    # Actual standardization code, everything above here is for multiprocessing and list parsing
    std_mol = base_mol_from_smiles(orig_smiles, useIsomericSmiles, removeCharges)
    return "" if std_mol is None else Chem.MolToSmiles(std_mol, isomericSmiles=useIsomericSmiles)
Example #21
Source File: struct_utils.py From AMPL with MIT License | 5 votes |
def get_rdkit_smiles(orig_smiles, useIsomericSmiles=True):
    """
    Parse a SMILES string with RDKit and write it back out, giving RDKit's own SMILES for the
    same molecule. With useIsomericSmiles false, stereochemistry information is left out of the
    generated string. Returns an empty string when the input cannot be parsed.
    """
    mol = Chem.MolFromSmiles(orig_smiles)
    if mol is not None:
        return Chem.MolToSmiles(mol, isomericSmiles=useIsomericSmiles)
    return ""
Example #22
Source File: cli.py From MolVS with MIT License | 5 votes |
def _write_mol(mol, args):
    """Write ``mol`` to ``args.outfile`` as SMILES or as a mol block.

    SMILES (followed by a newline) is written when ``args.outtype`` or the
    output file name suffix asks for smi/smiles, and also as the fallback.
    A mol block is written only when smi/smiles was not requested and the
    output type or file name suffix is mol/sdf.
    """
    wants_smiles = (
        args.outtype in {'smi', 'smiles'}
        or args.outfile.name.endswith(('smi', 'smiles'))
    )
    wants_molblock = not wants_smiles and (
        args.outtype in {'mol', 'sdf'}
        or args.outfile.name.endswith(('mol', 'sdf'))
    )

    if wants_molblock:
        args.outfile.write(Chem.MolToMolBlock(mol))
        return

    args.outfile.write(Chem.MolToSmiles(mol))
    args.outfile.write('\n')
Example #23
Source File: sascorer.py From icml18-jtnn with MIT License | 5 votes |
def processMols(mols):
    """Print a tab-separated table of SMILES, name and score for each molecule.

    A header line is printed first.  ``None`` entries are skipped.  The name
    is read from each molecule's ``_Name`` property and the score comes from
    ``calculateScore``.
    """
    print('smiles\tName\tsa_score')
    for m in mols:
        if m is None:
            continue

        s = calculateScore(m)

        smiles = Chem.MolToSmiles(m)
        name = m.GetProp('_Name')
        print(smiles + "\t" + name + "\t%3f" % s)
Example #24
Source File: chemutils.py From icml18-jtnn with MIT License | 5 votes |
def decode_stereo(smiles2D):
    """Return isomeric SMILES for every enumerated stereoisomer of ``smiles2D``.

    Isomers are re-parsed from their own isomeric SMILES before being
    written out.  When the first isomer contains chirally tagged nitrogen
    atoms, a second SMILES per isomer is appended with those nitrogen tags
    reset to unspecified.
    """
    def isomeric(m):
        return Chem.MolToSmiles(m, isomericSmiles=True)

    mol = Chem.MolFromSmiles(smiles2D)
    enumerated = list(EnumerateStereoisomers(mol))

    # Round-trip through SMILES so each isomer is a freshly parsed molecule.
    dec_isomers = [Chem.MolFromSmiles(isomeric(m)) for m in enumerated]
    smiles3D = [isomeric(m) for m in dec_isomers]

    first = dec_isomers[0]
    chiralN = [atom.GetIdx() for atom in first.GetAtoms()
               if int(atom.GetChiralTag()) > 0 and atom.GetSymbol() == "N"]
    if len(chiralN) == 0:
        return smiles3D

    for m in dec_isomers:
        for idx in chiralN:
            m.GetAtomWithIdx(idx).SetChiralTag(Chem.rdchem.ChiralType.CHI_UNSPECIFIED)
        smiles3D.append(isomeric(m))
    return smiles3D
Example #25
Source File: chemutils.py From icml18-jtnn with MIT License | 5 votes |
def get_smiles(mol):
    """Return the SMILES of ``mol`` written in Kekule form."""
    kekule_smiles = Chem.MolToSmiles(mol, kekuleSmiles=True)
    return kekule_smiles
Example #26
Source File: jtnn_vae.py From icml18-jtnn with MIT License | 5 votes |
def decode(self, x_tree_vecs, x_mol_vecs, prob_decode):
    """Decode one latent (tree, molecule) vector pair into a SMILES string.

    Batch decoding is not supported: both inputs must have size 1 along
    dimension 0.  The tree decoder predicts the nodes, which are then
    assembled into a molecule, first with ``check_aroma=True`` and, if that
    yields nothing, again with ``check_aroma=False`` (falling back to the
    partial molecule of that second attempt).

    Returns the SMILES of the assembled molecule, the root node's SMILES
    when only one node is predicted, or ``None`` when nothing could be
    decoded or the result does not parse.
    """
    # currently do not support batch decoding
    assert x_tree_vecs.size(0) == 1 and x_mol_vecs.size(0) == 1

    pred_root, pred_nodes = self.decoder.decode(x_tree_vecs, prob_decode)
    if len(pred_nodes) == 0:
        return None
    if len(pred_nodes) == 1:
        return pred_root.smiles

    # Mark nid & is_leaf & atommap
    for position, node in enumerate(pred_nodes):
        node.nid = position + 1
        degree = len(node.neighbors)
        node.is_leaf = (degree == 1)
        if degree > 1:
            set_atommap(node.mol, node.nid)

    scope = [(0, len(pred_nodes))]
    jtenc_holder, mess_dict = JTNNEncoder.tensorize_nodes(pred_nodes, scope)
    _, tree_mess = self.jtnn(*jtenc_holder)
    # Important: tree_mess is a matrix, mess_dict is a python dict
    tree_mess = (tree_mess, mess_dict)

    x_mol_vecs = self.A_assm(x_mol_vecs).squeeze()  # bilinear

    cur_mol, pre_mol = None, None
    for check_aroma in (True, False):
        # Each attempt starts from a fresh copy of the root and a fresh atom
        # map: slot 0 is an unused placeholder, slot 1 maps root atoms onto
        # themselves.
        cur_mol = copy_edit_mol(pred_root.mol)
        global_amap = [{}] + [{} for node in pred_nodes]
        global_amap[1] = {atom.GetIdx(): atom.GetIdx() for atom in cur_mol.GetAtoms()}
        cur_mol, pre_mol = self.dfs_assemble(tree_mess, x_mol_vecs, pred_nodes, cur_mol, global_amap, [], pred_root, None, prob_decode, check_aroma=check_aroma)
        if cur_mol is not None:
            break

    if cur_mol is None:
        # Only reached after the check_aroma=False attempt; use its partial result.
        cur_mol = pre_mol
    if cur_mol is None:
        return None

    cur_mol = cur_mol.GetMol()
    set_atommap(cur_mol)
    cur_mol = Chem.MolFromSmiles(Chem.MolToSmiles(cur_mol))
    if cur_mol is None:
        return None
    return Chem.MolToSmiles(cur_mol)
Example #27
Source File: mol_tree.py From icml18-jtnn with MIT License | 5 votes |
def __init__(self, smiles):
    """Decompose ``smiles`` into a tree of clique nodes.

    Sets ``self.smiles``, ``self.mol`` and ``self.nodes``.  One
    ``MolTreeNode`` is built per clique returned by ``tree_decomp``, and
    neighbours are linked along the returned edges.  The node whose clique
    contains atom 0 is moved to position 0.  Nodes get 1-based ``nid``
    values and an ``is_leaf`` flag; nodes with more than one neighbour also
    get their atoms mapped to their ``nid``.
    """
    self.smiles = smiles
    self.mol = get_mol(smiles)

    # Stereo generation is currently disabled.

    cliques, edges = tree_decomp(self.mol)

    self.nodes = []
    root = 0
    for position, clique in enumerate(cliques):
        clique_mol = get_clique_mol(self.mol, clique)
        self.nodes.append(MolTreeNode(get_smiles(clique_mol), clique))
        if min(clique) == 0:
            root = position

    for x, y in edges:
        node_x, node_y = self.nodes[x], self.nodes[y]
        node_x.add_neighbor(node_y)
        node_y.add_neighbor(node_x)

    # Put the root clique first.
    if root > 0:
        self.nodes[0], self.nodes[root] = self.nodes[root], self.nodes[0]

    for position, node in enumerate(self.nodes):
        node.nid = position + 1
        degree = len(node.neighbors)
        if degree > 1:  # Leaf node mol is not marked
            set_atommap(node.mol, node.nid)
        node.is_leaf = (degree == 1)
Example #28
Source File: sascorer.py From icml18-jtnn with MIT License | 5 votes |
def processMols(mols):
    """Print SMILES, ``_Name`` property and ``calculateScore`` result per molecule.

    Output is tab-separated with a leading header line; entries that are
    ``None`` produce no row.
    """
    print('smiles\tName\tsa_score')
    scorable = (m for m in mols if m is not None)
    for m in scorable:
        s = calculateScore(m)
        smiles = Chem.MolToSmiles(m)
        row = smiles + "\t" + m.GetProp('_Name')
        row += "\t%3f" % s
        print(row)
Example #29
Source File: goal_directed_generation.py From guacamol_baselines with MIT License | 5 votes |
def score_mol(mol, score_fn):
    """Apply ``score_fn`` to the SMILES string of ``mol`` and return its result."""
    smiles = Chem.MolToSmiles(mol)
    return score_fn(smiles)
Example #30
Source File: vectorizers.py From Deep-Drug-Coder with MIT License | 5 votes |
def randomize_smiles(self, smiles):
    """Randomize a SMILES string; the input must be RDKit sanitizable.

    The string is parsed, passed through ``self.randomize_mol`` and written
    back out using this object's ``canonical`` and ``isomericSmiles``
    settings.
    """
    parsed = Chem.MolFromSmiles(smiles)
    shuffled = self.randomize_mol(parsed)
    return Chem.MolToSmiles(
        shuffled,
        canonical=self.canonical,
        isomericSmiles=self.isomericSmiles,
    )