Python Examples of rdkit.Chem.MolFromSmiles

Source File: properties.py From hgraph2graph with MIT License

8 votes

def similarity(a, b):
    """Return the Tanimoto similarity of two SMILES strings.

    Both strings are parsed with RDKit and compared through radius-2,
    2048-bit Morgan bit-vector fingerprints computed without chirality.
    ``0.0`` is returned when either argument is ``None`` or fails to parse.
    """
    if not (a is not None and b is not None):
        return 0.0

    mols = [Chem.MolFromSmiles(smi) for smi in (a, b)]
    if any(m is None for m in mols):
        return 0.0

    fingerprints = [
        AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=2048, useChirality=False)
        for m in mols
    ]
    return DataStructs.TanimotoSimilarity(*fingerprints)

Source File: dataset.py From hgraph2graph with MIT License

7 votes

def __getitem__(self, idx):
        """Tensorize the vocabulary-compatible rootings of batch entry ``idx``.

        The stored SMILES is re-written rooted at every atom index returned by
        ``get_leaves``; the distinct non-isomeric strings are processed in
        sorted order.  A string is kept only when every node label of its
        ``MolGraph`` tree is found in ``self.vocab.vmap``.

        Returns ``MolGraph.tensorize`` of the kept strings, or ``None`` when
        nothing was kept.
        """
        mol = Chem.MolFromSmiles(self.batches[idx])
        leaves = get_leaves(mol)
        rooted = {
            Chem.MolToSmiles(mol, rootedAtAtom=leaf, isomericSmiles=False)
            for leaf in leaves
        }

        safe_list = []
        for smiles in sorted(rooted):  # sorted to ensure reproducibility
            tree = MolGraph(smiles).mol_tree
            labels = [attr['label'] for _, attr in tree.nodes(data=True)]
            # Every label is inspected (no early exit), like a per-node flag loop.
            unknown = [label for label in labels if label not in self.vocab.vmap]
            if not unknown:
                safe_list.append(smiles)

        if not safe_list:
            return None
        return MolGraph.tensorize(safe_list, self.vocab, self.avocab)

Source File: utilsFP.py From CheTo with BSD 3-Clause "New" or "Revised" License

6 votes

def generateAtomInvariant(mol):
    """
    Compute one 32-bit invariant per atom of ``mol``.

    Each invariant is the hash of the tuple (atomic number, total degree,
    total hydrogen count, ring membership, aromaticity) masked to 32 bits.
    The returned list follows the iteration order of ``mol.GetAtoms()``.

    >>> generateAtomInvariant(Chem.MolFromSmiles("Cc1ncccc1"))
    [341294046, 3184205312, 522345510, 1545984525, 1545984525, 1545984525, 1545984525]

    """
    def _descriptor(atom):
        # Tuple of the five per-atom properties that feed the hash.
        return (
            atom.GetAtomicNum(),
            atom.GetTotalDegree(),
            atom.GetTotalNumHs(),
            atom.IsInRing(),
            atom.GetIsAromatic(),
        )

    return [hash(_descriptor(atom)) & 0xffffffff for atom in mol.GetAtoms()]


#------------------------------------
#
#  doctest boilerplate
#

Source File: test_rdkit_grid_features.py From deepchem with MIT License

6 votes

def setUp(self):
    """Create the fixtures: a 2D cyclobutane ring plus the 3ws9 protein and ligand.

    The protein and ligand files are looked up next to this test file and
    loaded through ``rgf.load_molecule`` with sanitization enabled and without
    adding hydrogens or computing charges.
    """
    from rdkit.Chem import MolFromSmiles

    here = os.path.dirname(os.path.realpath(__file__))

    # simple flat ring
    self.cycle4 = MolFromSmiles('C1CCC1')
    self.cycle4.Compute2DCoords()

    # load and sanitize two real molecules, both with the same options
    load_options = dict(add_hydrogens=False, calc_charges=False, sanitize=True)

    protein_path = os.path.join(here, '3ws9_protein_fixer_rdkit.pdb')
    _, self.prot = rgf.load_molecule(protein_path, **load_options)

    ligand_path = os.path.join(here, '3ws9_ligand.sdf')
    _, self.lig = rgf.load_molecule(ligand_path, **load_options)

Source File: chemutils.py From hgraph2graph with MIT License

6 votes

def copy_edit_mol(mol):
    """Return an editable ``RWMol`` rebuilt from the atoms and bonds of ``mol``.

    Atoms are cloned through ``copy_atom`` in iteration order, then every bond
    is re-created between the same atom indices with its original bond type.
    """
    editable = Chem.RWMol(Chem.MolFromSmiles(''))

    for src_atom in mol.GetAtoms():
        editable.AddAtom(copy_atom(src_atom))

    for src_bond in mol.GetBonds():
        begin_idx = src_bond.GetBeginAtom().GetIdx()
        end_idx = src_bond.GetEndAtom().GetIdx()
        editable.AddBond(begin_idx, end_idx, src_bond.GetBondType())

    return editable

Source File: chemTopicModel.py From CheTo with BSD 3-Clause "New" or "Revised" License

6 votes

def _generateMolFrags(datachunk, vocabulary, fragmentMethod, fragIdx=None):
    if fragIdx is None and fragmentMethod == 'Brics':
        return
    result={}
    for idx, smi in datachunk:
        mol = Chem.MolFromSmiles(str(smi))
        if mol == None:
            continue
        fp,_=_generateFPs(mol,fragmentMethod=fragmentMethod)
        if fp is None:
            continue
        tmp={}
        for k,v in fp.items():
            if k not in vocabulary:
                continue
            # save memory: for BRICS use index instead of long complicated SMILES
            if fragmentMethod == 'Brics':
                tmp[fragIdx[k]]=v
            else:
                tmp[k]=v
        result[idx]=tmp
    return result

########### chemical topic modeling class ###################

Source File: dataset_utils.py From rl_graph_generation with BSD 3-Clause "New" or "Revised" License

6 votes

def __getitem__(self, item):
    """
    Parse the SMILES stored under ``item`` into an rdkit mol object
    :param item: key used to index the 'smiles' column of ``self.df``
    :return: whatever ``Chem.MolFromSmiles`` returns for that SMILES
    """
    return Chem.MolFromSmiles(self.df['smiles'][item])

# # TESTS
# path = 'gdb13.rand1M.smi.gz'
# dataset = gdb_dataset(path)
#
# print(len(dataset))
# mol,_ = dataset[0]
# graph = mol_to_nx(mol)
# graph_sub = graph.subgraph([0,3,5,7,9])
# graph_sub_new = nx.convert_node_labels_to_integers(graph_sub,label_attribute='old')
# graph_sub_node = graph_sub.nodes()
# graph_sub_new_node = graph_sub_new.nodes()
# matrix = nx.adjacency_matrix(graph_sub)
# np_matrix = matrix.toarray()
# print(np_matrix)
# print('end')

Source File: align.py From hgraph2graph with MIT License

6 votes

def align(xy_tuple):
    """Re-root a pair of SMILES so that their leading halves are most similar.

    ``xy_tuple`` is an ``(x, y)`` pair of SMILES strings.  ``x`` is first
    round-tripped through its non-isomeric SMILES.  Every atom index returned
    by ``get_leaves`` is then tried as the root of each molecule, and the pair
    of roots is chosen that minimises the Levenshtein distance between the
    leading ``min(len) // 2`` characters of the two rooted SMILES (the first
    minimum found wins).

    Returns the two non-isomeric SMILES rooted at the chosen atoms.  If either
    molecule has no leaves, both are rooted at atom 0.
    """
    def _rooted(mol, atom_idx):
        # Non-isomeric SMILES of `mol` written starting from `atom_idx`.
        return Chem.MolToSmiles(mol, rootedAtAtom=atom_idx, isomericSmiles=False)

    x, y = xy_tuple
    xmol, ymol = Chem.MolFromSmiles(x), Chem.MolFromSmiles(y)
    x = Chem.MolToSmiles(xmol, isomericSmiles=False)
    xmol = Chem.MolFromSmiles(x)

    xleaf = get_leaves(xmol)
    yleaf = get_leaves(ymol)

    # A rooted SMILES depends only on (molecule, root), so build each one once
    # instead of regenerating both strings for every (i, j) combination.
    x_rooted = [(i, _rooted(xmol, i)) for i in xleaf]
    y_rooted = [(j, _rooted(ymol, j)) for j in yleaf]

    best_i, best_j = 0, 0
    best = 1000000
    for i, new_x in x_rooted:
        for j, new_y in y_rooted:
            le = min(len(new_x), len(new_y)) // 2
            dist = Levenshtein.distance(new_x[:le], new_y[:le])
            if dist < best:
                best_i, best_j = i, j
                best = dist

    return _rooted(xmol, best_i), _rooted(ymol, best_j)

Source File: data_loader.py From PADME with MIT License

6 votes

def featurize_smiles_np(arr, featurizer, log_every_N=1000, verbose=True):
  """Featurize the compounds held in a numpy array of SMILES.

  Each element is parsed with RDKit and, when parsing succeeds, its atoms
  are renumbered into canonical rank order before ``featurizer.featurize``
  is called on the single molecule.  Results with ``size == 0`` are dropped;
  the remaining results are stacked, squeezed and returned flattened to 1-D.
  Progress is logged every ``log_every_N`` samples.
  """
  per_sample = []
  for position, smiles in enumerate(arr.tolist()):
    mol = Chem.MolFromSmiles(smiles)
    if mol:
      mol = rdmolops.RenumberAtoms(mol, rdmolfiles.CanonicalRankAtoms(mol))
    if position % log_every_N == 0:
      log("Featurizing sample %d" % position, verbose)
    per_sample.append(featurizer.featurize([mol]))

  # Keep only the non-empty feature arrays.
  kept = [feat for feat in per_sample if feat.size > 0]
  stacked = np.squeeze(np.array(kept))
  return stacked.reshape(-1,)

Source File: utilsFP.py From CheTo with BSD 3-Clause "New" or "Revised" License

6 votes

def getSubstructSmi(mol,env,propsToSmiles=True):
    """
    Return the SMILES of the substructure of ``mol`` spanned by the bonds in ``env``.

    ``env`` is a sequence of bond indices; an empty ``env`` yields ``''``.
    The fragment SMILES is written non-isomeric, with explicit hydrogens and
    explicit bonds.  With ``propsToSmiles`` set, the result is post-processed by
    ``writePropsToSmiles`` using the atom output order RDKit stored on ``mol``.

    >>> getSubstructSmi(Chem.MolFromSmiles('Cc1ncccc1'),((0,1,2)))
    '[cH;R;D2]:[n;R;D2]:[c;R;D3]-[CH3;R0;D1]'

    """
    import ast

    if not len(env):
        return ''
    atomsToUse=set()
    for b in env:
        bond = mol.GetBondWithIdx(b)  # look the bond up once for both ends
        atomsToUse.add(bond.GetBeginAtomIdx())
        atomsToUse.add(bond.GetEndAtomIdx())
    # no isomeric smiles since we don't include that in the fingerprints
    smi = Chem.MolFragmentToSmiles(mol,atomsToUse,isomericSmiles=False,
                                   bondsToUse=env,allHsExplicit=True, allBondsExplicit=True)
    if propsToSmiles:
        # The property is a textual list of atom indices; parse it as a literal
        # rather than executing it with eval().
        order = ast.literal_eval(mol.GetProp("_smilesAtomOutputOrder"))
        smi = writePropsToSmiles(mol,smi,order)
    return smi

Source File: drd2_scorer.py From hgraph2graph with MIT License

6 votes

def get_score(smile):
    """Score a SMILES string with the module-level classifier.

    The classifier is loaded on first use via ``load_model``.  Returns the
    second-column (index 1) value of ``predict_proba`` for the molecule's
    fingerprint as a float, or ``0.0`` when the SMILES cannot be parsed.
    """
    if clf_model is None:
        load_model()

    mol = Chem.MolFromSmiles(smile)
    if not mol:
        return 0.0

    positive_proba = clf_model.predict_proba(fingerprints_from_mol(mol))[:, 1]
    return float(positive_proba)

Source File: dataset.py From hgraph2graph with MIT License

6 votes

def __getitem__(self, idx):
        """Return the tensorized, vocabulary-safe rootings of batch entry ``idx``.

        The entry's SMILES is rooted at each atom index from ``get_leaves`` and
        the distinct non-isomeric strings are visited in sorted order.  Only
        strings whose ``MolGraph`` tree uses labels found in
        ``self.vocab.vmap`` are passed to ``MolGraph.tensorize``; ``None`` is
        returned when no string qualifies.
        """
        mol = Chem.MolFromSmiles(self.batches[idx])
        leaves = get_leaves(mol)
        candidates = {
            Chem.MolToSmiles(mol, rootedAtAtom=leaf, isomericSmiles=False)
            for leaf in leaves
        }

        safe_list = []
        for smiles in sorted(candidates):  # sorted to ensure reproducibility
            tree = MolGraph(smiles).mol_tree
            labels = [attr['label'] for _, attr in tree.nodes(data=True)]
            # All labels are examined; nothing short-circuits.
            missing = [label for label in labels if label not in self.vocab.vmap]
            if not missing:
                safe_list.append(smiles)

        if not safe_list:
            return None
        return MolGraph.tensorize(safe_list, self.vocab, self.avocab)

Source File: chemutils.py From hgraph2graph with MIT License

6 votes

def get_clique_mol(mol, atoms):
    """Build a standalone molecule from the atoms ``atoms`` of ``mol``.

    The fragment is written as kekulized SMILES, re-parsed without
    sanitization, copied through ``copy_edit_mol`` and finally passed to
    ``sanitize``, whose result is returned.
    """
    fragment_smiles = Chem.MolFragmentToSmiles(mol, atoms, kekuleSmiles=True)
    raw_fragment = Chem.MolFromSmiles(fragment_smiles, sanitize=False)
    return sanitize(copy_edit_mol(raw_fragment).GetMol())

Source File: test_tensorflowEncoders.py From deepchem with MIT License

6 votes

def test_fit(self):
    """Round-trip a batch of SMILES through the zinc encoder and decoder.

    The molecules are one-hot featurized, encoded, decoded and un-transformed;
    the test only checks that as many SMILES come out as went in.
    """
    encoder = TensorflowMoleculeEncoder.zinc_encoder()

    smiles = [
        "Cn1cnc2c1c(=O)n(C)c(=O)n2C", "O=C(O)[C@@H]1/C(=C/CO)O[C@@H]2CC(=O)N21",
        "Cn1c2nncnc2c(=O)n(C)c1=O", "Cn1cnc2c1c(=O)[nH]c(=O)n2C",
        "NC(=O)c1ncc[nH]c1=O", "O=C1OCc2c1[nH]c(=O)[nH]c2=O",
        "Cn1c(N)c(N)c(=O)n(C)c1=O", "CNc1nc2c([nH]1)c(=O)[nH]c(=O)n2C",
        "CC(=O)N1CN(C(C)=O)[C@@H](O)[C@@H]1O",
        "CC(=O)N1CN(C(C)=O)[C@H](O)[C@H]1O", "Cc1[nH]c(=O)[nH]c(=O)c1CO",
        "O=C1NCCCc2c1no[n+]2[O-]", "Cc1nc(C(N)=O)c(N)n1CCO",
        "O=c1[nH]cc(N2CCOCC2)c(=O)[nH]1"
    ]

    featurizer = dc.feat.one_hot.OneHotFeaturizer(zinc_charset, 120)
    one_hot = featurizer.featurize([Chem.MolFromSmiles(smi) for smi in smiles])

    # The same array serves as both X and y of the dataset.
    dataset = DiskDataset.from_numpy(one_hot, one_hot)
    latent = encoder.predict_on_batch(dataset.X)

    decoder = TensorflowMoleculeDecoder.zinc_decoder()
    decoded_one_hot = decoder.predict_on_batch(latent)
    decoded_smiles = featurizer.untransform(decoded_one_hot)

    assert len(decoded_smiles) == len(smiles)

Source File: mpnn.py From deepchem with MIT License

6 votes

def construct_multigraph(smile):
  """Build adjacency and node-feature dictionaries for a SMILES string.

  Returns a pair ``(g, h)`` of ``OrderedDict`` objects keyed by atom index:

  * ``h[i]`` is the atom feature vector of atom ``i`` viewed as ``(1, 75)``.
  * ``g[i]`` is a list of ``(edge_features, j)`` tuples, one per atom ``j``
    bonded to ``i`` in increasing ``j`` order, where ``edge_features`` is the
    0/1 bond feature vector viewed as ``(1, 6)``.  Atoms without bonds have
    no entry in ``g``.

  NOTE(review): an unparsable SMILES makes ``MolFromSmiles`` return ``None``
  and this function then fails on the first attribute access.
  """
  g = OrderedDict({})
  h = OrderedDict({})

  molecule = Chem.MolFromSmiles(smile)
  num_atoms = molecule.GetNumAtoms()
  # range() instead of xrange() so the function runs on Python 2 and 3 alike.
  for i in range(num_atoms):
    atom_i = molecule.GetAtomWithIdx(i)
    h[i] = Variable(torch.FloatTensor(dc.feat.graph_features.atom_features(atom_i))).view(1, 75)
    for j in range(num_atoms):
      bond = molecule.GetBondBetweenAtoms(i, j)
      if bond is None:
        continue
      # Materialise a list: on Python 3 map() is lazy and cannot be handed
      # to torch.FloatTensor.  The `== True` test is kept as-is on purpose.
      edge_bits = [1 if x == True else 0 for x in dc.feat.graph_features.bond_features(bond)] # ADDED edge feat
      e_ij = Variable(torch.FloatTensor(edge_bits).view(1, 6))
      if i not in g:
        g[i] = []
      g[i].append( (e_ij, j) )

  return g, h

Source File: eval_mol2vec_results.py From deepchem with MIT License

6 votes

def main() :
    """Print the summed word-vector embedding of glycine.

    Loads the word2vec vectors from "vec.txt", collects the radius-0 Morgan
    identifiers of glycine and adds up the 200-dimensional vectors stored for
    them.  If any step fails the error is printed and no embedding is shown.
    """
    model = models.KeyedVectors.load_word2vec_format("vec.txt")
    embeddings = list()

    # Using canonical smiles for glycine, as in original research paper
    mol = Chem.MolFromSmiles("C(C(=O)O)N")
    try:
        info = {}
        rdMolDescriptors.GetMorganFingerprint(mol, 0, bitInfo=info)
        totalvec = np.zeros(200)
        for k in list(info):
            totalvec = np.add(totalvec, model.wv[str(k)])
        embeddings.append(totalvec)
    except Exception as e:
        # Best-effort script: report the problem instead of aborting.
        print(e)

    # Only print when the embedding was actually built; indexing the empty
    # list after a reported failure would raise an unrelated IndexError.
    if embeddings:
        print(embeddings[0])

Source File: data_loader.py From PADME with MIT License

5 votes

def featurize_smiles_df(df, featurizer, field, log_every_N=1000, verbose=True):
  """Featurize individual compounds in dataframe.

  Given a featurizer that operates on individual chemical compounds
  or macromolecules, compute & add features for that compound to the
  features dataframe.

  While the compounds are processed, the file descriptor behind
  ``sys.stderr`` is redirected (in append mode) to './logs/error.log'.  The
  original descriptor is restored and every temporary descriptor is released
  even when featurization raises.

  Returns a tuple ``(features, valid_inds)``: the array of non-empty
  featurization results and a boolean array marking, per input row, whether
  its result was non-empty.
  """
  sample_elems = df[field].tolist()

  features = []
  stderr_fileno = sys.stderr.fileno()
  stderr_save = os.dup(stderr_fileno)
  try:
    with open('./logs/error.log', 'a') as stderr_fd:
      os.dup2(stderr_fd.fileno(), stderr_fileno)
      try:
        for ind, elem in enumerate(sample_elems):
          mol = Chem.MolFromSmiles(elem)
          # TODO (ytz) this is a bandage solution to reorder the atoms so
          # that they're always in the same canonical order. Presumably this
          # should be correctly implemented in the future for graph mols.
          if mol:
            new_order = rdmolfiles.CanonicalRankAtoms(mol)
            mol = rdmolops.RenumberAtoms(mol, new_order)
          if ind % log_every_N == 0:
            log("Featurizing sample %d" % ind, verbose)
          features.append(featurizer.featurize([mol], smiles=elem))
      finally:
        # Point stderr back at its original target before the log file closes.
        os.dup2(stderr_save, stderr_fileno)
  finally:
    # The duplicate from os.dup() only exists for the restore above.
    os.close(stderr_save)

  valid_inds = np.array([elt.size > 0 for elt in features], dtype=bool)
  features = [elt for (is_valid, elt) in zip(valid_inds, features) if is_valid]

  return np.array(features), valid_inds

Source File: utilsFP.py From CheTo with BSD 3-Clause "New" or "Revised" License

5 votes

def getMorganEnvironment(mol, bitInfo, fp=None, minRad=0):
    """
    Map each fingerprint bit to the bond environments that set it.

    ``bitInfo`` maps a bit to ``(atomID, radius)`` pairs.  For every pair with
    ``radius >= minRad`` the bond indices of the atom environment of that
    radius are appended to the bit's list.  Pairs below ``minRad`` are skipped
    and, when ``fp`` is given, the corresponding bit of ``fp`` is cleared in
    place.  Returns a ``defaultdict(list)``.

    >>> m = Chem.MolFromSmiles('CC(O)C')
    >>> bi = {}
    >>> fp = AllChem.GetMorganFingerprintAsBitVect(m,2,2048,bitInfo=bi)
    >>> getMorganEnvironment(m,bi)
    defaultdict(<class 'list'>, {1057: [[], []], 227: [[1]], 709: [[0, 1, 2]], 1: [[]], 283: [[0], [2]], 807: [[]]})
    >>> getMorganEnvironment(m,bi,minRad=1)
    defaultdict(<class 'list'>, {283: [[0], [2]], 227: [[1]], 709: [[0, 1, 2]]})
    >>> list(fp.GetOnBits())
    [1, 227, 283, 709, 807, 1057]
    >>> getMorganEnvironment(m,bi,minRad=1,fp=fp)
    defaultdict(<class 'list'>, {283: [[0], [2]], 227: [[1]], 709: [[0, 1, 2]]})
    >>> list(fp.GetOnBits())
    [227, 283, 709]

    """
    bitPaths=defaultdict(list)
    for bit,info in bitInfo.items():
        for atomID,radius in info:
            if radius < minRad:
                # Identity test: "no fingerprint supplied" is exactly None, and
                # `!= None` would invoke the fingerprint type's own comparison.
                if fp is not None:
                    fp[bit]=0
                continue
            env = Chem.FindAtomEnvironmentOfRadiusN(mol,radius,atomID)
            bitPaths[bit].append(list(env))
    return bitPaths

Source File: utilsFP.py From CheTo with BSD 3-Clause "New" or "Revised" License

5 votes

def writePropsToSmiles(mol,smi,order):
    """
    Annotate the atoms written in ``smi`` with ring-membership and degree tags.

    ``order`` lists, for each atom position in ``smi`` (counted from 1), the
    index of the matching atom in ``mol``.  Atoms with atomic number 0 are
    left untouched.

    >>> writePropsToSmiles(Chem.MolFromSmiles('Cc1ncccc1'),'[cH]:[n]:[c]-[CH3]',(3,2,1,0))
    '[cH;R;D2]:[n;R;D2]:[c;R;D3]-[CH3;R0;D1]'

    """
    annotated = copy.deepcopy(smi)
    for position, atom_idx in enumerate(order, 1):
        atom = mol.GetAtomWithIdx(atom_idx)
        if atom.GetAtomicNum():
            annotated = _includeRingMembership(annotated, position, noRingAtom = not atom.IsInRing())
            annotated = _includeDegree(annotated, position, atom.GetDegree())
    return annotated

Source File: jtprop_vae.py From icml18-jtnn with MIT License

5 votes

def decode(self, tree_vec, mol_vec, prob_decode):
        """Decode a (tree_vec, mol_vec) pair into one of the stereo candidates.

        The tree decoder predicts a root and node list, ``dfs_assemble`` builds
        a molecule from them, and the result is round-tripped through SMILES.
        When ``decode_stereo`` yields several candidates, the one whose
        encoding has the highest cosine similarity to ``mol_vec`` is returned.

        Returns ``None`` when assembly fails or the assembled molecule cannot
        be re-parsed from its SMILES.
        """
        pred_root,pred_nodes = self.decoder.decode(tree_vec, prob_decode)

        #Mark nid & is_leaf & atommap
        for i,node in enumerate(pred_nodes):
            # node ids are 1-based
            node.nid = i + 1
            node.is_leaf = (len(node.neighbors) == 1)
            # only nodes with more than one neighbor get their atoms tagged with the nid
            if len(node.neighbors) > 1:
                set_atommap(node.mol, node.nid)

        tree_mess = self.jtnn([pred_root])[0]

        cur_mol = copy_edit_mol(pred_root.mol)
        # one dict per node id, plus an extra slot 0 because ids start at 1
        global_amap = [{}] + [{} for node in pred_nodes]
        # slot 1 starts as the identity mapping over the atoms of cur_mol
        # (presumably slot 1 belongs to the root -- confirm against the decoder's ordering)
        global_amap[1] = {atom.GetIdx():atom.GetIdx() for atom in cur_mol.GetAtoms()}

        cur_mol = self.dfs_assemble(tree_mess, mol_vec, pred_nodes, cur_mol, global_amap, [], pred_root, None, prob_decode)
        if cur_mol is None: 
            return None

        cur_mol = cur_mol.GetMol()
        # called without a number -- presumably resets the atom map numbers; confirm in set_atommap
        set_atommap(cur_mol)
        # round-trip through SMILES; a failed parse returns None
        cur_mol = Chem.MolFromSmiles(Chem.MolToSmiles(cur_mol))
        if cur_mol is None: return None

        smiles2D = Chem.MolToSmiles(cur_mol)
        stereo_cands = decode_stereo(smiles2D)
        # a single candidate needs no scoring
        if len(stereo_cands) == 1: 
            return stereo_cands[0]
        # encode every candidate and keep the one closest to mol_vec
        stereo_vecs = self.mpn(mol2graph(stereo_cands))
        stereo_vecs = self.G_mean(stereo_vecs)
        scores = nn.CosineSimilarity()(stereo_vecs, mol_vec)
        _,max_id = scores.max(dim=0)
        # NOTE(review): `.data[0]` indexing looks tied to an older PyTorch API -- confirm
        return stereo_cands[max_id.data[0]]

Source File: mol_tree.py From icml18-jtnn with MIT License

5 votes

def get_slots(smiles):
    """Return one ``(symbol, formal charge, total H count)`` tuple per atom of ``smiles``."""
    mol = Chem.MolFromSmiles(smiles)
    slots = []
    for atom in mol.GetAtoms():
        slots.append((atom.GetSymbol(), atom.GetFormalCharge(), atom.GetTotalNumHs()))
    return slots

Source File: drawFPBits.py From CheTo with BSD 3-Clause "New" or "Revised" License

5 votes

def _drawBricsFrag(smi,molSize=(150,150),kekulize=True,baseRad=0.05,svg=True,**kwargs):
    """Draw a fragment given as annotated SMILES and return the drawing text.

    The ``;R<d>;D<n>`` annotations are stripped from ``smi`` before parsing.
    The molecule is drawn with an SVG drawer when ``svg`` is true and with a
    Cairo drawer otherwise, on a canvas of ``molSize`` (width, height).
    Extra keyword arguments are forwarded to ``DrawMolecule``.  ``baseRad``
    is accepted but not used here.
    """
    # delete smarts specific syntax from the pattern
    cleaned = re.sub(r"\;R\d?\;D\d+", "", smi)
    parsed = Chem.MolFromSmiles(cleaned, sanitize=True)
    prepared = rdMolDraw2D.PrepareMolForDrawing(parsed, kekulize=kekulize)

    # Drawing
    if svg:
        drawer = rdMolDraw2D.MolDraw2DSVG(molSize[0],molSize[1])
    else:
        drawer = rdMolDraw2D.MolDraw2DCairo(molSize[0],molSize[1])
    drawer.DrawMolecule(prepared,**kwargs)
    drawer.FinishDrawing()
    return drawer.GetDrawingText()

Source File: chemutils.py From dgl with Apache License 2.0

5 votes

def get_clique_mol(mol, atoms):
    """Extract the atoms ``atoms`` of ``mol`` as a molecule of their own.

    The fragment is serialised to kekulized SMILES, parsed back without
    sanitization, rebuilt via ``copy_edit_mol`` and handed to ``sanitize``.
    The result of ``sanitize`` is returned as-is; it is assumed not to be None.
    """
    fragment_smiles = Chem.MolFragmentToSmiles(mol, atoms, kekuleSmiles=True)
    unsanitized = Chem.MolFromSmiles(fragment_smiles, sanitize=False)
    rebuilt = copy_edit_mol(unsanitized).GetMol()
    return sanitize(rebuilt)

Source File: chemutils.py From dgl with Apache License 2.0

5 votes

def copy_edit_mol(mol):
    """Rebuild ``mol`` as an editable ``RWMol``.

    Starting from an empty molecule, a ``copy_atom`` clone of every atom is
    added, followed by one bond per original bond connecting the same atom
    indices with the same bond type.
    """
    rw_mol = Chem.RWMol(Chem.MolFromSmiles(''))
    for original_atom in mol.GetAtoms():
        rw_mol.AddAtom(copy_atom(original_atom))
    for original_bond in mol.GetBonds():
        start = original_bond.GetBeginAtom().GetIdx()
        stop = original_bond.GetEndAtom().GetIdx()
        rw_mol.AddBond(start, stop, original_bond.GetBondType())
    return rw_mol

Source File: chemutils.py From dgl with Apache License 2.0

5 votes

def get_mol(smiles):
    """Parse ``smiles`` and kekulize the molecule in place.

    Returns the kekulized molecule, or ``None`` when the SMILES cannot be parsed.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is not None:
        Chem.Kekulize(parsed)
    return parsed

Source File: mol_tree.py From dgl with Apache License 2.0

5 votes

def get_slots(smiles):
    """List ``(symbol, formal charge, total H count)`` for every atom of ``smiles``."""
    mol = Chem.MolFromSmiles(smiles)

    def _slot(atom):
        # The three per-atom values that make up one slot.
        return (atom.GetSymbol(), atom.GetFormalCharge(), atom.GetTotalNumHs())

    return [_slot(atom) for atom in mol.GetAtoms()]

Source File: mol_tree_nx.py From dgl with Apache License 2.0

5 votes

def _recover_node(self, i, original_mol):
        """Compute and store the label of node ``i`` together with its successors.

        Atoms of ``original_mol`` are temporarily tagged with the ``nid`` of the
        node they belong to, the union of the cliques of node ``i`` and its
        successors is extracted as a molecule, and its canonical SMILES is
        stored in ``node['label']`` (with ``node['label_mol']`` derived from
        it).  All atom map numbers set here are reset to 0 before returning.

        Returns the label SMILES.
        """
        node = self.nodes_dict[i]

        # start from this node's own atoms
        clique = []
        clique.extend(node['clique'])
        if not node['is_leaf']:
            for cidx in node['clique']:
                original_mol.GetAtomWithIdx(cidx).SetAtomMapNum(node['nid'])

        # add the atoms of every successor node
        for j in self.successors(i).numpy():
            nei_node = self.nodes_dict[j]
            clique.extend(nei_node['clique'])
            if nei_node['is_leaf']: # Leaf node, no need to mark
                continue
            for cidx in nei_node['clique']:
                # allow singleton node override the atom mapping
                if cidx not in node['clique'] or len(nei_node['clique']) == 1:
                    atom = original_mol.GetAtomWithIdx(cidx)
                    atom.SetAtomMapNum(nei_node['nid'])

        # de-duplicate atoms shared between cliques
        clique = list(set(clique))
        label_mol = get_clique_mol(original_mol, clique)
        # round-trip through MolFromSmiles/MolToSmiles before storing the label
        node['label'] = Chem.MolToSmiles(Chem.MolFromSmiles(get_smiles(label_mol)))
        node['label_mol'] = get_mol(node['label'])

        # undo the temporary atom-map tagging on the shared molecule
        for cidx in clique:
            original_mol.GetAtomWithIdx(cidx).SetAtomMapNum(0)

        return node['label']

Source File: dataset_utils.py From rl_graph_generation with BSD 3-Clause "New" or "Revised" License

5 votes

def sort_dataset(in_path, out_path):
    """
    Loads the smiles dataset at the input path, orders it by molecular
    complexity (using the BertzCT index as the proxy) and writes the
    reordered smiles column to the output path
    :param in_path: passed to ``load_dataset``
    :param out_path: destination of the csv written without the index
    :return:
    """
    frame = load_dataset(in_path)
    frame['BertzCT'] = frame.smiles.apply(
        lambda smiles: GraphDescriptors.BertzCT(Chem.MolFromSmiles(smiles)))
    ordered = frame.sort_values(by=['BertzCT'])
    ordered['smiles'].to_csv(out_path, index=False)

Source File: canonicalization.py From ochem_predict_nn with MIT License

5 votes

def fix_smiles(self, old_smiles, removeMap = True):
		'''
		For a given SMILES string, this function "fixes" common mistakes
		found in the Lowe parsed database:
		- N=c[nH] structures are turned into the normal [NH]-c[n] forms
		- iminols are turned into amides/carbamates

		It applies the reactions in self.rxns until the SMILES string doesn't change.

		If old_smiles cannot be parsed it is returned unchanged. When removeMap
		is true, the 'molAtomMapNumber' property is cleared on every atom before
		the reactions are applied. Returns the resulting SMILES string.
		'''
		mol = Chem.MolFromSmiles(old_smiles)
		# Bail out on a parse failure *before* touching the molecule; clearing
		# the atom maps first would raise AttributeError on None.
		if mol is None:
			return old_smiles
		if removeMap:
			for atom in mol.GetAtoms():
				atom.ClearProp('molAtomMapNumber')

		new_smiles = Chem.MolToSmiles(mol, isomericSmiles = USE_STEREOCHEMISTRY)
		old_smiles = ''
		# Repeat full passes over the reactions until a pass changes nothing.
		while new_smiles != old_smiles:
			old_smiles = new_smiles
			for rxn in self.rxns:
				outcomes = rxn.RunReactants((mol,))
				if not outcomes:
					continue
				# Only the first product of the first outcome is carried forward.
				mol = outcomes[0][0]
				Chem.SanitizeMol(mol)
				new_smiles = Chem.MolToSmiles(mol, isomericSmiles = USE_STEREOCHEMISTRY)

		return new_smiles

Source File: donkey.py From deepchem with MIT License

5 votes

def generate_scaffold(smiles, include_chirality=False):
  """Return the Bemis-Murcko scaffold computed for a SMILES string.

  The SMILES is parsed with RDKit and passed to a ``ScaffoldGenerator``
  created with the given ``include_chirality`` setting.
  """
  mol = Chem.MolFromSmiles(smiles)
  return ScaffoldGenerator(include_chirality=include_chirality).get_scaffold(mol)

Python rdkit.Chem.MolFromSmiles() Examples