Python rdkit.Chem.MolFromSmiles() Examples

The following are 30 code examples of rdkit.Chem.MolFromSmiles(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module rdkit.Chem, or try the search function.
Example #1
Source File: properties.py    From hgraph2graph with MIT License 8 votes vote down vote up
def similarity(a, b):
    """Return the Tanimoto similarity of two SMILES strings.

    Both inputs are parsed with ``Chem.MolFromSmiles`` and compared through
    radius-2, 2048-bit Morgan bit-vector fingerprints computed with
    ``useChirality=False``.

    Returns 0.0 when either argument is None or when either parse yields
    None.
    """
    if a is None or b is None:
        return 0.0

    mols = [Chem.MolFromSmiles(smi) for smi in (a, b)]
    if any(m is None for m in mols):
        return 0.0

    fps = [
        AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=2048, useChirality=False)
        for m in mols
    ]
    return DataStructs.TanimotoSimilarity(fps[0], fps[1])
Example #2
Source File: dataset.py    From hgraph2graph with MIT License 7 votes vote down vote up
def __getitem__(self, idx):
        """Tensorize the vocabulary-compatible rooted SMILES of entry ``idx``.

        The SMILES in ``self.batches[idx]`` is re-written once per atom
        returned by ``get_leaves`` (non-isomeric, rooted at that atom).
        Duplicates are removed and the result is sorted.  A rooted SMILES is
        kept only if every node label of its ``MolGraph`` tree is present in
        ``self.vocab.vmap``.

        Returns ``MolGraph.tensorize(...)`` of the kept SMILES, or None when
        none are kept.
        """
        mol = Chem.MolFromSmiles(self.batches[idx])
        rooted = {
            Chem.MolToSmiles(mol, rootedAtAtom=leaf, isomericSmiles=False)
            for leaf in get_leaves(mol)
        }
        candidates = sorted(rooted)  # sorted to ensure reproducibility

        def _in_vocab(smiles):
            # Evaluate every node (no short-circuit) before deciding.
            graph = MolGraph(smiles)
            known = [attr['label'] in self.vocab.vmap
                     for _, attr in graph.mol_tree.nodes(data=True)]
            return all(known)

        safe_list = [s for s in candidates if _in_vocab(s)]

        if not safe_list:
            return None
        return MolGraph.tensorize(safe_list, self.vocab, self.avocab)
Example #3
Source File: utilsFP.py    From CheTo with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def generateAtomInvariant(mol):
    """
    Return one 32-bit invariant per atom of ``mol``, in atom iteration order.

    Each invariant is the hash of the tuple (atomic number, total degree,
    total number of Hs, ring membership, aromaticity), masked to its low
    32 bits.

    >>> generateAtomInvariant(Chem.MolFromSmiles("Cc1ncccc1"))
    [341294046, 3184205312, 522345510, 1545984525, 1545984525, 1545984525, 1545984525]

    """
    def _descriptor_key(atom):
        return (
            atom.GetAtomicNum(),
            atom.GetTotalDegree(),
            atom.GetTotalNumHs(),
            atom.IsInRing(),
            atom.GetIsAromatic(),
        )

    return [hash(_descriptor_key(atom)) & 0xffffffff for atom in mol.GetAtoms()]


#------------------------------------
#
#  doctest boilerplate
# 
Example #4
Source File: test_rdkit_grid_features.py    From deepchem with MIT License 6 votes vote down vote up
def setUp(self):
  """Prepare the fixtures shared by the tests.

  Builds ``self.cycle4`` from the SMILES ``C1CCC1`` with 2D coordinates, and
  loads ``self.prot`` / ``self.lig`` from the 3ws9 files that sit next to this
  test module.
  """
  here = os.path.dirname(os.path.realpath(__file__))

  # simple flat ring
  from rdkit.Chem import MolFromSmiles
  self.cycle4 = MolFromSmiles('C1CCC1')
  self.cycle4.Compute2DCoords()

  # load and sanitize two real molecules, both with the same options
  load_options = dict(add_hydrogens=False, calc_charges=False, sanitize=True)

  protein_path = os.path.join(here, '3ws9_protein_fixer_rdkit.pdb')
  _, self.prot = rgf.load_molecule(protein_path, **load_options)

  ligand_path = os.path.join(here, '3ws9_ligand.sdf')
  _, self.lig = rgf.load_molecule(ligand_path, **load_options)
Example #5
Source File: chemutils.py    From hgraph2graph with MIT License 6 votes vote down vote up
def copy_edit_mol(mol):
    """Rebuild ``mol`` atom by atom and bond by bond into a fresh ``Chem.RWMol``.

    Atoms are duplicated with ``copy_atom`` and added in iteration order.
    Each bond is then re-added between the same atom indices with its
    original bond type.  Returns the editable ``RWMol``.
    """
    editable = Chem.RWMol(Chem.MolFromSmiles(''))

    for src_atom in mol.GetAtoms():
        editable.AddAtom(copy_atom(src_atom))

    for src_bond in mol.GetBonds():
        begin_idx = src_bond.GetBeginAtom().GetIdx()
        end_idx = src_bond.GetEndAtom().GetIdx()
        editable.AddBond(begin_idx, end_idx, src_bond.GetBondType())

    return editable
Example #6
Source File: chemTopicModel.py    From CheTo with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _generateMolFrags(datachunk, vocabulary, fragmentMethod, fragIdx=None):
    if fragIdx is None and fragmentMethod == 'Brics':
        return
    result={}
    for idx, smi in datachunk:
        mol = Chem.MolFromSmiles(str(smi))
        if mol == None:
            continue
        fp,_=_generateFPs(mol,fragmentMethod=fragmentMethod)
        if fp is None:
            continue
        tmp={}
        for k,v in fp.items():
            if k not in vocabulary:
                continue
            # save memory: for BRICS use index instead of long complicated SMILES
            if fragmentMethod == 'Brics':
                tmp[fragIdx[k]]=v
            else:
                tmp[k]=v
        result[idx]=tmp
    return result

########### chemical topic modeling class ################### 
Example #7
Source File: dataset_utils.py    From rl_graph_generation with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def __getitem__(self, item):
    """
    Look up the SMILES stored under ``item`` in ``self.df['smiles']`` and
    parse it with RDKit.
    :param item: key used to index ``self.df['smiles']``
    :return: the result of ``Chem.MolFromSmiles`` for that SMILES
    """
    return Chem.MolFromSmiles(self.df['smiles'][item])

# # TESTS
# path = 'gdb13.rand1M.smi.gz'
# dataset = gdb_dataset(path)
#
# print(len(dataset))
# mol,_ = dataset[0]
# graph = mol_to_nx(mol)
# graph_sub = graph.subgraph([0,3,5,7,9])
# graph_sub_new = nx.convert_node_labels_to_integers(graph_sub,label_attribute='old')
# graph_sub_node = graph_sub.nodes()
# graph_sub_new_node = graph_sub_new.nodes()
# matrix = nx.adjacency_matrix(graph_sub)
# np_matrix = matrix.toarray()
# print(np_matrix)
# print('end') 
Example #8
Source File: align.py    From hgraph2graph with MIT License 6 votes vote down vote up
def align(xy_tuple):
    """Re-root two SMILES so that their leading halves are as similar as possible.

    Args:
        xy_tuple: pair ``(x, y)`` of SMILES strings.

    Returns:
        Tuple ``(x_smiles, y_smiles)`` of non-isomeric SMILES, rooted at the
        pair of atoms (taken from ``get_leaves``) that minimises the
        Levenshtein distance between the first halves of the two rooted
        strings.  The first best pair wins ties; atom 0 is used for both when
        no pair is evaluated.
    """
    x, y = xy_tuple
    xmol, ymol = Chem.MolFromSmiles(x), Chem.MolFromSmiles(y)
    # Round-trip x through its non-isomeric SMILES before rooting.
    x = Chem.MolToSmiles(xmol, isomericSmiles=False)
    xmol = Chem.MolFromSmiles(x)

    def rooted(mol, atom_idx):
        return Chem.MolToSmiles(mol, rootedAtAtom=atom_idx, isomericSmiles=False)

    # Each rooted SMILES depends on a single leaf only, so build it once per
    # leaf instead of once per (x leaf, y leaf) pair.
    x_rooted = [(i, rooted(xmol, i)) for i in get_leaves(xmol)]
    y_rooted = [(j, rooted(ymol, j)) for j in get_leaves(ymol)]

    best_i, best_j = 0, 0
    best = 1000000
    for i, new_x in x_rooted:
        for j, new_y in y_rooted:
            le = min(len(new_x), len(new_y)) // 2
            dist = Levenshtein.distance(new_x[:le], new_y[:le])
            if dist < best:
                best_i, best_j = i, j
                best = dist

    return rooted(xmol, best_i), rooted(ymol, best_j)
Example #9
Source File: data_loader.py    From PADME with MIT License 6 votes vote down vote up
def featurize_smiles_np(arr, featurizer, log_every_N=1000, verbose=True):
  """Featurize every SMILES of a numpy array and return a flat feature array.

  Each element of ``arr.tolist()`` is parsed with RDKit.  A successfully
  parsed molecule is renumbered into canonical atom rank order, and
  ``featurizer.featurize([mol])`` is called for every element.  Progress is
  logged every ``log_every_N`` samples.  Results whose ``size`` is 0 are
  dropped; the rest are stacked, squeezed and reshaped to one dimension.
  """
  per_sample = []
  for position, smiles in enumerate(arr.tolist()):
    mol = Chem.MolFromSmiles(smiles)
    if mol:
      mol = rdmolops.RenumberAtoms(mol, rdmolfiles.CanonicalRankAtoms(mol))
    if position % log_every_N == 0:
      log("Featurizing sample %d" % position, verbose)
    per_sample.append(featurizer.featurize([mol]))

  non_empty = [feat for feat in per_sample if feat.size > 0]
  stacked = np.squeeze(np.array(non_empty))
  return stacked.reshape(-1,)
Example #10
Source File: utilsFP.py    From CheTo with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def getSubstructSmi(mol,env,propsToSmiles=True):
    """
    Return the SMILES of the substructure of ``mol`` spanned by the bonds in ``env``.

    ``env`` is a sequence of bond indices; an empty ``env`` yields ``''``.
    When ``propsToSmiles`` is true the fragment SMILES is post-processed by
    ``writePropsToSmiles`` using the atom output order recorded on ``mol``.

    >>> getSubstructSmi(Chem.MolFromSmiles('Cc1ncccc1'),((0,1,2)))
    '[cH;R;D2]:[n;R;D2]:[c;R;D3]-[CH3;R0;D1]'

    """
    import ast  # local import: only used to parse the atom output order

    if not len(env):
        return ''
    atomsToUse = set()
    for b in env:
        bond = mol.GetBondWithIdx(b)
        atomsToUse.add(bond.GetBeginAtomIdx())
        atomsToUse.add(bond.GetEndAtomIdx())
    # no isomeric smiles since we don't include that in the fingerprints
    smi = Chem.MolFragmentToSmiles(mol,atomsToUse,isomericSmiles=False,
                                   bondsToUse=env,allHsExplicit=True, allBondsExplicit=True)
    if propsToSmiles:
        # The property is read as the text of a Python list literal, so
        # literal_eval parses it without executing arbitrary code.
        order = ast.literal_eval(mol.GetProp("_smilesAtomOutputOrder"))
        smi = writePropsToSmiles(mol,smi,order)
    return smi
Example #11
Source File: drd2_scorer.py    From hgraph2graph with MIT License 6 votes vote down vote up
def get_score(smile):
    """Score a SMILES string with the module-level classifier ``clf_model``.

    The model is loaded through ``load_model()`` on first use.  Returns the
    value in column 1 of ``clf_model.predict_proba`` for the molecule's
    fingerprint as a float, or 0.0 when the SMILES cannot be parsed.
    """
    if clf_model is None:
        load_model()

    mol = Chem.MolFromSmiles(smile)
    if not mol:
        return 0.0

    fp = fingerprints_from_mol(mol)
    score = clf_model.predict_proba(fp)[:, 1]
    # NOTE(review): assumes `fp` holds a single row, so `score` has one entry.
    # Index it explicitly: float() on a 1-D array is deprecated in NumPy.
    return float(score[0])
Example #12
Source File: dataset.py    From hgraph2graph with MIT License 6 votes vote down vote up
def __getitem__(self, idx):
        """Return the tensorized rooted SMILES of ``self.batches[idx]``.

        One non-isomeric SMILES is generated per atom returned by
        ``get_leaves``, rooted at that atom.  The distinct strings are
        processed in sorted order, and a string is kept only when all node
        labels of its ``MolGraph`` tree are in ``self.vocab.vmap``.

        Returns None if nothing is kept, otherwise the result of
        ``MolGraph.tensorize``.
        """
        mol = Chem.MolFromSmiles(self.batches[idx])
        leaves = get_leaves(mol)
        unique_smiles = set()
        for leaf in leaves:
            unique_smiles.add(Chem.MolToSmiles(mol, rootedAtAtom=leaf, isomericSmiles=False))

        safe_list = []
        for smiles in sorted(unique_smiles):  # sorted to ensure reproducibility
            tree = MolGraph(smiles).mol_tree
            unknown = [attr['label'] for _, attr in tree.nodes(data=True)
                       if attr['label'] not in self.vocab.vmap]
            if not unknown:
                safe_list.append(smiles)

        return MolGraph.tensorize(safe_list, self.vocab, self.avocab) if safe_list else None
Example #13
Source File: chemutils.py    From hgraph2graph with MIT License 6 votes vote down vote up
def get_clique_mol(mol, atoms):
    """Extract the sub-molecule of ``mol`` made of ``atoms``.

    The fragment is written as kekulized SMILES, re-parsed without
    sanitization, rebuilt through ``copy_edit_mol`` and finally passed to
    ``sanitize``, whose result is returned.
    """
    fragment_smiles = Chem.MolFragmentToSmiles(mol, atoms, kekuleSmiles=True)
    parsed = Chem.MolFromSmiles(fragment_smiles, sanitize=False)
    rebuilt = copy_edit_mol(parsed).GetMol()
    return sanitize(rebuilt)
Example #14
Source File: test_tensorflowEncoders.py    From deepchem with MIT License 6 votes vote down vote up
def test_fit(self):
  """Round-trip a set of SMILES through the ZINC encoder and decoder.

  The molecules are one-hot featurized, encoded, decoded and un-transformed.
  The test only checks that one decoded string comes back per input.
  """
  tf_enc = TensorflowMoleculeEncoder.zinc_encoder()

  smiles = [
      "Cn1cnc2c1c(=O)n(C)c(=O)n2C", "O=C(O)[C@@H]1/C(=C/CO)O[C@@H]2CC(=O)N21",
      "Cn1c2nncnc2c(=O)n(C)c1=O", "Cn1cnc2c1c(=O)[nH]c(=O)n2C",
      "NC(=O)c1ncc[nH]c1=O", "O=C1OCc2c1[nH]c(=O)[nH]c2=O",
      "Cn1c(N)c(N)c(=O)n(C)c1=O", "CNc1nc2c([nH]1)c(=O)[nH]c(=O)n2C",
      "CC(=O)N1CN(C(C)=O)[C@@H](O)[C@@H]1O",
      "CC(=O)N1CN(C(C)=O)[C@H](O)[C@H]1O", "Cc1[nH]c(=O)[nH]c(=O)c1CO",
      "O=C1NCCCc2c1no[n+]2[O-]", "Cc1nc(C(N)=O)c(N)n1CCO",
      "O=c1[nH]cc(N2CCOCC2)c(=O)[nH]1"
  ]

  featurizer = dc.feat.one_hot.OneHotFeaturizer(zinc_charset, 120)
  one_hot = featurizer.featurize([Chem.MolFromSmiles(smi) for smi in smiles])

  # Features serve as both X and y of the dataset.
  dataset = DiskDataset.from_numpy(one_hot, one_hot)
  latent = tf_enc.predict_on_batch(dataset.X)

  tf_de = TensorflowMoleculeDecoder.zinc_decoder()
  decoded_smiles = featurizer.untransform(tf_de.predict_on_batch(latent))
  assert len(decoded_smiles) == len(smiles)
Example #15
Source File: mpnn.py    From deepchem with MIT License 6 votes vote down vote up
def construct_multigraph(smile):
  """Build per-atom features and a bond adjacency list for a SMILES string.

  Args:
    smile: SMILES string parsed with ``Chem.MolFromSmiles``.

  Returns:
    Tuple ``(g, h)`` of ordered dicts keyed by atom index.  ``h[i]`` is the
    atom feature ``Variable`` viewed as shape (1, 75).  ``g[i]`` is a list of
    ``(edge_feature, j)`` pairs, one for every atom ``j`` bonded to ``i``,
    where ``edge_feature`` is a ``Variable`` of shape (1, 6).  Atoms without
    bonds have no entry in ``g``.
  """
  g = OrderedDict()
  h = OrderedDict()

  molecule = Chem.MolFromSmiles(smile)
  num_atoms = molecule.GetNumAtoms()
  # `range` and the list comprehension below work on both Python 2 and 3;
  # `xrange` does not exist on Python 3, and a lazy `map` object cannot be
  # turned into a FloatTensor.
  for i in range(num_atoms):
    atom_i = molecule.GetAtomWithIdx(i)
    h[i] = Variable(torch.FloatTensor(dc.feat.graph_features.atom_features(atom_i))).view(1, 75)
    for j in range(num_atoms):
      bond = molecule.GetBondBetweenAtoms(i, j)
      if bond is None:
        continue
      # Convert the bond features to 0/1 values (edge features).
      bond_feats = [1 if x == True else 0  # noqa: E712 - keep the original equality test
                    for x in dc.feat.graph_features.bond_features(bond)]
      e_ij = Variable(torch.FloatTensor(bond_feats).view(1, 6))
      g.setdefault(i, []).append((e_ij, j))

  return g, h
Example #16
Source File: eval_mol2vec_results.py    From deepchem with MIT License 6 votes vote down vote up
def main():
    """Print the summed word vectors of the radius-0 Morgan identifiers of glycine.

    Vectors are loaded from ``vec.txt`` in word2vec format.  Any exception
    raised while building the embedding is printed; in that case no embedding
    is printed afterwards.
    """
    model = models.KeyedVectors.load_word2vec_format("vec.txt")
    embeddings = list()

    # Using canonical smiles for glycine, as in original research paper
    mol = Chem.MolFromSmiles("C(C(=O)O)N")
    try:
        info = {}
        rdMolDescriptors.GetMorganFingerprint(mol, 0, bitInfo=info)
        totalvec = np.zeros(200)
        for k in info:
            totalvec = np.add(totalvec, model.wv[str(k)])
        embeddings.append(totalvec)
    except Exception as e:
        print(e)

    # Only index the list when the embedding was actually computed; otherwise
    # the failure reported above would be followed by an IndexError.
    if embeddings:
        print(embeddings[0])
Example #17
Source File: data_loader.py    From PADME with MIT License 5 votes vote down vote up
def featurize_smiles_df(df, featurizer, field, log_every_N=1000, verbose=True):
  """Featurize individual compounds in dataframe.

  Given a featurizer that operates on individual chemical compounds
  or macromolecules, compute & add features for that compound to the
  features dataframe.

  While featurizing, the process-level stderr descriptor is redirected to
  ``./logs/error.log`` (append mode).  It is restored afterwards, including
  when featurization raises.

  Args:
    df: object whose ``df[field].tolist()`` yields the SMILES strings.
    featurizer: object with ``featurize(mols, smiles=...)``.
    field: name of the SMILES column.
    log_every_N: progress is logged every this many samples.
    verbose: forwarded to ``log``.

  Returns:
    Tuple ``(features, valid_inds)``: the array of non-empty features and a
    boolean array marking which samples produced them.
  """
  sample_elems = df[field].tolist()

  features = []
  stderr_fileno = sys.stderr.fileno()
  stderr_save = os.dup(stderr_fileno)
  try:
    with open('./logs/error.log', 'a') as stderr_fd:
      os.dup2(stderr_fd.fileno(), stderr_fileno)
      try:
        for ind, elem in enumerate(sample_elems):
          mol = Chem.MolFromSmiles(elem)
          # TODO (ytz) this is a bandage solution to reorder the atoms so
          # that they're always in the same canonical order. Presumably this
          # should be correctly implemented in the future for graph mols.
          if mol:
            new_order = rdmolfiles.CanonicalRankAtoms(mol)
            mol = rdmolops.RenumberAtoms(mol, new_order)
          if ind % log_every_N == 0:
            log("Featurizing sample %d" % ind, verbose)
          features.append(featurizer.featurize([mol], smiles=elem))
      finally:
        # Always put the real stderr back, even if featurization failed.
        os.dup2(stderr_save, stderr_fileno)
  finally:
    # The saved duplicate is no longer needed; closing it avoids leaking one
    # descriptor per call.
    os.close(stderr_save)

  valid_inds = np.array(
      [1 if elt.size > 0 else 0 for elt in features], dtype=bool)
  features = [elt for (is_valid, elt) in zip(valid_inds, features) if is_valid]

  return np.array(features), valid_inds
Example #18
Source File: utilsFP.py    From CheTo with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def getMorganEnvironment(mol, bitInfo, fp=None, minRad=0):
    """
    Collect, for every bit in ``bitInfo``, the atom environments that set it.

    ``bitInfo`` maps a bit to ``(atomID, radius)`` pairs.  Pairs whose radius
    is below ``minRad`` are skipped; when ``fp`` is given, ``fp[bit]`` is also
    set to 0 for them (``fp`` is modified in place).  For the remaining pairs
    the result of ``Chem.FindAtomEnvironmentOfRadiusN`` is stored as a list
    under the bit.  Returns a ``defaultdict(list)``.

    >>> m = Chem.MolFromSmiles('CC(O)C')
    >>> bi = {}
    >>> fp = AllChem.GetMorganFingerprintAsBitVect(m,2,2048,bitInfo=bi)
    >>> getMorganEnvironment(m,bi)
    defaultdict(<class 'list'>, {1057: [[], []], 227: [[1]], 709: [[0, 1, 2]], 1: [[]], 283: [[0], [2]], 807: [[]]})
    >>> getMorganEnvironment(m,bi,minRad=1)
    defaultdict(<class 'list'>, {283: [[0], [2]], 227: [[1]], 709: [[0, 1, 2]]})
    >>> list(fp.GetOnBits())
    [1, 227, 283, 709, 807, 1057]
    >>> getMorganEnvironment(m,bi,minRad=1,fp=fp)
    defaultdict(<class 'list'>, {283: [[0], [2]], 227: [[1]], 709: [[0, 1, 2]]})
    >>> list(fp.GetOnBits())
    [227, 283, 709]

    """
    bitPaths = defaultdict(list)
    for bit, info in bitInfo.items():
        for atomID, radius in info:
            if radius >= minRad:
                env = Chem.FindAtomEnvironmentOfRadiusN(mol, radius, atomID)
                bitPaths[bit].append(list(env))
            elif fp is not None:
                # Identity check: never invoke the fingerprint's own
                # comparison operators just to test for "no fp given".
                fp[bit] = 0
    return bitPaths
Example #19
Source File: utilsFP.py    From CheTo with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def writePropsToSmiles(mol,smi,order):
    """
    Annotate the atoms of ``smi`` with ring-membership and degree information.

    ``order`` lists atom indices of ``mol``; its k-th entry (counting from 1)
    is passed as position k to ``_includeRingMembership`` and
    ``_includeDegree``.  Atoms with atomic number 0 are left untouched.

    >>> writePropsToSmiles(Chem.MolFromSmiles('Cc1ncccc1'),'[cH]:[n]:[c]-[CH3]',(3,2,1,0))
    '[cH;R;D2]:[n;R;D2]:[c;R;D3]-[CH3;R0;D1]'

    """
    annotated = copy.deepcopy(smi)
    position = 0
    for atom_idx in order:
        position += 1
        atom = mol.GetAtomWithIdx(atom_idx)
        if atom.GetAtomicNum():
            annotated = _includeRingMembership(annotated, position, noRingAtom = not atom.IsInRing())
            annotated = _includeDegree(annotated, position, atom.GetDegree())
    return annotated
Example #20
Source File: jtprop_vae.py    From icml18-jtnn with MIT License 5 votes vote down vote up
def decode(self, tree_vec, mol_vec, prob_decode):
        """Decode a tree vector and a molecule vector into one SMILES candidate.

        The predicted tree nodes are numbered from 1 and non-leaf nodes get
        their atoms mapped to the node id.  The molecule is assembled with
        ``dfs_assemble`` and round-tripped through SMILES.  When several
        stereo candidates exist, the one whose vector has the highest cosine
        similarity to ``mol_vec`` is returned.

        Returns None if assembly fails or the assembled SMILES cannot be
        parsed back.
        """
        root, nodes = self.decoder.decode(tree_vec, prob_decode)

        # Mark nid & is_leaf & atommap
        for nid, node in enumerate(nodes, start=1):
            degree = len(node.neighbors)
            node.nid = nid
            node.is_leaf = (degree == 1)
            if degree > 1:
                set_atommap(node.mol, node.nid)

        tree_mess = self.jtnn([root])[0]

        assembled = copy_edit_mol(root.mol)
        # One atom map per node id, plus an unused slot at index 0.
        global_amap = [{} for _ in range(len(nodes) + 1)]
        global_amap[1] = {atom.GetIdx(): atom.GetIdx() for atom in assembled.GetAtoms()}

        assembled = self.dfs_assemble(tree_mess, mol_vec, nodes, assembled, global_amap, [], root, None, prob_decode)
        if assembled is None:
            return None

        assembled = assembled.GetMol()
        set_atommap(assembled)
        assembled = Chem.MolFromSmiles(Chem.MolToSmiles(assembled))
        if assembled is None:
            return None

        candidates = decode_stereo(Chem.MolToSmiles(assembled))
        if len(candidates) == 1:
            return candidates[0]

        cand_vecs = self.G_mean(self.mpn(mol2graph(candidates)))
        scores = nn.CosineSimilarity()(cand_vecs, mol_vec)
        _, max_id = scores.max(dim=0)
        return candidates[max_id.data[0]]
Example #21
Source File: mol_tree.py    From icml18-jtnn with MIT License 5 votes vote down vote up
def get_slots(smiles):
    """Return a ``(symbol, formal charge, total H count)`` tuple per atom of ``smiles``."""
    mol = Chem.MolFromSmiles(smiles)
    slots = []
    for atom in mol.GetAtoms():
        slots.append((atom.GetSymbol(), atom.GetFormalCharge(), atom.GetTotalNumHs()))
    return slots
Example #22
Source File: drawFPBits.py    From CheTo with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _drawBricsFrag(smi,molSize=(150,150),kekulize=True,baseRad=0.05,svg=True,**kwargs):
    """Draw a fragment SMILES and return the drawing text.

    Ring/degree annotations of the form ``;R<d>;D<n>`` are removed from
    ``smi`` before parsing.  The molecule is drawn on an SVG canvas of size
    ``molSize``, or on a Cairo canvas when ``svg`` is false.  Extra keyword
    arguments are forwarded to ``DrawMolecule``.  ``baseRad`` is not used in
    this function.
    """
    # delete smarts specific syntax from the pattern
    cleaned = re.sub(r"\;R\d?\;D\d+", "", smi)
    mol = Chem.MolFromSmiles(cleaned, sanitize=True)
    prepared = rdMolDraw2D.PrepareMolForDrawing(mol, kekulize=kekulize)

    # Drawing
    width, height = molSize[0], molSize[1]
    canvas = rdMolDraw2D.MolDraw2DSVG(width, height)
    if not svg:
        canvas = rdMolDraw2D.MolDraw2DCairo(width, height)
    canvas.DrawMolecule(prepared, **kwargs)
    canvas.FinishDrawing()
    return canvas.GetDrawingText()
Example #23
Source File: chemutils.py    From dgl with Apache License 2.0 5 votes vote down vote up
def get_clique_mol(mol, atoms):
    """Return the sanitized sub-molecule of ``mol`` restricted to ``atoms``.

    The fragment goes through kekulized SMILES, an unsanitized parse and
    ``copy_edit_mol`` before ``sanitize`` is applied.  The result of
    ``sanitize`` is assumed not to be None.
    """
    kekule_smiles = Chem.MolFragmentToSmiles(mol, atoms, kekuleSmiles=True)
    raw_fragment = Chem.MolFromSmiles(kekule_smiles, sanitize=False)
    editable_copy = copy_edit_mol(raw_fragment)
    return sanitize(editable_copy.GetMol())
Example #24
Source File: chemutils.py    From dgl with Apache License 2.0 5 votes vote down vote up
def copy_edit_mol(mol):
    """Return an editable ``Chem.RWMol`` holding copies of the atoms and bonds of ``mol``.

    Atoms are copied with ``copy_atom`` in iteration order; bonds are re-added
    between the same atom indices with the same bond type.
    """
    target = Chem.RWMol(Chem.MolFromSmiles(''))
    for original_atom in mol.GetAtoms():
        duplicate = copy_atom(original_atom)
        target.AddAtom(duplicate)
    for original_bond in mol.GetBonds():
        start = original_bond.GetBeginAtom().GetIdx()
        stop = original_bond.GetEndAtom().GetIdx()
        target.AddBond(start, stop, original_bond.GetBondType())
    return target
Example #25
Source File: chemutils.py    From dgl with Apache License 2.0 5 votes vote down vote up
def get_mol(smiles):
    """Parse ``smiles`` and kekulize the molecule in place.

    Returns the kekulized molecule, or None when the SMILES cannot be parsed.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is not None:
        Chem.Kekulize(mol)
    return mol
Example #26
Source File: mol_tree.py    From dgl with Apache License 2.0 5 votes vote down vote up
def get_slots(smiles):
    """List ``(symbol, formal charge, total H count)`` for each atom of the parsed SMILES."""
    mol = Chem.MolFromSmiles(smiles)

    def _slot(atom):
        return (atom.GetSymbol(), atom.GetFormalCharge(), atom.GetTotalNumHs())

    return [_slot(atom) for atom in mol.GetAtoms()]
Example #27
Source File: mol_tree_nx.py    From dgl with Apache License 2.0 5 votes vote down vote up
def _recover_node(self, i, original_mol):
        """Recompute and store the label of node ``i`` from ``original_mol``.

        Atom map numbers are temporarily written on ``original_mol`` for the
        node's clique and for the cliques of its non-leaf successors.  The
        merged clique is then extracted and canonicalised into
        ``node['label']`` / ``node['label_mol']``, and the map numbers of the
        merged clique are reset to 0.

        Returns the node's label SMILES.
        """
        node = self.nodes_dict[i]
        own_atoms = node['clique']

        if not node['is_leaf']:
            for atom_idx in own_atoms:
                original_mol.GetAtomWithIdx(atom_idx).SetAtomMapNum(node['nid'])

        clique = list(own_atoms)
        for j in self.successors(i).numpy():
            neighbor = self.nodes_dict[j]
            neighbor_atoms = neighbor['clique']
            clique.extend(neighbor_atoms)
            if neighbor['is_leaf']: # Leaf node, no need to mark
                continue
            # allow singleton node override the atom mapping
            is_singleton = len(neighbor_atoms) == 1
            for atom_idx in neighbor_atoms:
                if is_singleton or atom_idx not in own_atoms:
                    original_mol.GetAtomWithIdx(atom_idx).SetAtomMapNum(neighbor['nid'])

        clique = list(set(clique))
        label_mol = get_clique_mol(original_mol, clique)
        node['label'] = Chem.MolToSmiles(Chem.MolFromSmiles(get_smiles(label_mol)))
        node['label_mol'] = get_mol(node['label'])

        # Undo the temporary atom mapping.
        for atom_idx in clique:
            original_mol.GetAtomWithIdx(atom_idx).SetAtomMapNum(0)

        return node['label']
Example #28
Source File: dataset_utils.py    From rl_graph_generation with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def sort_dataset(in_path, out_path):
    """
    Order a SMILES dataset by its BertzCT value and write the result.

    The dataset is read with ``load_dataset``, a ``BertzCT`` column is computed
    from each entry of the ``smiles`` column, rows are sorted by that column,
    and the sorted ``smiles`` column is written to CSV without the index.
    :param in_path: location passed to ``load_dataset``
    :param out_path: destination of the sorted CSV
    :return: None
    """
    frame = load_dataset(in_path)
    frame['BertzCT'] = frame.smiles.apply(
        lambda smi: GraphDescriptors.BertzCT(Chem.MolFromSmiles(smi)))
    ordered = frame.sort_values(by=['BertzCT'])
    ordered['smiles'].to_csv(out_path, index=False)
Example #29
Source File: canonicalization.py    From ochem_predict_nn with MIT License 5 votes vote down vote up
def fix_smiles(self, old_smiles, removeMap = True):
		'''
		For a given SMILES string, this function "fixes" common mistakes
		found in the Lowe parsed database:
		- N=c[nH] structures are turned into the normal [NH]-c[n] forms
		- iminols are turned into amides/carbamates

		It applies the reactions in self.rxns until the SMILES string doesn't change

		old_smiles: SMILES string to fix
		removeMap: when true, the 'molAtomMapNumber' property is cleared on
			every atom before the reactions are applied

		Returns the fixed SMILES, or old_smiles unchanged when it cannot be
		parsed.
		'''
		mol = Chem.MolFromSmiles(old_smiles)
		# Check the parse result before touching the atoms: an unparsable
		# SMILES yields None, which has no GetAtoms().
		if mol is None:
			return old_smiles
		if removeMap:
			for atom in mol.GetAtoms():
				atom.ClearProp('molAtomMapNumber')

		new_smiles = Chem.MolToSmiles(mol, isomericSmiles = USE_STEREOCHEMISTRY)
		old_smiles = ''
		while new_smiles != old_smiles:
			old_smiles = new_smiles
			for rxn in self.rxns:
				outcomes = rxn.RunReactants((mol,))
				if not outcomes:
					continue
				# Keep only the first product of the first outcome.
				mol = outcomes[0][0]
				Chem.SanitizeMol(mol)
				new_smiles = Chem.MolToSmiles(mol, isomericSmiles = USE_STEREOCHEMISTRY)

		return new_smiles
Example #30
Source File: donkey.py    From deepchem with MIT License 5 votes vote down vote up
def generate_scaffold(smiles, include_chirality=False):
  """Return the Bemis-Murcko scaffold of a SMILES string.

  The SMILES is parsed with RDKit and handed to a ``ScaffoldGenerator``
  configured with ``include_chirality``.
  """
  mol = Chem.MolFromSmiles(smiles)
  generator = ScaffoldGenerator(include_chirality=include_chirality)
  return generator.get_scaffold(mol)