Python rdkit.Chem.MolFromSmarts() Examples

The following are 30 code examples of rdkit.Chem.MolFromSmarts().
Example #1
Source File:    From guacamol with MIT License 7 votes vote down vote up
def initialise_neutralisation_reactions():
    """Build the (charged pattern, neutral replacement) pairs used for neutralisation.

    Returns:
        A list of ``(pattern, replacement)`` tuples, one per entry of ``patts``.
        The pattern is parsed with ``Chem.MolFromSmarts``; the replacement is
        parsed with ``Chem.MolFromSmiles`` with ``False`` passed as its second
        positional argument.
    """
    patts = (
        # Imidazoles
        ('[n+;H]', 'n'),
        # Amines
        ('[N+;!H0]', 'N'),
        # Carboxylic acids and alcohols
        ('[$([O-]);!$([O-][#7])]', 'O'),
        # Thiols
        ('[S-;X1]', 'S'),
        # Sulfonamides
        ('[$([N-;X2]S(=O)=O)]', 'N'),
        # Enamines
        ('[$([N-;X2][C,N]=C)]', 'N'),
        # Tetrazoles
        ('[n-]', '[nH]'),
        # Sulfoxides
        ('[$([S-]=O)]', 'S'),
        # Amides
        ('[$([N-]C=O)]', 'N'),
    )  # the tuple was never closed, which made the whole function a syntax error
    return [(Chem.MolFromSmarts(x), Chem.MolFromSmiles(y, False)) for x, y in patts]
Example #2
Source File:    From GLN with MIT License 6 votes vote down vote up
def new_mol(self, name):
        """Parse ``name`` into a molecule and wrap it in a ``MolGraph``.

        ``name`` is parsed as SMILES when ``self.sanitized`` is truthy and as
        SMARTS otherwise. When ``self.fp_degree`` is positive, the non-zero
        Morgan fingerprint elements (and, if ``self.fp_info`` is truthy, the
        bit-info dict) are attached to the graph.

        Returns:
            The populated ``MolGraph``, or ``None`` when parsing fails.
        """
        # The two parsers are alternatives; without the ``else`` the SMILES
        # result was always overwritten and ``mol`` was unbound when
        # ``self.sanitized`` was falsy.
        if self.sanitized:
            mol = Chem.MolFromSmiles(name)
        else:
            mol = Chem.MolFromSmarts(name)
        if mol is None:
            return None

        mg = MolGraph(name, self.sanitized, mol=mol)
        if self.fp_degree > 0:
            bi = {} if self.fp_info else None
            feat = AllChem.GetMorganFingerprint(mol, self.fp_degree, bitInfo=bi, invariants=self._get_inv(mol))
            on_bits = list(feat.GetNonzeroElements().keys())
            mg.fingerprints = on_bits
            mg.fp_info = bi
        return mg
Example #3
Source File:    From PyBioMed with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def BuildPatts(rawV=None):
    """ Internal Use Only

    Rebuild the module-level ``esPatterns`` list from ``(name, SMARTS)`` pairs.

    Arguments:
        rawV: sequence of ``(name, sma)`` pairs; when ``None`` the
            module-level ``_rawD`` is used.

    Each slot of ``esPatterns`` is set to ``(name, pattern)``. A slot stays
    ``None`` when building its pattern raises, and a warning is written to
    stderr instead.
    """
    global esPatterns, _rawD
    import sys  # local import: only needed for the warning below

    if rawV is None:
        rawV = _rawD

    esPatterns = [None] * len(rawV)
    for i, (name, sma) in enumerate(rawV):
        try:
            patt = Chem.MolFromSmarts(sma)
        except Exception:
            sys.stderr.write(
                "WARNING: problems with pattern %s (name: %s), skipped.\n" % (sma, name)
            )
        else:
            esPatterns[i] = name, patt
Example #4
Source File:    From PyBioMed with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def __init__(self):
        # Stores four SMARTS query molecules on the instance.
        log.debug("Initializing MetalDisconnector")
        # Initialize SMARTS to identify relevant substructures
        # TODO: Use atomic numbers instead of element symbols in SMARTS to allow for isotopes?
        # NOTE(review): the SMARTS argument and the closing parenthesis of the
        # next two calls are missing from this snippet; they must be restored
        # from the upstream source before this code can run.
        self._metal_nof = Chem.MolFromSmarts(
        self._metal_non = Chem.MolFromSmarts(
        # The SMARTS strings below are UTF-8 encoded before being parsed.
        self._free_metal = Chem.MolFromSmarts("[Li,Na,K,Mg,CaX0+0]".encode("utf8"))
        self._carboxylic = Chem.MolFromSmarts("[CX3](=O)[OX2H1]".encode("utf8")) 
Example #5
Source File:    From guacamol_baselines with MIT License 6 votes vote down vote up
def cut(mol):
    """Split ``mol`` at one randomly chosen single bond that is not in a ring.

    Returns:
        The result of ``Chem.GetMolFrags(..., asMols=True, sanitizeFrags=True)``
        on the fragmented molecule, or ``None`` when no such bond exists or
        fragment extraction raises ``ValueError``.
    """
    # single bond not in ring; compiled once and reused for both queries
    bond_patt = Chem.MolFromSmarts('[*]-;!@[*]')
    if not mol.HasSubstructMatch(bond_patt):
        return None

    bis = random.choice(mol.GetSubstructMatches(bond_patt))

    bs = [mol.GetBondBetweenAtoms(bis[0], bis[1]).GetIdx()]

    fragments_mol = Chem.FragmentOnBonds(mol, bs, addDummies=True, dummyLabels=[(1, 1)])

    # The ``try:`` line was missing, leaving a dangling ``except`` clause.
    try:
        return Chem.GetMolFrags(fragments_mol, asMols=True, sanitizeFrags=True)
    except ValueError:
        return None
Example #6
Source File:    From guacamol_baselines with MIT License 6 votes vote down vote up
def ring_OK(mol):
    """Tell whether the rings of ``mol`` pass three exclusion checks.

    A molecule without any ring atom passes immediately. Otherwise it passes
    only if it has no ``[R]=[R]=[R]`` match, no ring with more than six atoms,
    and no ``[r3,r4]=[r3,r4]`` match.
    """
    has_ring_atom = mol.HasSubstructMatch(Chem.MolFromSmarts('[R]'))
    if not has_ring_atom:
        return True

    allene_query = Chem.MolFromSmarts('[R]=[R]=[R]')
    has_ring_allene = mol.HasSubstructMatch(allene_query)

    ring_sizes = [len(ring) for ring in mol.GetRingInfo().AtomRings()]
    has_macro_cycle = max(ring_sizes) > 6

    small_ring_query = Chem.MolFromSmarts('[r3,r4]=[r3,r4]')
    has_small_ring_double_bond = mol.HasSubstructMatch(small_ring_query)

    return not (has_ring_allene or has_macro_cycle or has_small_ring_double_bond)

# TODO: set from main? calculate for dataset? 
Example #7
Source File:    From guacamol_baselines with MIT License 6 votes vote down vote up
def add_atom(rdkit_mol, stats: Stats):
    """Apply one randomly selected atom-adding reaction to ``rdkit_mol``.

    The reaction SMARTS is drawn from the lists on ``stats``:

    * with probability 0.63 from ``rxn_smarts_ring_list``. If the molecule has
      no ``[r3,r4,r5]`` match or no aliphatic ring, it is drawn from
      ``rxn_smarts_make_ring`` instead, and with probability 0.036 every
      ``!`` is stripped from it;
    * otherwise a fixed ring-opening rule is used when the molecule matches
      ``[*]1=[*]-[*]=[*]-1``, else a draw from ``rxn_smarts_list``.

    Returns:
        The product of ``run_rxn`` when ``valences_not_too_large`` accepts it,
        otherwise an untouched copy of the input molecule.
    """
    old_mol = Chem.Mol(rdkit_mol)
    if np.random.random() < 0.63:  # probability of adding ring atom
        rxn_smarts = np.random.choice(stats.rxn_smarts_ring_list, p=stats.p_ring)
        if not rdkit_mol.HasSubstructMatch(Chem.MolFromSmarts('[r3,r4,r5]')) \
                or AllChem.CalcNumAliphaticRings(rdkit_mol) == 0:
            rxn_smarts = np.random.choice(stats.rxn_smarts_make_ring, p=stats.p_ring)
            if np.random.random() < 0.036:  # probability of starting a fused ring
                rxn_smarts = rxn_smarts.replace("!", "")
    # The branches below lost their ``else:`` lines: the ring choice was always
    # overwritten and ``rxn_smarts`` was unbound on the non-ring path.
    elif rdkit_mol.HasSubstructMatch(Chem.MolFromSmarts('[*]1=[*]-[*]=[*]-1')):
        rxn_smarts = '[r4:1][r4:2]>>[*:1]C[*:2]'
    else:
        rxn_smarts = np.random.choice(stats.rxn_smarts_list, p=stats.p)

    rdkit_mol = run_rxn(rxn_smarts, rdkit_mol)
    if valences_not_too_large(rdkit_mol):
        return rdkit_mol
    return old_mol
Example #8
Source File:    From oddt with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _amide_bond(bond):
    """Return ``True`` when ``bond`` is a C-N bond lying inside a ``C(=O)-N`` match.

    Bonds whose end atoms are not one carbon (6) and one nitrogen (7) are
    rejected without running the substructure search.
    """
    begin = bond.GetBeginAtom()
    end = bond.GetEndAtom()
    atomic_nums = (begin.GetAtomicNum(), end.GetAtomicNum())
    if atomic_nums not in ((6, 7), (7, 6)):
        return False

    amide_query = Chem.MolFromSmarts('C(=O)-N')
    matches = bond.GetOwningMol().GetSubstructMatches(amide_query)
    return any(begin.GetIdx() in match and end.GetIdx() in match for match in matches)
Example #9
Source File:    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __init__(self):
        # Stores the SMARTS query molecules for charged atoms on the instance;
        # each SMARTS string is UTF-8 encoded before being parsed.
        log.debug("Initializing Uncharger")
        #: Neutralizable positive charge (with hydrogens attached)
        self._pos_h = Chem.MolFromSmarts("[+!H0!$(*~[-])]".encode("utf8"))
        #: Non-neutralizable positive charge (no hydrogens attached)
        self._pos_quat = Chem.MolFromSmarts("[+H0!$(*~[-])]".encode("utf8"))
        #: Negative charge, not bonded to a positive charge with no hydrogens
        self._neg = Chem.MolFromSmarts("[-!$(*~[+H0])]".encode("utf8"))
        #: Negative oxygen bonded to [C,P,S]=O, negative aromatic nitrogen?
        # NOTE(review): the call below is cut off in this snippet; its SMARTS
        # argument and closing parenthesis must be restored from upstream.
        self._neg_acid = Chem.MolFromSmarts(
Example #10
Source File:    From solubility with MIT License 5 votes vote down vote up
def __init__(self):
        """Prepare the aromatic-atom query and the ``Descriptor`` record type."""
        descriptor_fields = "mw logp rotors ap"
        self.Descriptor = namedtuple("Descriptor", descriptor_fields)
        self.aromatic_query = Chem.MolFromSmarts("a")
Example #11
Source File:    From chemprop with MIT License 5 votes vote down vote up
def __init__(self, args: Namespace):
        # Starts with an empty ``self.smarts`` list and opens the file named by
        # ``args.functional_group_smarts`` for line-by-line reading.
        self.smarts = []
        with open(args.functional_group_smarts, 'r') as f:
            for line in f:
            # NOTE(review): the loop body is missing from this snippet, so what
            # is done with each line cannot be determined here.
Example #12
Source File:    From DrugEx with MIT License 5 votes vote down vote up
def substructure(fname, sub, is_active=False):
    """ Calculating the percentage of molecules that contains the given substructure
    in the given dataset.

    Arguments:
        fname (str): path of a table readable by ``pd.read_table`` with a
            ``CANONICAL_SMILES`` column and, optionally, a ``SCORE`` or
            ``PCHEMBL_VALUE`` column. Rows with duplicate SMILES are dropped.
        sub (str): molecular substructure with SMARTS representation.
        is_active (bool, optional): selecting only active ligands (True) or all of the molecules (False)
            if it is true, the molecule with PCHEMBL_VALUE >= 6.5 or SCORE > 0.5 will be selected.
            When false the thresholds are SCORE > 0.0 or PCHEMBL_VALUE >= 0.0.
            (Default: False)

    Returns:
        percentage (float): percentage of molecules (xx.xx%) that contains the given substructure.
            ``0.0`` is returned when no rows remain after filtering.
    """
    sub = Chem.MolFromSmarts(sub)
    df = pd.read_table(fname).drop_duplicates(subset='CANONICAL_SMILES')
    if 'SCORE' in df.columns:
        df = df[df.SCORE > (0.5 if is_active else 0.0)]
    elif 'PCHEMBL_VALUE' in df.columns:
        df = df[df.PCHEMBL_VALUE >= (6.5 if is_active else 0.0)]
    if len(df) == 0:
        # Nothing to count; avoids a ZeroDivisionError below.
        return 0.0
    num = 0
    for smile in df.CANONICAL_SMILES:
        mol = Chem.MolFromSmiles(smile)
        # An unparsable SMILES yields None; it still counts in the denominator
        # but cannot contain the substructure.
        if mol is not None and mol.HasSubstructMatch(sub):
            num += 1
    percentage = num * 100 / len(df)
    return percentage
Example #13
Source File:    From BCAI_kaggle_CHAMPS with MIT License 5 votes vote down vote up
def get_proto_mol(atomicNumList):
    """Build a bond-less molecule with one atom per entry of ``atomicNumList``.

    The first atom comes from parsing the SMARTS ``[#<atomic number>]``; every
    further atomic number is appended as a new ``Chem.Atom``.

    Arguments:
        atomicNumList: non-empty sequence of atomic numbers.

    Returns:
        The molecule obtained from the editable ``RWMol`` via ``GetMol()``.
    """
    mol = Chem.MolFromSmarts("[#" + str(atomicNumList[0]) + "]")
    rwMol = Chem.RWMol(mol)
    for i in range(1, len(atomicNumList)):
        a = Chem.Atom(atomicNumList[i])
        # The atom was created but never attached, so the result always
        # contained only the first atom.
        rwMol.AddAtom(a)

    return rwMol.GetMol()
Example #14
Source File:    From BCAI_kaggle_CHAMPS with MIT License 5 votes vote down vote up
def clean_charges(mol):
# this hack should not be needed any more but is kept just in case
    # Splits ``mol`` into unsanitized fragments, repeatedly applies each
    # reaction in ``rxn_smarts`` to a fragment while the reactant side still
    # matches, and returns the resulting ``mol``.

    # NOTE(review): the list below is never closed in this snippet; the
    # remaining reaction SMARTS and the closing bracket are missing and must be
    # restored from the upstream source.
    rxn_smarts = ['[N+:1]=[*:2]-[C-:3]>>[N+0:1]-[*:2]=[C-0:3]',

    fragments = Chem.GetMolFrags(mol,asMols=True,sanitizeFrags=False)

    for i,fragment in enumerate(fragments):
        for smarts in rxn_smarts:
            # The reactant side (text before ">>") is used as the match pattern.
            patt = Chem.MolFromSmarts(smarts.split(">>")[0])
            while fragment.HasSubstructMatch(patt):
                rxn = AllChem.ReactionFromSmarts(smarts)
                ps = rxn.RunReactants((fragment,))
                # Keep the first product of the first product set.
                fragment = ps[0][0]
        # NOTE(review): as written both assignments below sit under
        # ``if i == 0``; an ``else:`` before the CombineMols line appears to
        # have been lost - confirm against the upstream source.
        if i == 0:
            mol = fragment
            mol = Chem.CombineMols(mol,fragment)

    return mol 
Example #15
Source File:    From guacamol with MIT License 5 votes vote down vote up
def __init__(self, target: str, inverse=False) -> None:
        """
        Parse ``target`` as SMARTS and store the resulting pattern on ``self.smarts``.

        :param target: The SMARTS string to match.
        :param inverse: Specifies whether the SMARTS is desired (False) or an antipattern, which we don't want to see
                        in the molecules (True)
        :raises AssertionError: if ``target`` cannot be parsed as SMARTS.
        """
        self.inverse = inverse
        pattern = Chem.MolFromSmarts(target)
        self.smarts = pattern

        assert pattern is not None, "could not parse SMARTS: %r" % (target,)
Example #16
Source File:    From oddt with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def isrotor(self):
        """Tell whether this bond counts as rotatable.

        The bond must not be in a ring, must match the first bond of the
        ``SMARTS_DEF['rot_bond']`` query, and both of its atoms must have an
        atomic number above 1 and more than one neighbour with an atomic
        number above 1.
        """
        if self.Bond.IsInRing():
            return False

        query = Chem.MolFromSmarts(SMARTS_DEF['rot_bond'])
        Chem.GetSSSR(query)  # MolFromSmarts don't initialize ring info
        if not self.Bond.Match(query.GetBondWithIdx(0)):
            return False

        first, second = self.atoms
        if not (first.atomicnum > 1 and second.atomicnum > 1):
            return False

        heavy_first = sum(nbr.atomicnum > 1 for nbr in first.neighbors)
        heavy_second = sum(nbr.atomicnum > 1 for nbr in second.neighbors)
        return heavy_first > 1 and heavy_second > 1
Example #17
Source File:    From GB-GA with MIT License 5 votes vote down vote up
def change_atom(mol):
  """Return a reaction SMARTS that swaps one element of ``mol`` for another.

  The source element is redrawn until ``mol`` contains it; the target element
  is redrawn until it differs from the source. Both are drawn from the same
  weighted list of atomic-number queries.
  """
  element_queries = ['#6','#7','#8','#9','#16','#17','#35']
  weights = [0.15,0.15,0.14,0.14,0.14,0.14,0.14]

  source = np.random.choice(element_queries, p=weights)
  while True:
    if mol.HasSubstructMatch(Chem.MolFromSmarts('['+source+']')):
      break
    source = np.random.choice(element_queries, p=weights)

  replacement = np.random.choice(element_queries, p=weights)
  while replacement == source:
    replacement = np.random.choice(element_queries, p=weights)

  template = '[X:1]>>[Y:1]'
  with_source = template.replace('X',source)
  return with_source.replace('Y',replacement)
Example #18
Source File:    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def CalculateChiv4c(mol):
    """
    Calculation of valence molecular connectivity chi index for cluster

    Every match of the five-atom query ``*~*(~*)(~*)~*`` contributes
    ``1 / sqrt(product of its non-zero atom deltas)`` to the result.

        Input: mol is a molecule object.

        Output: result is a numeric value
    """
    # NOTE(review): the body of the zero-removal loop and the argument of
    # ``numpy.sqrt`` were missing from the snippet. They are reconstructed here
    # from the standard chi-index definition; confirm against upstream.
    accum = 0.0
    patt = Chem.MolFromSmarts("*~*(~*)(~*)~*")
    HPatt = mol.GetSubstructMatches(patt)
    for cluster in HPatt:
        deltas = [_AtomHKDeltas(mol.GetAtomWithIdx(x)) for x in cluster]
        # Drop zero deltas so the product cannot collapse to zero.
        deltas = [d for d in deltas if d != 0]
        if deltas:
            # ``numpy.float`` was removed from NumPy; the builtin ``float``
            # is the same dtype.
            deltas1 = numpy.array(deltas, float)
            accum = accum + 1.0 / numpy.sqrt(deltas1.prod())
    return accum
Example #19
Source File:    From GB-GA with MIT License 5 votes vote down vote up
def crossover_ring(parent_A,parent_B):
  # Tries up to 10 times to recombine ring fragments of the two parents.
  # Returns None when neither parent has a ring atom, when ``cut_ring`` yields
  # None for either parent, or when no attempt leaves anything in
  # ``new_mols2``; otherwise returns a random element of ``new_mols2``.
  ring_smarts = Chem.MolFromSmarts('[R]')
  if not parent_A.HasSubstructMatch(ring_smarts) and not parent_B.HasSubstructMatch(ring_smarts):
    return None
  # First-stage reactions take two separate reactants; second-stage reactions
  # wrap both reactant parts in one parenthesised group.
  rxn_smarts1 = ['[*:1]~[1*].[1*]~[*:2]>>[*:1]-[*:2]','[*:1]~[1*].[1*]~[*:2]>>[*:1]=[*:2]']
  rxn_smarts2 = ['([*:1]~[1*].[1*]~[*:2])>>[*:1]-[*:2]','([*:1]~[1*].[1*]~[*:2])>>[*:1]=[*:2]']
  for i in range(10):
    fragments_A = cut_ring(parent_A)
    fragments_B = cut_ring(parent_B)
    #print [Chem.MolToSmiles(x) for x in list(fragments_A)+list(fragments_B)]
    if fragments_A == None or fragments_B == None:
      return None
    new_mol_trial = []
    for rs in rxn_smarts1:
      rxn1 = AllChem.ReactionFromSmarts(rs)
      # Note: the trial list is reset for every first-stage reaction.
      new_mol_trial = []
      for fa in fragments_A:
        for fb in fragments_B:
        # NOTE(review): the body of this inner loop is missing from the
        # snippet; presumably it fills ``new_mol_trial`` - restore from upstream.

    new_mols = []
    for rs in rxn_smarts2:
      rxn2 = AllChem.ReactionFromSmarts(rs)
      for m in new_mol_trial:
        # Each trial entry is indexable; only its first element is used.
        m = m[0]
        if mol_OK(m):
          new_mols += list(rxn2.RunReactants((m,)))
    new_mols2 = []
    for m in new_mols:
      m = m[0]
      if mol_OK(m) and ring_OK(m):
      # NOTE(review): the body of this ``if`` is missing from the snippet;
      # as shown nothing is ever added to ``new_mols2``.
    if len(new_mols2) > 0:
      return random.choice(new_mols2)
  return None 
Example #20
Source File:    From GB-GA with MIT License 5 votes vote down vote up
def ring_OK(mol):
  """Tell whether the rings of ``mol`` are acceptable.

  Molecules without ring atoms are accepted. Otherwise the molecule is
  rejected if it matches ``[R]=[R]=[R]``, has a ring larger than six atoms, or
  matches ``[r3,r4]=[r3,r4]``.
  """
  any_ring_atom = Chem.MolFromSmarts('[R]')
  if not mol.HasSubstructMatch(any_ring_atom):
    return True

  allene_in_ring = mol.HasSubstructMatch(Chem.MolFromSmarts('[R]=[R]=[R]'))

  rings = mol.GetRingInfo().AtomRings()
  largest_ring = max([len(ring) for ring in rings])
  too_large = largest_ring > 6

  strained_double_bond = mol.HasSubstructMatch(Chem.MolFromSmarts('[r3,r4]=[r3,r4]'))

  if allene_in_ring or too_large or strained_double_bond:
    return False
  return True
Example #21
Source File:    From GB-GA with MIT License 5 votes vote down vote up
def cut_ring(mol):
  """Cut ``mol`` at two ring bonds and return the two resulting fragments.

  Up to 10 attempts are made. Each attempt picks, with equal probability, a
  four-ring-atom path (cutting its first and last bond) or a three-ring-atom
  path whose middle atom is not two-connected (cutting both of its bonds).

  Returns:
    The fragments when exactly two are produced; ``None`` when the chosen
    pattern has no match, fragment extraction raises ``ValueError``, or all
    attempts fail.
  """
  for i in range(10):
    # The two cutting patterns are alternatives; the ``else:`` line was
    # missing, so the second pattern always replaced the first.
    if random.random() < 0.5:
      patt = Chem.MolFromSmarts('[R]@[R]@[R]@[R]')
      if not mol.HasSubstructMatch(patt):
        return None
      bis = random.choice(mol.GetSubstructMatches(patt))
      bis = ((bis[0],bis[1]),(bis[2],bis[3]),)
    else:
      patt = Chem.MolFromSmarts('[R]@[R;!D2]@[R]')
      if not mol.HasSubstructMatch(patt):
        return None
      bis = random.choice(mol.GetSubstructMatches(patt))
      bis = ((bis[0],bis[1]),(bis[1],bis[2]),)

    bs = [mol.GetBondBetweenAtoms(x,y).GetIdx() for x,y in bis]

    fragments_mol = Chem.FragmentOnBonds(mol,bs,addDummies=True,dummyLabels=[(1, 1),(1,1)])

    # The ``try:``/``except:`` lines were missing, so every attempt returned
    # None right after extracting the fragments.
    try:
      fragments = Chem.GetMolFrags(fragments_mol,asMols=True)
    except ValueError:
      return None

    if len(fragments) == 2:
      return fragments
  return None
Example #22
Source File:    From GB-GA with MIT License 5 votes vote down vote up
def cut(mol):
  """Split ``mol`` at one randomly chosen single bond that is not in a ring.

  Returns:
    The fragments from ``Chem.GetMolFrags(..., asMols=True)``, or ``None``
    when no such bond exists or fragment extraction raises ``ValueError``.
  """
  # single bond not in ring; compiled once and reused for both queries
  bond_patt = Chem.MolFromSmarts('[*]-;!@[*]')
  if not mol.HasSubstructMatch(bond_patt):
    return None
  bis = random.choice(mol.GetSubstructMatches(bond_patt))
  bs = [mol.GetBondBetweenAtoms(bis[0],bis[1]).GetIdx()]

  fragments_mol = Chem.FragmentOnBonds(mol,bs,addDummies=True,dummyLabels=[(1, 1)])

  # The ``try:``/``except:`` lines were missing, which left the statements
  # below mis-indented and the final ``return None`` unreachable.
  try:
    return Chem.GetMolFrags(fragments_mol,asMols=True)
  except ValueError:
    return None
Example #23
Source File:    From rd_filters with MIT License 5 votes vote down vote up
def build_rule_list(self, alert_name_list):
        """
        Read the alerts csv file and select the rule sets defined in alert_name_list

        ``self.rule_df`` is narrowed to the rows whose ``rule_set_name`` is in
        ``alert_name_list``. Every remaining rule whose SMARTS parses is
        appended to ``self.rule_list`` as ``[pattern, max, description]``;
        rules that fail to parse are reported on stderr and skipped.

        :param alert_name_list: list of alert sets to use
        """
        self.rule_df = self.rule_df[self.rule_df.rule_set_name.isin(alert_name_list)]
        tmp_rule_list = self.rule_df[["rule_id", "smarts", "max", "description"]].values.tolist()
        for rule_id, smarts, max_val, desc in tmp_rule_list:
            smarts_mol = Chem.MolFromSmarts(smarts)
            if smarts_mol:
                self.rule_list.append([smarts_mol, max_val, desc])
            else:
                # The ``else:`` line was missing, so the error was printed for
                # every rule that parsed correctly.
                print(f"Error parsing SMARTS for rule {rule_id}", file=sys.stderr)
Example #24
Source File:    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def base(self):
        # Returns the query molecule parsed from ``self.base_str`` (UTF-8
        # encoded before parsing).
        # NOTE(review): the ``log.debug(`` call below is cut off in this
        # snippet; its remaining argument(s) and closing parenthesis are missing.
        log.debug("Loading AcidBasePair base: %s",
        return Chem.MolFromSmarts(self.base_str.encode("utf8")) 
Example #25
Source File:    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def acid(self):
        # Returns the query molecule parsed from ``self.acid_str`` (UTF-8
        # encoded before parsing).
        # NOTE(review): the ``log.debug(`` call below is cut off in this
        # snippet; its remaining argument(s) and closing parenthesis are missing.
        log.debug("Loading AcidBasePair acid: %s",
        return Chem.MolFromSmarts(self.acid_str.encode("utf8")) 
Example #26
Source File:    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def smarts(self):
        """Return the query molecule parsed from the UTF-8 encoded ``self.smarts_str``."""
        encoded = self.smarts_str.encode("utf8")
        return Chem.MolFromSmarts(encoded)
Example #27
Source File:    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def tautomer(self):
        """Return the query molecule parsed from the UTF-8 encoded ``self.tautomer_str``."""
        encoded = self.tautomer_str.encode("utf8")
        return Chem.MolFromSmarts(encoded)
Example #28
Source File:    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def InitKeys(keyList, keyDict):
    """ *Internal Use Only*

    generates SMARTS patterns for the keys, run once

    Arguments:
        keyList: list with one slot per key; slot ``key - 1`` is set to
            ``(pattern, count)`` for every key whose SMARTS parses.
        keyDict: mapping from key number to ``(SMARTS, count)``. Entries whose
            SMARTS is ``"?"`` are skipped.

    Raises:
        AssertionError: if ``keyList`` and ``keyDict`` differ in length.
    """
    assert len(keyList) == len(keyDict.keys()), "length mismatch"
    for key, (patt, count) in keyDict.items():
        if patt == "?":
            continue
        sma = Chem.MolFromSmarts(patt)
        if not sma:
            print("SMARTS parser error for key #%d: %s" % (key, patt))
        else:
            # The ``else:`` line was missing, so the slot was filled only when
            # parsing had failed.
            keyList[key - 1] = sma, count
Example #29
Source File:    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _ReadPatts(fileName):

  *Internal Use Only*

  parses the pattern list from the data file
    patts = {}
    order = []
    with open(fileName, "r") as f:
        lines = f.readlines()
    for line in lines:
        if line[0] != "#":
            splitLine = line.split("\t")
            if len(splitLine) >= 4 and splitLine[0] != "":
                sma = splitLine[1]
                if sma != "SMARTS":
                    sma.replace('"', "")
                    p = Chem.MolFromSmarts(sma)
                    if p:
                        cha = string.strip(splitLine[0])
                        if cha not in order:
                        l = patts.get(cha, [])
                        l.append((sma, p))
                        patts[cha] = l
                    print("Problems parsing smarts: %s" % (sma))
    return order, patts

Example #30
Source File:    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def AssignAtomType(mol):
    """
    Assign the atoms in the mol object into each of the PPP type

    according to PPP list definition.

    For every type in ``PPP``, the first atom index of each match of each of
    its SMARTS patterns is collected. The ``"L"`` entry is then set to the
    result of ``ContructLFromGraphSearch(mol)``.

    Note: res is a dict form such as {'A': [2], 'P': [], 'N': [4]}
    """
    res = dict()
    for ppptype in PPP:
        temp = []
        for i in PPP[ppptype]:
            patt = Chem.MolFromSmarts(i)
            atomindex = mol.GetSubstructMatches(patt)
            atomindex = [k[0] for k in atomindex]
            # This statement was missing, so every type mapped to an empty list.
            temp.extend(atomindex)
        res.update({ppptype: temp})
    # NOTE(review): this replaces any "L" entry produced by the loop above;
    # confirm against upstream whether the two lists should be merged.
    temp = ContructLFromGraphSearch(mol)
    res.update({"L": temp})

    return res
