Python Examples of rdkit.Chem.MolFromSmarts

Source File: chemistry.py From guacamol with MIT License

7 votes

def initialise_neutralisation_reactions():
    """Build the (query, replacement) pairs used to neutralise charged atoms.

    Returns:
        A list of ``(query, replacement)`` tuples in the order of the table
        below. Each query is parsed with ``Chem.MolFromSmarts``; each
        replacement is parsed with ``Chem.MolFromSmiles(smiles, False)``.
    """
    # (SMARTS of the charged atom, SMILES of its neutral replacement)
    charged_to_neutral = [
        ('[n+;H]', 'n'),                     # Imidazoles
        ('[N+;!H0]', 'N'),                   # Amines
        ('[$([O-]);!$([O-][#7])]', 'O'),     # Carboxylic acids and alcohols
        ('[S-;X1]', 'S'),                    # Thiols
        ('[$([N-;X2]S(=O)=O)]', 'N'),        # Sulfonamides
        ('[$([N-;X2][C,N]=C)]', 'N'),        # Enamines
        ('[n-]', '[nH]'),                    # Tetrazoles
        ('[$([S-]=O)]', 'S'),                # Sulfoxides
        ('[$([N-]C=O)]', 'N'),               # Amides
    ]

    reactions = []
    for smarts, smiles in charged_to_neutral:
        query = Chem.MolFromSmarts(smarts)
        replacement = Chem.MolFromSmiles(smiles, False)
        reactions.append((query, replacement))
    return reactions

Source File: mol_utils.py From GLN with MIT License

6 votes

def new_mol(self, name):
        """Parse ``name`` and wrap the result in a ``MolGraph``.

        ``name`` is parsed with ``Chem.MolFromSmiles`` when ``self.sanitized``
        is truthy and with ``Chem.MolFromSmarts`` otherwise. When
        ``self.fp_degree`` is positive, the Morgan fingerprint's non-zero
        element keys and the bit-info dict (``None`` unless ``self.fp_info``
        is truthy) are attached to the graph.

        Returns:
            The ``MolGraph``, or ``None`` if parsing returned ``None``.
        """
        parse = Chem.MolFromSmiles if self.sanitized else Chem.MolFromSmarts
        mol = parse(name)
        if mol is None:
            return None

        graph = MolGraph(name, self.sanitized, mol=mol)
        if self.fp_degree > 0:
            bit_info = {} if self.fp_info else None
            fingerprint = AllChem.GetMorganFingerprint(
                mol,
                self.fp_degree,
                bitInfo=bit_info,
                invariants=self._get_inv(mol),
            )
            graph.fingerprints = list(fingerprint.GetNonzeroElements().keys())
            graph.fp_info = bit_info
        return graph

Source File: AtomTypes.py From PyBioMed with BSD 3-Clause "New" or "Revised" License

6 votes

def BuildPatts(rawV=None):
    """ Internal Use Only

    Compile the atom-type SMARTS definitions into the global ``esPatterns``.

    Args:
        rawV: sequence of ``(name, smarts)`` pairs; defaults to the
            module-level ``_rawD``.

    Side effects:
        Rebinds the global ``esPatterns`` to a list as long as ``rawV``.
        Slot ``i`` holds ``(name, pattern)`` when the SMARTS compiled and is
        left as ``None`` (with a warning on stderr) when it did not.
    """
    global esPatterns, _rawD
    if rawV is None:
        rawV = _rawD

    esPatterns = [None] * len(rawV)
    for i, (name, sma) in enumerate(rawV):
        try:
            patt = Chem.MolFromSmarts(sma)
        except Exception:
            # e.g. a non-string entry rejected by the binding layer
            patt = None
        # MolFromSmarts reports an unparsable pattern by returning None rather
        # than raising, so the result has to be checked explicitly.
        if patt is None:
            sys.stderr.write(
                "WARNING: problems with pattern %s (name: %s), skipped.\n" % (sma, name)
            )
        else:
            esPatterns[i] = name, patt

Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License

6 votes

def __init__(self):
        """Compile the SMARTS queries used to locate metal-containing substructures.

        Every pattern is UTF-8 encoded before being handed to
        ``Chem.MolFromSmarts``.
        """
        log.debug("Initializing MetalDisconnector")

        def compile_smarts(smarts):
            return Chem.MolFromSmarts(smarts.encode("utf8"))

        # Initialize SMARTS to identify relevant substructures
        # TODO: Use atomic numbers instead of element symbols in SMARTS to allow for isotopes?
        # Listed element bonded (any bond type) to N, O or F.
        metal_nof = "[Li,Na,K,Rb,Cs,F,Be,Mg,Ca,Sr,Ba,Ra,Sc,Ti,V,Cr,Mn,Fe,Co,Ni,Cu,Zn,Al,Ga,Y,Zr,Nb,Mo,Tc,Ru,Rh,Pd,Ag,Cd,In,Sn,Hf,Ta,W,Re,Os,Ir,Pt,Au,Hg,Tl,Pb,Bi]~[N,O,F]"
        # Listed element bonded (any bond type) to one of the listed non-metals.
        metal_non = "[Al,Sc,Ti,V,Cr,Mn,Fe,Co,Ni,Cu,Zn,Y,Zr,Nb,Mo,Tc,Ru,Rh,Pd,Ag,Cd,Hf,Ta,W,Re,Os,Ir,Pt,Au]~[B,C,Si,P,As,Sb,S,Se,Te,Cl,Br,I,At]"

        self._metal_nof = compile_smarts(metal_nof)
        self._metal_non = compile_smarts(metal_non)
        self._free_metal = compile_smarts("[Li,Na,K,Mg,CaX0+0]")
        self._carboxylic = compile_smarts("[CX3](=O)[OX2H1]")

Source File: crossover.py From guacamol_baselines with MIT License

6 votes

def cut(mol):
    """Split ``mol`` at one randomly chosen single bond that is not in a ring.

    The bond is broken with ``Chem.FragmentOnBonds`` (dummy atoms labelled 1
    are added at the cut ends) and the pieces are returned as sanitized
    molecules.

    Returns:
        The tuple of fragments from ``Chem.GetMolFrags``, or ``None`` when
        ``mol`` has no matching bond or fragment extraction raises
        ``ValueError``.
    """
    # Compile and match once; the original did both twice.
    acyclic_single_bond = Chem.MolFromSmarts('[*]-;!@[*]')  # single bond not in ring
    matches = mol.GetSubstructMatches(acyclic_single_bond)
    if not matches:
        return None

    bis = random.choice(matches)
    bs = [mol.GetBondBetweenAtoms(bis[0], bis[1]).GetIdx()]

    fragments_mol = Chem.FragmentOnBonds(mol, bs, addDummies=True, dummyLabels=[(1, 1)])

    try:
        return Chem.GetMolFrags(fragments_mol, asMols=True, sanitizeFrags=True)
    except ValueError:
        return None

Source File: crossover.py From guacamol_baselines with MIT License

6 votes

def ring_OK(mol):
    """Check the ring systems of ``mol`` against three exclusion rules.

    Returns:
        ``True`` when ``mol`` has no ring atoms, or when none of the following
        holds: three ring atoms joined by consecutive double bonds, a ring of
        more than 6 atoms, a double bond between atoms in 3- or 4-membered
        rings. ``False`` otherwise.
    """

    def has(smarts):
        return mol.HasSubstructMatch(Chem.MolFromSmarts(smarts))

    if not has('[R]'):
        return True

    ring_allene = has('[R]=[R]=[R]')

    largest_ring = max(len(ring) for ring in mol.GetRingInfo().AtomRings())
    macro_cycle = largest_ring > 6

    double_bond_in_small_ring = has('[r3,r4]=[r3,r4]')

    return not (ring_allene or macro_cycle or double_bond_in_small_ring)


# TODO: set from main? calculate for dataset?

Source File: goal_directed_generation.py From guacamol_baselines with MIT License

6 votes

def add_atom(rdkit_mol, stats: Stats):
    """Apply one randomly selected atom-adding reaction to ``rdkit_mol``.

    A reaction SMARTS is drawn from the lists and probabilities held by
    ``stats`` (or a fixed one is used, see below), applied with ``run_rxn``,
    and the product is kept only if ``valences_not_too_large`` accepts it.

    :param rdkit_mol: molecule to extend
    :param stats: provides ``rxn_smarts_ring_list``, ``rxn_smarts_make_ring``,
                  ``rxn_smarts_list`` and the matching weights ``p_ring`` / ``p``
    :return: the reaction product, or the ``Chem.Mol`` built from the input
             before the reaction when the product is rejected
    """
    # Taken before rdkit_mol is rebound to the reaction product below.
    old_mol = Chem.Mol(rdkit_mol)
    if np.random.random() < 0.63:  # probability of adding ring atom
        # Always drawn first; replaced below when the molecule has no atom in a
        # 3-5 membered ring or no aliphatic ring.
        rxn_smarts = np.random.choice(stats.rxn_smarts_ring_list, p=stats.p_ring)
        if not rdkit_mol.HasSubstructMatch(Chem.MolFromSmarts('[r3,r4,r5]')) \
                or AllChem.CalcNumAliphaticRings(rdkit_mol) == 0:
            rxn_smarts = np.random.choice(stats.rxn_smarts_make_ring, p=stats.p_ring)
            if np.random.random() < 0.036:  # probability of starting a fused ring
                # Stripping every "!" removes the negations from the reaction SMARTS.
                rxn_smarts = rxn_smarts.replace("!", "")
    else:
        # A four-membered ring with two double bonds gets a fixed reaction that
        # inserts a carbon between two r4 atoms; otherwise draw from the
        # general list.
        if rdkit_mol.HasSubstructMatch(Chem.MolFromSmarts('[*]1=[*]-[*]=[*]-1')):
            rxn_smarts = '[r4:1][r4:2]>>[*:1]C[*:2]'
        else:
            rxn_smarts = np.random.choice(stats.rxn_smarts_list, p=stats.p)

    rdkit_mol = run_rxn(rxn_smarts, rdkit_mol)
    if valences_not_too_large(rdkit_mol):
        return rdkit_mol
    else:
        return old_mol

Source File: __init__.py From oddt with BSD 3-Clause "New" or "Revised" License

5 votes

def _amide_bond(bond):
    """Tell whether ``bond`` is a carbon-nitrogen bond inside an amide match.

    Returns ``True`` when one end of the bond is carbon, the other nitrogen,
    and both atom indices occur together in a match of ``C(=O)-N`` on the
    owning molecule; ``False`` otherwise.
    """
    begin = bond.GetBeginAtom()
    end = bond.GetEndAtom()
    if {begin.GetAtomicNum(), end.GetAtomicNum()} != {6, 7}:
        return False

    # https://github.com/rdkit/rdkit/blob/master/Data/FragmentDescriptors.csv
    amide = Chem.MolFromSmarts('C(=O)-N')
    matches = bond.GetOwningMol().GetSubstructMatches(amide)
    return any(
        begin.GetIdx() in match and end.GetIdx() in match
        for match in matches
    )

Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License

5 votes

def __init__(self):
        """Compile the SMARTS queries used to classify charged atoms.

        Every pattern is UTF-8 encoded before being handed to
        ``Chem.MolFromSmarts``.
        """
        log.debug("Initializing Uncharger")

        def compile_smarts(smarts):
            return Chem.MolFromSmarts(smarts.encode("utf8"))

        #: Neutralizable positive charge (with hydrogens attached)
        self._pos_h = compile_smarts("[+!H0!$(*~[-])]")
        #: Non-neutralizable positive charge (no hydrogens attached)
        self._pos_quat = compile_smarts("[+H0!$(*~[-])]")
        #: Negative charge, not bonded to a positive charge with no hydrogens
        self._neg = compile_smarts("[-!$(*~[+H0])]")
        #: Negative oxygen bonded to [C,P,S]=O, negative aromatic nitrogen?
        self._neg_acid = compile_smarts(
            "[$([O-][C,P,S]=O),$([n-]1nnnc1),$(n1[n-]nnc1)]"
        )

Source File: esol.py From solubility with MIT License

5 votes

def __init__(self):
        """Create the descriptor record type and the aromatic-atom query."""
        # Record type with the fields mw, logp, rotors and ap.
        self.Descriptor = namedtuple("Descriptor", "mw logp rotors ap")
        # Query compiled from the SMARTS "a".
        self.aromatic_query = Chem.MolFromSmarts("a")

Source File: functional_groups.py From chemprop with MIT License

5 votes

def __init__(self, args: Namespace):
        """Load functional-group patterns from a text file.

        :param args: namespace whose ``functional_group_smarts`` attribute is
                     the path of a file holding one SMARTS string per line
        """
        self.smarts = []
        with open(args.functional_group_smarts, 'r') as smarts_file:
            self.smarts.extend(
                Chem.MolFromSmarts(row.strip()) for row in smarts_file
            )

Source File: metric.py From DrugEx with MIT License

5 votes

def substructure(fname, sub, is_active=False):
    """ Calculating the percentage of molecules that contains the given substructure
    in the given dataset.

    Arguments:
        fname (str): path of a tab-separated table with a CANONICAL_SMILES column and,
            optionally, a SCORE or PCHEMBL_VALUE column. Rows with duplicated
            CANONICAL_SMILES are dropped first.
        sub (str): molecular substructure with SMARTS representation.
        is_active (bool, optional): when True, only rows with SCORE > 0.5 (or, if there is
            no SCORE column, PCHEMBL_VALUE >= 6.5) are kept. When False the thresholds are
            SCORE > 0.0 or PCHEMBL_VALUE >= 0.0. Tables with neither column are not filtered.
            (Default: False)

    Returns:
        percentage (float): percentage of molecules (xx.xx%) that contains the given substructure.
            SMILES that RDKit cannot parse count as not containing it; 0.0 is returned when
            no rows remain after filtering.
    """
    sub = Chem.MolFromSmarts(sub)
    df = pd.read_table(fname).drop_duplicates(subset='CANONICAL_SMILES')
    if 'SCORE' in df.columns:
        df = df[df.SCORE > (0.5 if is_active else 0.0)]
    elif 'PCHEMBL_VALUE' in df.columns:
        df = df[df.PCHEMBL_VALUE >= (6.5 if is_active else 0.0)]

    total = len(df)
    if total == 0:
        # Nothing left to score; avoid dividing by zero below.
        return 0.0

    num = 0
    for smile in df.CANONICAL_SMILES:
        mol = Chem.MolFromSmiles(smile)
        # MolFromSmiles returns None for a SMILES it cannot parse.
        if mol is not None and mol.HasSubstructMatch(sub):
            num += 1
    return num * 100 / total

Source File: xyz2mol.py From BCAI_kaggle_CHAMPS with MIT License

5 votes

def get_proto_mol(atomicNumList):
    """Create a molecule holding one atom per entry of ``atomicNumList``.

    The first atom comes from parsing the SMARTS ``[#<atomic number>]``; the
    remaining atoms are appended with ``AddAtom``. No bonds are added here.
    """
    seed = Chem.MolFromSmarts("[#" + str(atomicNumList[0]) + "]")
    editable = Chem.RWMol(seed)
    for atomic_num in atomicNumList[1:]:
        editable.AddAtom(Chem.Atom(atomic_num))
    return editable.GetMol()

Source File: xyz2mol.py From BCAI_kaggle_CHAMPS with MIT License

5 votes

def clean_charges(mol):
    """Rewrite charge-separated patterns in ``mol`` using a fixed list of reactions.

    ``mol`` is split into fragments (without sanitization). For each fragment,
    every reaction is applied repeatedly for as long as its reactant pattern
    still matches, always keeping the first product. The processed fragments
    are recombined with ``Chem.CombineMols``.

    Returns:
        The recombined molecule, or ``mol`` itself when it has no fragments.
    """
# this hack should not be needed any more but is kept just in case
#

    rxn_smarts = ['[N+:1]=[*:2]-[C-:3]>>[N+0:1]-[*:2]=[C-0:3]',
                  '[N+:1]=[*:2]-[O-:3]>>[N+0:1]-[*:2]=[O-0:3]',
                  '[N+:1]=[*:2]-[*:3]=[*:4]-[O-:5]>>[N+0:1]-[*:2]=[*:3]-[*:4]=[O-0:5]',
                  '[#8:1]=[#6:2]([!-:6])[*:3]=[*:4][#6-:5]>>[*-:1][*:2]([*:6])=[*:3][*:4]=[*+0:5]',
                  '[O:1]=[c:2][c-:3]>>[*-:1][*:2][*+0:3]',
                  '[O:1]=[C:2][C-:3]>>[*-:1][*:2]=[*+0:3]']

    # Parse each reactant query and reaction once, instead of once per
    # fragment (query) and once per while-iteration (reaction).
    rules = [(Chem.MolFromSmarts(smarts.split(">>")[0]),
              AllChem.ReactionFromSmarts(smarts))
             for smarts in rxn_smarts]

    fragments = Chem.GetMolFrags(mol,asMols=True,sanitizeFrags=False)

    for i,fragment in enumerate(fragments):
        for patt, rxn in rules:
            while fragment.HasSubstructMatch(patt):
                ps = rxn.RunReactants((fragment,))
                fragment = ps[0][0]
        if i == 0:
            mol = fragment
        else:
            mol = Chem.CombineMols(mol,fragment)

    return mol

Source File: common_scoring_functions.py From guacamol with MIT License

5 votes

def __init__(self, target: str, inverse=False) -> None:
        """

        :param target: The SMARTS string to match.
        :param inverse: Specifies whether the SMARTS is desired (False) or an antipattern, which we don't want to see
                        in the molecules (inverse=True)
        """
        super().__init__()
        self.inverse = inverse
        self.smarts = target
        self.target = Chem.MolFromSmarts(target)

        # MolFromSmarts returns None for a SMARTS it cannot parse, so the check
        # has to be on the parsed pattern; the input string is never None here.
        assert self.target is not None, 'Invalid SMARTS: {}'.format(target)

Source File: rdk.py From oddt with BSD 3-Clause "New" or "Revised" License

5 votes

def isrotor(self):
        """Tell whether this bond counts as rotatable.

        Returns ``True`` only when the bond is not in a ring, matches the
        first bond of the ``SMARTS_DEF['rot_bond']`` pattern, both end atoms
        have an atomic number above 1, and each end atom has more than one
        neighbour with an atomic number above 1.
        """
        Chem.GetSSSR(self.Bond.GetOwningMol())
        if self.Bond.IsInRing():
            return False

        rot_mol = Chem.MolFromSmarts(SMARTS_DEF['rot_bond'])
        Chem.GetSSSR(rot_mol)  # MolFromSmarts don't initialize ring info
        rot_bond = rot_mol.GetBondWithIdx(0)
        if not self.Bond.Match(rot_bond):
            return False

        first, second = self.atoms
        if not (first.atomicnum > 1 and second.atomicnum > 1):
            return False

        heavy_counts = [
            sum(nbr.atomicnum > 1 for nbr in atom.neighbors)
            for atom in (first, second)
        ]
        return heavy_counts[0] > 1 and heavy_counts[1] > 1

Source File: mutate.py From GB-GA with MIT License

5 votes

def change_atom(mol):
  """Build a reaction SMARTS that swaps one element present in ``mol`` for another.

  The source element X is drawn (weights ``p``) until one that occurs in
  ``mol`` is found; the target element Y is drawn until it differs from X.

  Returns:
    The string ``'[X:1]>>[Y:1]'`` with X and Y replaced by atomic-number
    primitives such as ``#6``.

  Raises:
    ValueError: if ``mol`` contains none of the candidate elements. The
      previous implementation looped forever in that case.
  """
  choices = ['#6','#7','#8','#9','#16','#17','#35']
  p = [0.15,0.15,0.14,0.14,0.14,0.14,0.14]

  # Test every candidate once up front instead of recompiling a pattern on
  # each draw; this also exposes the case where no draw can ever succeed.
  present = {c for c in choices
             if mol.HasSubstructMatch(Chem.MolFromSmarts('['+c+']'))}
  if not present:
    raise ValueError('molecule contains none of the elements ' + ', '.join(choices))

  # Same rejection sampling as before, so the sequence of random draws is unchanged.
  X = np.random.choice(choices, p=p)
  while X not in present:
    X = np.random.choice(choices, p=p)
  Y = np.random.choice(choices, p=p)
  while Y == X:
    Y = np.random.choice(choices, p=p)

  return '[X:1]>>[Y:1]'.replace('X',X).replace('Y',Y)

Source File: connectivity.py From PyBioMed with BSD 3-Clause "New" or "Revised" License

5 votes

def CalculateChiv4c(mol):
    """
    #################################################################
    Calculation of valence molecular connectivity chi index for cluster

    ---->Chiv4c

    Usage:

        result=CalculateChiv4c(mol)

        Input: mol is a molecule object.

        Output: result is a numeric value

    For every match of the pattern ``*~*(~*)(~*)~*`` the non-zero
    ``_AtomHKDeltas`` values of the matched atoms are multiplied together and
    ``1/sqrt(product)`` is added to the result. Matches whose values are all
    zero contribute nothing.
    #################################################################
    """
    accum = 0.0
    patt = Chem.MolFromSmarts("*~*(~*)(~*)~*")
    for cluster in mol.GetSubstructMatches(patt):
        hk_deltas = (_AtomHKDeltas(mol.GetAtomWithIdx(x)) for x in cluster)
        deltas = [d for d in hk_deltas if d != 0]
        if deltas:
            # The numpy.float alias was removed in NumPy 1.24; the builtin
            # float selects the same dtype.
            deltas1 = numpy.array(deltas, float)
            accum = accum + 1.0 / numpy.sqrt(deltas1.prod())
    return accum

Source File: crossover.py From GB-GA with MIT License

5 votes

def crossover_ring(parent_A,parent_B):
  """Combine ring fragments of two parents into a new molecule.

  Up to 10 attempts are made. In each attempt both parents are cut with
  ``cut_ring``, every fragment pair is joined at one pair of dummy atoms with
  a single and with a double bond, and the remaining pair of dummy atoms is
  then closed with a single and with a double bond. Candidates are filtered
  with ``mol_OK`` (after each step) and ``ring_OK`` (at the end).

  Returns:
    A random candidate that passed both filters, or ``None`` when neither
    parent has ring atoms, ``cut_ring`` fails for a parent, or no attempt
    yields a valid candidate.
  """
  ring_smarts = Chem.MolFromSmarts('[R]')
  if not parent_A.HasSubstructMatch(ring_smarts) and not parent_B.HasSubstructMatch(ring_smarts):
    return None

  rxn_smarts1 = ['[*:1]~[1*].[1*]~[*:2]>>[*:1]-[*:2]','[*:1]~[1*].[1*]~[*:2]>>[*:1]=[*:2]']
  rxn_smarts2 = ['([*:1]~[1*].[1*]~[*:2])>>[*:1]-[*:2]','([*:1]~[1*].[1*]~[*:2])>>[*:1]=[*:2]']
  # The reactions do not depend on the attempt, so parse them once.
  join_rxns = [AllChem.ReactionFromSmarts(rs) for rs in rxn_smarts1]
  close_rxns = [AllChem.ReactionFromSmarts(rs) for rs in rxn_smarts2]

  for _ in range(10):
    fragments_A = cut_ring(parent_A)
    fragments_B = cut_ring(parent_B)
    if fragments_A is None or fragments_B is None:
      return None

    # Collect the products of BOTH joining reactions. The list used to be
    # reset inside this loop, which silently discarded every single-bond join
    # and kept only the double-bond ones.
    new_mol_trial = []
    for rxn1 in join_rxns:
      for fa in fragments_A:
        for fb in fragments_B:
          new_mol_trial.append(rxn1.RunReactants((fa,fb))[0])

    new_mols = []
    for rxn2 in close_rxns:
      for trial in new_mol_trial:
        joined = trial[0]
        if mol_OK(joined):
          new_mols += list(rxn2.RunReactants((joined,)))

    new_mols2 = []
    for products in new_mols:
      closed = products[0]
      if mol_OK(closed) and ring_OK(closed):
        new_mols2.append(closed)

    if len(new_mols2) > 0:
      return random.choice(new_mols2)

  return None

Source File: crossover.py From GB-GA with MIT License

5 votes

def ring_OK(mol):
  """Check the ring systems of ``mol`` against three exclusion rules.

  A molecule without ring atoms is accepted. Otherwise it is rejected when it
  has three ring atoms joined by consecutive double bonds, a ring of more than
  6 atoms, or a double bond between atoms in 3- or 4-membered rings.
  """
  ring_atom = Chem.MolFromSmarts('[R]')
  if not mol.HasSubstructMatch(ring_atom):
    return True

  allene_in_ring = Chem.MolFromSmarts('[R]=[R]=[R]')
  ring_allene = mol.HasSubstructMatch(allene_in_ring)

  ring_sizes = [len(ring) for ring in mol.GetRingInfo().AtomRings()]
  macro_cycle = max(ring_sizes) > 6

  small_ring_double_bond = Chem.MolFromSmarts('[r3,r4]=[r3,r4]')
  double_bond_in_small_ring = mol.HasSubstructMatch(small_ring_double_bond)

  if ring_allene or macro_cycle or double_bond_in_small_ring:
    return False
  return True

Source File: crossover.py From GB-GA with MIT License

5 votes

def cut_ring(mol):
  """Cut ``mol`` at two ring bonds so that it falls into exactly two fragments.

  Up to 10 attempts are made. Each attempt picks, with equal probability,
  either a path of four ring atoms (cutting its first and last bond) or a
  path of three ring atoms whose middle atom does not have degree 2 (cutting
  both bonds of the middle atom). The cut ends receive dummy atoms labelled 1.

  Returns:
    The two fragments as returned by ``Chem.GetMolFrags``, or ``None`` when
    the chosen pattern has no match, fragment extraction raises, or no
    attempt produces exactly two fragments.
  """
  # Compile each pattern once instead of twice per attempt.
  four_atom_path = Chem.MolFromSmarts('[R]@[R]@[R]@[R]')
  three_atom_path = Chem.MolFromSmarts('[R]@[R;!D2]@[R]')

  for _ in range(10):
    if random.random() < 0.5:
      matches = mol.GetSubstructMatches(four_atom_path)
      if not matches:
        return None
      bis = random.choice(matches)
      bis = ((bis[0],bis[1]),(bis[2],bis[3]),)
    else:
      matches = mol.GetSubstructMatches(three_atom_path)
      if not matches:
        return None
      bis = random.choice(matches)
      bis = ((bis[0],bis[1]),(bis[1],bis[2]),)

    bs = [mol.GetBondBetweenAtoms(x,y).GetIdx() for x,y in bis]

    fragments_mol = Chem.FragmentOnBonds(mol,bs,addDummies=True,dummyLabels=[(1, 1),(1,1)])

    try:
      fragments = Chem.GetMolFrags(fragments_mol,asMols=True)
    except Exception:
      # Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.
      return None

    if len(fragments) == 2:
      return fragments

  return None

Source File: crossover.py From GB-GA with MIT License

5 votes

def cut(mol):
  """Split ``mol`` at one randomly chosen single bond that is not in a ring.

  The bond is broken with ``Chem.FragmentOnBonds``; the cut ends receive
  dummy atoms labelled 1.

  Returns:
    The fragments as returned by ``Chem.GetMolFrags``, or ``None`` when
    ``mol`` has no matching bond or fragment extraction raises.
  """
  # Compile and match once; the original did both twice.
  acyclic_single_bond = Chem.MolFromSmarts('[*]-;!@[*]') #single bond not in ring
  matches = mol.GetSubstructMatches(acyclic_single_bond)
  if not matches:
    return None

  bis = random.choice(matches)
  bs = [mol.GetBondBetweenAtoms(bis[0],bis[1]).GetIdx()]

  fragments_mol = Chem.FragmentOnBonds(mol,bs,addDummies=True,dummyLabels=[(1, 1)])

  try:
    return Chem.GetMolFrags(fragments_mol,asMols=True)
  except Exception:
    # Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.
    return None

Source File: rd_filters.py From rd_filters with MIT License

5 votes

def build_rule_list(self, alert_name_list):
        """
        Restrict the alert table to the selected rule sets and compile their SMARTS.

        ``self.rule_df`` is replaced by its rows whose ``rule_set_name`` is in
        alert_name_list. For each remaining rule, ``[pattern, max, description]``
        is appended to ``self.rule_list``; rules whose SMARTS does not yield a
        usable pattern are reported on stderr and skipped.
        :param alert_name_list: list of alert sets to use
        :return: None
        """
        selected = self.rule_df.rule_set_name.isin(alert_name_list)
        self.rule_df = self.rule_df[selected]

        columns = ["rule_id", "smarts", "max", "description"]
        for rule_id, smarts, max_val, desc in self.rule_df[columns].values.tolist():
            smarts_mol = Chem.MolFromSmarts(smarts)
            if not smarts_mol:
                print(f"Error parsing SMARTS for rule {rule_id}", file=sys.stderr)
                continue
            self.rule_list.append([smarts_mol, max_val, desc])

Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License

5 votes

def base(self):
        """Compile ``self.base_str`` (UTF-8 encoded) into a SMARTS query and return it."""
        log.debug("Loading AcidBasePair base: %s", self.name)
        encoded = self.base_str.encode("utf8")
        return Chem.MolFromSmarts(encoded)

Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License

5 votes

def acid(self):
        """Compile ``self.acid_str`` (UTF-8 encoded) into a SMARTS query and return it."""
        log.debug("Loading AcidBasePair acid: %s", self.name)
        encoded = self.acid_str.encode("utf8")
        return Chem.MolFromSmarts(encoded)

Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License

5 votes

def smarts(self):
        """Compile ``self.smarts_str`` (UTF-8 encoded) into a SMARTS query and return it."""
        encoded = self.smarts_str.encode("utf8")
        return Chem.MolFromSmarts(encoded)

Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License

5 votes

def tautomer(self):
        """Compile ``self.tautomer_str`` (UTF-8 encoded) into a SMARTS query and return it."""
        encoded = self.tautomer_str.encode("utf8")
        return Chem.MolFromSmarts(encoded)

Source File: PubChemFingerprints.py From PyBioMed with BSD 3-Clause "New" or "Revised" License

5 votes

def InitKeys(keyList, keyDict):
    """ *Internal Use Only*

   Compiles the SMARTS pattern of every key in keyDict, run once.

   keyDict maps a 1-based key number to a ``(pattern, count)`` pair. For each
   pattern other than "?", slot ``key - 1`` of keyList is set to
   ``(compiled pattern, count)``; a pattern that fails to compile is reported
   with print and its slot is left untouched.

  """
    assert len(keyList) == len(keyDict), "length mismatch"
    for key, (patt, count) in keyDict.items():
        if patt == "?":
            continue
        sma = Chem.MolFromSmarts(patt)
        if sma:
            keyList[key - 1] = sma, count
        else:
            print("SMARTS parser error for key #%d: %s" % (key, patt))

Source File: ghosecrippen.py From PyBioMed with BSD 3-Clause "New" or "Revised" License

5 votes

def _ReadPatts(fileName):

    """
  #################################################################
  *Internal Use Only*

  parses the pattern list from the data file

  Lines starting with "#", lines with fewer than four tab-separated
  fields, lines with an empty first field and the header row (second
  field equal to "SMARTS") are ignored. For every other line the second
  field is stripped of double quotes and compiled as SMARTS; patterns
  that fail to compile are reported with print and skipped.

  Returns (order, patts): order lists the distinct first-field values
  (whitespace-stripped) in order of first appearance, patts maps each of
  them to a list of (smarts, compiled pattern) tuples.
  #################################################################
  """
    patts = {}
    order = []
    with open(fileName, "r") as f:
        lines = f.readlines()
    for line in lines:
        if line[0] == "#":
            continue
        splitLine = line.split("\t")
        if len(splitLine) < 4 or splitLine[0] == "":
            continue
        sma = splitLine[1]
        if sma == "SMARTS":
            # Column-header row; it used to be reported as a parsing problem.
            continue
        # str.replace returns a new string; the result was previously discarded.
        sma = sma.replace('"', "")
        p = Chem.MolFromSmarts(sma)
        if not p:
            print("Problems parsing smarts: %s" % (sma))
            continue
        # string.strip() only exists in Python 2; use the str method.
        cha = splitLine[0].strip()
        if cha not in order:
            order.append(cha)
        patts.setdefault(cha, []).append((sma, p))
    return order, patts


###########################################################################

Source File: cats2d.py From PyBioMed with BSD 3-Clause "New" or "Revised" License

5 votes

def AssignAtomType(mol):
    """
    #################################################################
    Assign the atoms in the mol object into each of the PPP type

    according to PPP list definition.

    For every type in PPP, the first atom index of each substructure match
    of each of its SMARTS patterns is collected. The "L" entry is then
    replaced by the result of ContructLFromGraphSearch(mol) followed by the
    SMARTS-based "L" hits.

    Note: res is a dict form such as {'A': [2], 'P': [], 'N': [4]}
    #################################################################
    """
    res = {}
    for ppptype, smarts_list in PPP.items():
        hits = []
        for smarts in smarts_list:
            patt = Chem.MolFromSmarts(smarts)
            hits.extend(match[0] for match in mol.GetSubstructMatches(patt))
        res[ppptype] = hits

    graph_hits = ContructLFromGraphSearch(mol)
    graph_hits.extend(res["L"])
    res["L"] = graph_hits

    return res


###################################

Python rdkit.Chem.MolFromSmarts() Examples