Python rdkit.Chem.MolFromSmarts() Examples

The following are 30 code examples of rdkit.Chem.MolFromSmarts(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module rdkit.Chem, or try the search function.
Example #1
Source File: chemistry.py    From guacamol with MIT License 7 votes vote down vote up
def initialise_neutralisation_reactions():
    """Build the (charged query, neutral replacement) pairs used for neutralisation.

    Returns:
        A list of 2-tuples, one per rule below.  The first element is the
        charged-group SMARTS compiled with ``Chem.MolFromSmarts``; the second
        is the replacement fragment parsed with ``Chem.MolFromSmiles`` with
        ``False`` passed as its second positional argument.
    """
    charged_to_neutral = (
        ('[n+;H]', 'n'),                      # Imidazoles
        ('[N+;!H0]', 'N'),                    # Amines
        ('[$([O-]);!$([O-][#7])]', 'O'),      # Carboxylic acids and alcohols
        ('[S-;X1]', 'S'),                     # Thiols
        ('[$([N-;X2]S(=O)=O)]', 'N'),         # Sulfonamides
        ('[$([N-;X2][C,N]=C)]', 'N'),         # Enamines
        ('[n-]', '[nH]'),                     # Tetrazoles
        ('[$([S-]=O)]', 'S'),                 # Sulfoxides
        ('[$([N-]C=O)]', 'N'),                # Amides
    )

    reactions = []
    for charged_smarts, neutral_smiles in charged_to_neutral:
        query = Chem.MolFromSmarts(charged_smarts)
        replacement = Chem.MolFromSmiles(neutral_smiles, False)
        reactions.append((query, replacement))
    return reactions
Example #2
Source File: mol_utils.py    From GLN with MIT License 6 votes vote down vote up
def new_mol(self, name):
        """Parse ``name`` and wrap the result in a ``MolGraph``.

        The string is parsed as SMILES when ``self.sanitized`` is truthy and
        as SMARTS otherwise.  When ``self.fp_degree`` is positive, the Morgan
        fingerprint's non-zero element keys are attached to the graph as
        ``fingerprints``, together with the bit-info dict (or ``None`` when
        ``self.fp_info`` is falsy) as ``fp_info``.

        Returns:
            The new ``MolGraph``, or ``None`` if the string could not be parsed.
        """
        parse = Chem.MolFromSmiles if self.sanitized else Chem.MolFromSmarts
        mol = parse(name)
        if mol is None:
            return None

        graph = MolGraph(name, self.sanitized, mol=mol)
        if self.fp_degree > 0:
            bit_info = {} if self.fp_info else None
            fingerprint = AllChem.GetMorganFingerprint(
                mol,
                self.fp_degree,
                bitInfo=bit_info,
                invariants=self._get_inv(mol),
            )
            graph.fingerprints = list(fingerprint.GetNonzeroElements().keys())
            graph.fp_info = bit_info
        return graph
Example #3
Source File: AtomTypes.py    From PyBioMed with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def BuildPatts(rawV=None):
    """Compile (name, SMARTS) definitions into query molecules.  Internal use only.

    Rebinds the module-level ``esPatterns`` to a list with one slot per entry
    of ``rawV``.  A slot holds ``(name, pattern)`` when the SMARTS compiled and
    is left as ``None`` (with a warning on stderr) when it did not.

    Args:
        rawV: sequence of ``(name, smarts)`` pairs; defaults to the
            module-level ``_rawD``.
    """
    global esPatterns, _rawD
    if rawV is None:
        rawV = _rawD

    esPatterns = [None] * len(rawV)
    for i, (name, sma) in enumerate(rawV):
        try:
            patt = Chem.MolFromSmarts(sma)
        except Exception:
            # e.g. a non-string SMARTS entry; treat like any other bad pattern.
            patt = None
        # MolFromSmarts reports an unparsable pattern by returning None rather
        # than raising, so the None check is what actually detects failures.
        if patt is None:
            sys.stderr.write(
                "WARNING: problems with pattern %s (name: %s), skipped.\n" % (sma, name)
            )
        else:
            esPatterns[i] = name, patt
Example #4
Source File: PyPretreatMolutil.py    From PyBioMed with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def __init__(self):
        """Compile the SMARTS queries used to find metal bonds and related groups.

        Attributes set:
            _metal_nof: listed metal bonded (any bond) to N, O or F.
            _metal_non: listed transition metal / Al bonded (any bond) to one
                of the listed other non-metals.
            _free_metal: query for Li, Na, K, Mg or Ca atoms (see note below).
            _carboxylic: carboxylic acid group ``[CX3](=O)[OX2H1]``.
        """
        log.debug("Initializing MetalDisconnector")
        # Initialize SMARTS to identify relevant substructures
        # TODO: Use atomic numbers instead of element symbols in SMARTS to allow for isotopes?
        # The first atom list walks group 1 (Li..Cs, Fr) then group 2 (Be..Ra).
        # It previously contained "F" instead of "Fr", which made fluorine count
        # as a metal and matched N-F / O-F / F-F bonds as metal bonds.
        self._metal_nof = Chem.MolFromSmarts(
            "[Li,Na,K,Rb,Cs,Fr,Be,Mg,Ca,Sr,Ba,Ra,Sc,Ti,V,Cr,Mn,Fe,Co,Ni,Cu,Zn,Al,Ga,Y,Zr,Nb,Mo,Tc,Ru,Rh,Pd,Ag,Cd,In,Sn,Hf,Ta,W,Re,Os,Ir,Pt,Au,Hg,Tl,Pb,Bi]~[N,O,F]".encode(
                "utf8"
            )
        )
        self._metal_non = Chem.MolFromSmarts(
            "[Al,Sc,Ti,V,Cr,Mn,Fe,Co,Ni,Cu,Zn,Y,Zr,Nb,Mo,Tc,Ru,Rh,Pd,Ag,Cd,Hf,Ta,W,Re,Os,Ir,Pt,Au]~[B,C,Si,P,As,Sb,S,Se,Te,Cl,Br,I,At]".encode(
                "utf8"
            )
        )
        # NOTE(review): in SMARTS "," binds looser than implicit AND, so the
        # "X0+0" qualifier applies only to Ca here - confirm that is intended.
        self._free_metal = Chem.MolFromSmarts("[Li,Na,K,Mg,CaX0+0]".encode("utf8"))
        self._carboxylic = Chem.MolFromSmarts("[CX3](=O)[OX2H1]".encode("utf8"))
Example #5
Source File: crossover.py    From guacamol_baselines with MIT License 6 votes vote down vote up
def cut(mol):
    """Split ``mol`` at one randomly chosen single bond that is not in a ring.

    Returns:
        The fragments from ``Chem.GetMolFrags(..., asMols=True,
        sanitizeFrags=True)`` after breaking the bond (dummy atoms labelled 1
        are added at the cut), or ``None`` when the molecule has no such bond
        or fragment extraction raises ``ValueError``.
    """
    # Compile the query once and reuse it for both the test and the search.
    acyclic_single_bond = Chem.MolFromSmarts('[*]-;!@[*]')  # single bond not in ring
    if not mol.HasSubstructMatch(acyclic_single_bond):
        return None

    bis = random.choice(mol.GetSubstructMatches(acyclic_single_bond))

    bs = [mol.GetBondBetweenAtoms(bis[0], bis[1]).GetIdx()]

    fragments_mol = Chem.FragmentOnBonds(mol, bs, addDummies=True, dummyLabels=[(1, 1)])

    try:
        return Chem.GetMolFrags(fragments_mol, asMols=True, sanitizeFrags=True)
    except ValueError:
        return None
Example #6
Source File: crossover.py    From guacamol_baselines with MIT License 6 votes vote down vote up
def ring_OK(mol):
    """Check the ring systems of ``mol`` against three rejection rules.

    A molecule without ring atoms passes.  Otherwise it is rejected when it
    contains a ring allene (``[R]=[R]=[R]``), a ring with more than six atoms,
    or a double bond between atoms in three- or four-membered rings.

    Returns:
        ``True`` when none of the rules is violated, ``False`` otherwise.
    """
    def matches(smarts):
        return mol.HasSubstructMatch(Chem.MolFromSmarts(smarts))

    if not matches('[R]'):
        return True

    ring_allene = matches('[R]=[R]=[R]')

    largest_ring = max(len(ring) for ring in mol.GetRingInfo().AtomRings())
    macro_cycle = largest_ring > 6

    double_bond_in_small_ring = matches('[r3,r4]=[r3,r4]')

    return not (ring_allene or macro_cycle or double_bond_in_small_ring)


# TODO: set from main? calculate for dataset? 
Example #7
Source File: goal_directed_generation.py    From guacamol_baselines with MIT License 6 votes vote down vote up
def add_atom(rdkit_mol, stats: Stats):
    """Apply one randomly selected atom-adding reaction to ``rdkit_mol``.

    The reaction SMARTS is drawn from the lists held by ``stats`` using their
    associated probabilities; which list is used depends on a random draw and
    on the ring content of the molecule.

    Returns:
        The molecule produced by ``run_rxn`` when ``valences_not_too_large``
        accepts it, otherwise a copy of the input taken before the reaction.
    """
    backup = Chem.Mol(rdkit_mol)

    if np.random.random() < 0.63:  # probability of adding ring atom
        rxn_smarts = np.random.choice(stats.rxn_smarts_ring_list, p=stats.p_ring)
        has_small_ring = rdkit_mol.HasSubstructMatch(Chem.MolFromSmarts('[r3,r4,r5]'))
        if not has_small_ring or AllChem.CalcNumAliphaticRings(rdkit_mol) == 0:
            # No suitable ring to grow: pick a ring-forming reaction instead.
            rxn_smarts = np.random.choice(stats.rxn_smarts_make_ring, p=stats.p_ring)
            if np.random.random() < 0.036:  # probability of starting a fused ring
                rxn_smarts = rxn_smarts.replace("!", "")
    elif rdkit_mol.HasSubstructMatch(Chem.MolFromSmarts('[*]1=[*]-[*]=[*]-1')):
        rxn_smarts = '[r4:1][r4:2]>>[*:1]C[*:2]'
    else:
        rxn_smarts = np.random.choice(stats.rxn_smarts_list, p=stats.p)

    candidate = run_rxn(rxn_smarts, rdkit_mol)
    return candidate if valences_not_too_large(candidate) else backup
Example #8
Source File: __init__.py    From oddt with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _amide_bond(bond):
    """Tell whether ``bond`` is a carbon-nitrogen bond inside a ``C(=O)-N`` match.

    Returns:
        ``True`` when one end of the bond is carbon, the other nitrogen, and
        both atom indices appear together in some match of ``C(=O)-N`` on the
        owning molecule; ``False`` otherwise.
    """
    a1 = bond.GetBeginAtom()
    a2 = bond.GetEndAtom()
    if {a1.GetAtomicNum(), a2.GetAtomicNum()} != {6, 7}:
        return False

    # https://github.com/rdkit/rdkit/blob/master/Data/FragmentDescriptors.csv
    patt = Chem.MolFromSmarts('C(=O)-N')
    bond_atoms = {a1.GetIdx(), a2.GetIdx()}
    matches = bond.GetOwningMol().GetSubstructMatches(patt)
    return any(bond_atoms.issubset(match) for match in matches)
Example #9
Source File: PyPretreatMolutil.py    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __init__(self):
        """Compile the SMARTS queries used to locate charged atoms."""
        log.debug("Initializing Uncharger")

        def compile_query(smarts):
            # Every pattern is passed to RDKit as UTF-8 encoded bytes.
            return Chem.MolFromSmarts(smarts.encode("utf8"))

        #: Neutralizable positive charge (with hydrogens attached)
        self._pos_h = compile_query("[+!H0!$(*~[-])]")
        #: Non-neutralizable positive charge (no hydrogens attached)
        self._pos_quat = compile_query("[+H0!$(*~[-])]")
        #: Negative charge, not bonded to a positive charge with no hydrogens
        self._neg = compile_query("[-!$(*~[+H0])]")
        #: Negative oxygen bonded to [C,P,S]=O, negative aromatic nitrogen?
        self._neg_acid = compile_query("[$([O-][C,P,S]=O),$([n-]1nnnc1),$(n1[n-]nnc1)]")
Example #10
Source File: esol.py    From solubility with MIT License 5 votes vote down vote up
def __init__(self):
        """Create the aromatic-atom query and the ``Descriptor`` record type."""
        descriptor_fields = "mw logp rotors ap"
        # "a" is the SMARTS primitive for any aromatic atom.
        self.aromatic_query = Chem.MolFromSmarts("a")
        self.Descriptor = namedtuple("Descriptor", descriptor_fields)
Example #11
Source File: functional_groups.py    From chemprop with MIT License 5 votes vote down vote up
def __init__(self, args: Namespace):
        """Load functional-group queries, one SMARTS per line.

        Args:
            args: namespace whose ``functional_group_smarts`` attribute is the
                path of the text file to read.  Each line is stripped and
                compiled with ``Chem.MolFromSmarts``; results are stored in
                ``self.smarts`` in file order.
        """
        self.smarts = []
        with open(args.functional_group_smarts, 'r') as smarts_file:
            self.smarts.extend(
                Chem.MolFromSmarts(row.strip()) for row in smarts_file
            )
Example #12
Source File: metric.py    From DrugEx with MIT License 5 votes vote down vote up
def substructure(fname, sub, is_active=False):
    """ Calculating the percentage of molecules that contains the given substructure
    in the given dataset.

    Arguments:
        fname (str): path of a tab-delimited table with a CANONICAL_SMILES column;
            rows are de-duplicated on that column before counting.
        sub (str): molecular substructure with SMARTS representation.
        is_active (bool, optional): selecting only active ligands (True) or not (False).
            If the table has a SCORE column, rows with SCORE > 0.5 (True) or
            SCORE > 0.0 (False) are kept; otherwise, if it has a PCHEMBL_VALUE
            column, rows with PCHEMBL_VALUE >= 6.5 (True) or >= 0.0 (False) are kept.
            (Default: False)

    Returns:
        percentage (float): percentage of the selected molecules (xx.xx%) that
            contains the given substructure. SMILES that RDKit cannot parse are
            counted as non-matching, and 0.0 is returned when no rows are selected.
    """
    sub = Chem.MolFromSmarts(sub)
    df = pd.read_table(fname).drop_duplicates(subset='CANONICAL_SMILES')
    if 'SCORE' in df.columns:
        df = df[df.SCORE > (0.5 if is_active else 0.0)]
    elif 'PCHEMBL_VALUE' in df.columns:
        df = df[df.PCHEMBL_VALUE >= (6.5 if is_active else 0.0)]

    total = len(df)
    if total == 0:
        # Nothing selected: avoid dividing by zero.
        return 0.0

    num = 0
    for smile in df.CANONICAL_SMILES:
        mol = Chem.MolFromSmiles(smile)
        # MolFromSmiles returns None for SMILES it cannot parse.
        if mol is not None and mol.HasSubstructMatch(sub):
            num += 1
    return num * 100 / total
Example #13
Source File: xyz2mol.py    From BCAI_kaggle_CHAMPS with MIT License 5 votes vote down vote up
def get_proto_mol(atomicNumList):
    """Create a molecule holding one atom per atomic number, with no bonds added.

    The first atom comes from the SMARTS ``[#<Z>]`` built from the first
    entry; every further entry is appended as a ``Chem.Atom``.

    Returns:
        The molecule obtained from the editable ``RWMol`` via ``GetMol()``.
    """
    seed = Chem.MolFromSmarts(f"[#{atomicNumList[0]}]")
    editable = Chem.RWMol(seed)
    for atomic_num in atomicNumList[1:]:
        editable.AddAtom(Chem.Atom(atomic_num))
    return editable.GetMol()
Example #14
Source File: xyz2mol.py    From BCAI_kaggle_CHAMPS with MIT License 5 votes vote down vote up
def clean_charges(mol):
    """Rewrite charge-separated patterns in every fragment of ``mol``.

    This hack should not be needed any more but is kept just in case.

    Each fragment is run through the reactions below, in order; a reaction is
    re-applied for as long as its reactant pattern still matches, taking the
    first product each time.  The processed fragments are then recombined
    with ``Chem.CombineMols``.

    Returns:
        The recombined molecule (the input itself if it has no fragments).
    """
    rxn_smarts = ['[N+:1]=[*:2]-[C-:3]>>[N+0:1]-[*:2]=[C-0:3]',
                  '[N+:1]=[*:2]-[O-:3]>>[N+0:1]-[*:2]=[O-0:3]',
                  '[N+:1]=[*:2]-[*:3]=[*:4]-[O-:5]>>[N+0:1]-[*:2]=[*:3]-[*:4]=[O-0:5]',
                  '[#8:1]=[#6:2]([!-:6])[*:3]=[*:4][#6-:5]>>[*-:1][*:2]([*:6])=[*:3][*:4]=[*+0:5]',
                  '[O:1]=[c:2][c-:3]>>[*-:1][*:2][*+0:3]',
                  '[O:1]=[C:2][C-:3]>>[*-:1][*:2]=[*+0:3]']

    # Parse every pattern and reaction once up front; they do not depend on
    # the fragment, so there is no need to rebuild them inside the loops.
    compiled = [
        (Chem.MolFromSmarts(smarts.split(">>")[0]), AllChem.ReactionFromSmarts(smarts))
        for smarts in rxn_smarts
    ]

    fragments = Chem.GetMolFrags(mol, asMols=True, sanitizeFrags=False)

    for i, fragment in enumerate(fragments):
        for patt, rxn in compiled:
            while fragment.HasSubstructMatch(patt):
                ps = rxn.RunReactants((fragment,))
                fragment = ps[0][0]
        if i == 0:
            mol = fragment
        else:
            mol = Chem.CombineMols(mol, fragment)

    return mol
Example #15
Source File: common_scoring_functions.py    From guacamol with MIT License 5 votes vote down vote up
def __init__(self, target: str, inverse=False) -> None:
        """
        Compile the SMARTS pattern to score against.

        :param target: The SMARTS string to match.
        :param inverse: Specifies whether the SMARTS is desired (False) or an antipattern, which we don't want to see
                        in the molecules (inverse=True)
        :raises AssertionError: if ``target`` cannot be parsed as SMARTS.
        """
        super().__init__()
        self.inverse = inverse
        self.smarts = target
        self.target = Chem.MolFromSmarts(target)

        # MolFromSmarts returns None for an invalid pattern, so the compiled
        # query - not the input string - is what has to be checked.
        assert self.target is not None, f"Invalid SMARTS pattern: {target!r}"
Example #16
Source File: rdk.py    From oddt with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def isrotor(self):
        """Decide whether this bond counts as rotatable.

        The bond must not be in a ring, must match the first bond of the
        ``SMARTS_DEF['rot_bond']`` query, and both of its atoms must have an
        atomic number above 1 and more than one neighbour with an atomic
        number above 1.

        Returns:
            ``True`` if all conditions hold, ``False`` otherwise.
        """
        Chem.GetSSSR(self.Bond.GetOwningMol())
        if self.Bond.IsInRing():
            return False

        rot_mol = Chem.MolFromSmarts(SMARTS_DEF['rot_bond'])
        Chem.GetSSSR(rot_mol)  # MolFromSmarts doesn't initialize ring info
        if not self.Bond.Match(rot_mol.GetBondWithIdx(0)):
            return False

        first, second = self.atoms
        if not (first.atomicnum > 1 and second.atomicnum > 1):
            return False

        def heavy_neighbours(atom):
            return sum(n.atomicnum > 1 for n in atom.neighbors)

        first_heavy = heavy_neighbours(first)
        second_heavy = heavy_neighbours(second)
        return first_heavy > 1 and second_heavy > 1
Example #17
Source File: mutate.py    From GB-GA with MIT License 5 votes vote down vote up
def change_atom(mol):
  """Build a reaction SMARTS that swaps one element in ``mol`` for another.

  The source element is redrawn until ``mol`` contains it; the target element
  is redrawn until it differs from the source.  Both are drawn from the same
  weighted list of atomic-number primitives.

  Returns:
    A string of the form ``[<source>:1]>>[<target>:1]``.
  """
  choices = ['#6','#7','#8','#9','#16','#17','#35']
  p = [0.15,0.15,0.14,0.14,0.14,0.14,0.14]

  def draw():
    return np.random.choice(choices, p=p)

  X = draw()
  while not mol.HasSubstructMatch(Chem.MolFromSmarts('['+X+']')):
    X = draw()

  Y = draw()
  while Y == X:
    Y = draw()

  return '[{}:1]>>[{}:1]'.format(X, Y)
Example #18
Source File: connectivity.py    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def CalculateChiv4c(mol):
    """
    #################################################################
    Calculation of valence molecular connectivity chi index for cluster

    ---->Chiv4c

    Usage:

        result=CalculateChiv4c(mol)

        Input: mol is a molecule object.

        Output: result is a numeric value

    For every match of the five-atom cluster pattern "*~*(~*)(~*)~*" the
    non-zero _AtomHKDeltas values of the matched atoms are multiplied
    together and 1/sqrt(product) is added to the result. Matches whose
    deltas are all zero contribute nothing.
    #################################################################
    """
    accum = 0.0
    patt = Chem.MolFromSmarts("*~*(~*)(~*)~*")
    for cluster in mol.GetSubstructMatches(patt):
        hk_deltas = (_AtomHKDeltas(mol.GetAtomWithIdx(x)) for x in cluster)
        deltas = [d for d in hk_deltas if d != 0]
        if deltas:
            # The builtin float replaces numpy.float, which NumPy no longer provides.
            deltas1 = numpy.array(deltas, float)
            accum = accum + 1.0 / numpy.sqrt(deltas1.prod())
    return accum
Example #19
Source File: crossover.py    From GB-GA with MIT License 5 votes vote down vote up
def crossover_ring(parent_A,parent_B):
  """Try to build a child molecule by recombining ring-cut fragments of two parents.

  Returns None at once when neither parent contains a ring atom. Otherwise up
  to 10 attempts are made. Each attempt cuts both parents with cut_ring, joins
  fragment pairs with a first-stage reaction, runs the second-stage reactions
  on each accepted product, and keeps the results that pass mol_OK and ring_OK.

  Returns:
    A random choice among the accepted molecules of the first attempt that
    yields any, or None if cut_ring returns None for either parent or all
    attempts come up empty.
  """
  ring_smarts = Chem.MolFromSmarts('[R]')
  if not parent_A.HasSubstructMatch(ring_smarts) and not parent_B.HasSubstructMatch(ring_smarts):
    return None
  
  # Stage 1: two reactants (one fragment from each parent), joined by a single or a double bond.
  rxn_smarts1 = ['[*:1]~[1*].[1*]~[*:2]>>[*:1]-[*:2]','[*:1]~[1*].[1*]~[*:2]>>[*:1]=[*:2]']
  # Stage 2: same patterns wrapped in parentheses; run below on one product molecule at a time.
  rxn_smarts2 = ['([*:1]~[1*].[1*]~[*:2])>>[*:1]-[*:2]','([*:1]~[1*].[1*]~[*:2])>>[*:1]=[*:2]']
  for i in range(10):
    fragments_A = cut_ring(parent_A)
    fragments_B = cut_ring(parent_B)
    #print [Chem.MolToSmiles(x) for x in list(fragments_A)+list(fragments_B)]
    if fragments_A == None or fragments_B == None:
      return None
    
    new_mol_trial = []
    for rs in rxn_smarts1:
      rxn1 = AllChem.ReactionFromSmarts(rs)
      # NOTE(review): the list is reset for every reaction, so only the
      # products of the last entry in rxn_smarts1 survive this loop - confirm
      # that is intended.
      new_mol_trial = []
      for fa in fragments_A:
        for fb in fragments_B:
          # Keeps only the first product set; NOTE(review): [0] presumably
          # raises IndexError when the reaction yields nothing - verify.
          new_mol_trial.append(rxn1.RunReactants((fa,fb))[0]) 

    new_mols = []
    for rs in rxn_smarts2:
      rxn2 = AllChem.ReactionFromSmarts(rs)
      for m in new_mol_trial:
        m = m[0]  # first molecule of the stage-1 product set
        if mol_OK(m):
          new_mols += list(rxn2.RunReactants((m,)))
    
    new_mols2 = []
    for m in new_mols:
      m = m[0]  # first molecule of the stage-2 product set
      if mol_OK(m) and ring_OK(m):
        new_mols2.append(m)
    
    if len(new_mols2) > 0:
      return random.choice(new_mols2)
    
  return None 
Example #20
Source File: crossover.py    From GB-GA with MIT License 5 votes vote down vote up
def ring_OK(mol):
  """Check the rings of ``mol`` for unwanted features.

  Molecules without ring atoms are accepted.  Ring-containing molecules are
  rejected if they have a ring allene, a ring of more than six atoms, or a
  double bond between atoms in three- or four-membered rings.

  Returns:
    ``True`` if the molecule is accepted, ``False`` otherwise.
  """
  ring_atom = Chem.MolFromSmarts('[R]')
  if not mol.HasSubstructMatch(ring_atom):
    return True

  has_ring_allene = mol.HasSubstructMatch(Chem.MolFromSmarts('[R]=[R]=[R]'))

  ring_sizes = [len(ring) for ring in mol.GetRingInfo().AtomRings()]
  has_macro_cycle = max(ring_sizes) > 6

  has_small_ring_double_bond = mol.HasSubstructMatch(Chem.MolFromSmarts('[r3,r4]=[r3,r4]'))

  if has_ring_allene or has_macro_cycle or has_small_ring_double_bond:
    return False
  return True
Example #21
Source File: crossover.py    From GB-GA with MIT License 5 votes vote down vote up
def cut_ring(mol):
  """Break two ring bonds of ``mol`` and return the pieces if there are exactly two.

  Up to 10 attempts are made.  Each attempt picks, with equal probability,
  either the first and last bond of a random four-ring-atom path or the two
  bonds around a random ring atom that is not of degree 2, then fragments the
  molecule on those bonds (dummy atoms labelled 1 mark the cuts).

  Returns:
    The two fragments from ``Chem.GetMolFrags(..., asMols=True)``, or ``None``
    if the chosen pattern does not occur, fragment extraction fails, or no
    attempt produced exactly two fragments.
  """
  for _ in range(10):
    if random.random() < 0.5:
      patt = Chem.MolFromSmarts('[R]@[R]@[R]@[R]')
      if not mol.HasSubstructMatch(patt):
        return None
      bis = random.choice(mol.GetSubstructMatches(patt))
      bis = ((bis[0],bis[1]),(bis[2],bis[3]),)
    else:
      patt = Chem.MolFromSmarts('[R]@[R;!D2]@[R]')
      if not mol.HasSubstructMatch(patt):
        return None
      bis = random.choice(mol.GetSubstructMatches(patt))
      bis = ((bis[0],bis[1]),(bis[1],bis[2]),)

    bs = [mol.GetBondBetweenAtoms(x,y).GetIdx() for x,y in bis]

    fragments_mol = Chem.FragmentOnBonds(mol,bs,addDummies=True,dummyLabels=[(1, 1),(1,1)])

    try:
      fragments = Chem.GetMolFrags(fragments_mol,asMols=True)
    except Exception:
      # Fragments that RDKit cannot process count as a failed cut; unlike a
      # bare except this lets KeyboardInterrupt/SystemExit through.
      return None

    if len(fragments) == 2:
      return fragments

  return None
Example #22
Source File: crossover.py    From GB-GA with MIT License 5 votes vote down vote up
def cut(mol):
  """Split ``mol`` at one randomly chosen single bond that is not in a ring.

  Returns:
    The fragments from ``Chem.GetMolFrags(..., asMols=True)`` after breaking
    the bond (dummy atoms labelled 1 mark the cut), or ``None`` when there is
    no such bond or fragment extraction fails.
  """
  patt = Chem.MolFromSmarts('[*]-;!@[*]')  # single bond not in ring
  if not mol.HasSubstructMatch(patt):
    return None
  bis = random.choice(mol.GetSubstructMatches(patt))
  bs = [mol.GetBondBetweenAtoms(bis[0],bis[1]).GetIdx()]

  fragments_mol = Chem.FragmentOnBonds(mol,bs,addDummies=True,dummyLabels=[(1, 1)])

  try:
    return Chem.GetMolFrags(fragments_mol,asMols=True)
  except Exception:
    # Treat fragments RDKit cannot process as a failed cut, without also
    # swallowing KeyboardInterrupt/SystemExit as the bare except did.
    return None
Example #23
Source File: rd_filters.py    From rd_filters with MIT License 5 votes vote down vote up
def build_rule_list(self, alert_name_list):
        """
        Restrict ``self.rule_df`` to the requested rule sets and compile their SMARTS.

        Each remaining rule whose SMARTS parses is appended to ``self.rule_list``
        as ``[pattern, max, description]``; rules that fail to parse are reported
        on stderr and skipped.

        :param alert_name_list: list of alert sets to use
        :return: None
        """
        in_selected_sets = self.rule_df.rule_set_name.isin(alert_name_list)
        self.rule_df = self.rule_df[in_selected_sets]

        columns = ["rule_id", "smarts", "max", "description"]
        for rule_id, smarts, max_val, desc in self.rule_df[columns].values.tolist():
            smarts_mol = Chem.MolFromSmarts(smarts)
            if not smarts_mol:
                print(f"Error parsing SMARTS for rule {rule_id}", file=sys.stderr)
                continue
            self.rule_list.append([smarts_mol, max_val, desc])
Example #24
Source File: PyPretreatMolutil.py    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def base(self):
        """Return ``self.base_str`` compiled as a SMARTS query (built on each call)."""
        log.debug("Loading AcidBasePair base: %s", self.name)
        encoded = self.base_str.encode("utf8")
        return Chem.MolFromSmarts(encoded)
Example #25
Source File: PyPretreatMolutil.py    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def acid(self):
        """Return ``self.acid_str`` compiled as a SMARTS query (built on each call)."""
        log.debug("Loading AcidBasePair acid: %s", self.name)
        encoded = self.acid_str.encode("utf8")
        return Chem.MolFromSmarts(encoded)
Example #26
Source File: PyPretreatMolutil.py    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def smarts(self):
        """Return ``self.smarts_str`` compiled as a SMARTS query (built on each call)."""
        encoded = self.smarts_str.encode("utf8")
        return Chem.MolFromSmarts(encoded)
Example #27
Source File: PyPretreatMolutil.py    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def tautomer(self):
        """Return ``self.tautomer_str`` compiled as a SMARTS query (built on each call)."""
        encoded = self.tautomer_str.encode("utf8")
        return Chem.MolFromSmarts(encoded)
Example #28
Source File: PubChemFingerprints.py    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def InitKeys(keyList, keyDict):
    """ *Internal Use Only*

   Compiles the SMARTS pattern of every key and stores ``(pattern, count)``
   in ``keyList`` at position ``key - 1``; meant to be run once.

   Entries whose pattern is "?" are skipped, and patterns that fail to
   compile are reported with ``print`` and leave their slot untouched.

  """
    assert len(keyList) == len(keyDict), "length mismatch"
    for key, (patt, count) in keyDict.items():
        if patt == "?":
            continue
        sma = Chem.MolFromSmarts(patt)
        if sma:
            keyList[key - 1] = sma, count
        else:
            print("SMARTS parser error for key #%d: %s" % (key, patt))
Example #29
Source File: ghosecrippen.py    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _ReadPatts(fileName):

    """
  #################################################################
  *Internal Use Only*

  parses the pattern list from the data file

  The file is tab-separated. Lines starting with "#", lines with fewer
  than four fields, lines with an empty first field and the header row
  (second field equal to "SMARTS") are ignored. For every other line the
  second field is the SMARTS (double quotes removed) and the stripped
  first field is the key it is filed under.

  Returns (order, patts): order lists the keys in first-seen order and
  patts maps each key to a list of (smarts, compiled pattern) tuples.
  SMARTS that fail to compile are reported with print and skipped.
  #################################################################
  """
    patts = {}
    order = []
    with open(fileName, "r") as f:
        for line in f:
            if line.startswith("#"):
                continue
            splitLine = line.split("\t")
            if len(splitLine) < 4 or splitLine[0] == "":
                continue
            sma = splitLine[1]
            if sma == "SMARTS":
                # Column-header row, not a pattern.
                continue
            # str.replace returns a new string, so the result must be kept.
            sma = sma.replace('"', "")
            p = Chem.MolFromSmarts(sma)
            if not p:
                print("Problems parsing smarts: %s" % (sma))
                continue
            # str.strip() instead of string.strip(), which Python 3 lacks.
            cha = splitLine[0].strip()
            if cha not in order:
                order.append(cha)
            patts.setdefault(cha, []).append((sma, p))
    return order, patts


########################################################################### 
Example #30
Source File: cats2d.py    From PyBioMed with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def AssignAtomType(mol):
    """
    #################################################################
    Collect, for each PPP type, the atoms of mol matched by its patterns.

    For every SMARTS listed under a type in PPP, the first atom index of
    each match is recorded. The "L" entry is then replaced by the result
    of ContructLFromGraphSearch(mol) followed by the SMARTS-derived "L"
    indices.

    Note: res is a dict form such as {'A': [2], 'P': [], 'N': [4]}
    #################################################################
    """
    res = {}
    for ppptype in PPP:
        matched = []
        for smarts in PPP[ppptype]:
            hits = mol.GetSubstructMatches(Chem.MolFromSmarts(smarts))
            matched.extend(hit[0] for hit in hits)
        res[ppptype] = matched

    lipophilic = ContructLFromGraphSearch(mol)
    lipophilic.extend(res["L"])
    res["L"] = lipophilic

    return res


###################################