Python rdkit.Chem.MolFromSmarts() Examples
The following are 30 code examples of rdkit.Chem.MolFromSmarts().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module rdkit.Chem, or try the search function.
Example #1
Source File: chemistry.py From guacamol with MIT License | 7 votes |
def initialise_neutralisation_reactions():
    """Build the pattern/replacement pairs used to neutralise charged groups.

    Returns:
        A list of ``(query, replacement)`` tuples. ``query`` is the molecule
        parsed from the SMARTS of a charged group and ``replacement`` is the
        molecule parsed from the corresponding SMILES, with ``False`` passed
        as the second positional argument to ``Chem.MolFromSmiles``.
    """
    charged_to_neutral = (
        # Imidazoles
        ('[n+;H]', 'n'),
        # Amines
        ('[N+;!H0]', 'N'),
        # Carboxylic acids and alcohols
        ('[$([O-]);!$([O-][#7])]', 'O'),
        # Thiols
        ('[S-;X1]', 'S'),
        # Sulfonamides
        ('[$([N-;X2]S(=O)=O)]', 'N'),
        # Enamines
        ('[$([N-;X2][C,N]=C)]', 'N'),
        # Tetrazoles
        ('[n-]', '[nH]'),
        # Sulfoxides
        ('[$([S-]=O)]', 'S'),
        # Amides
        ('[$([N-]C=O)]', 'N'),
    )

    reactions = []
    for smarts, smiles in charged_to_neutral:
        query = Chem.MolFromSmarts(smarts)
        replacement = Chem.MolFromSmiles(smiles, False)
        reactions.append((query, replacement))
    return reactions
Example #2
Source File: mol_utils.py From GLN with MIT License | 6 votes |
def new_mol(self, name):
    """Parse ``name`` and wrap the result in a ``MolGraph``.

    ``name`` is parsed as SMILES when ``self.sanitized`` is truthy and as
    SMARTS otherwise. When ``self.fp_degree`` is positive, the on-bits of a
    Morgan fingerprint (and the bit info, if ``self.fp_info`` is truthy) are
    attached to the graph.

    Returns:
        The ``MolGraph`` instance, or ``None`` when parsing returned ``None``.
    """
    parse = Chem.MolFromSmiles if self.sanitized else Chem.MolFromSmarts
    mol = parse(name)
    if mol is None:
        return None

    graph = MolGraph(name, self.sanitized, mol=mol)
    if self.fp_degree > 0:
        # bitInfo is only collected when the caller asked for it.
        bit_info = {} if self.fp_info else None
        fingerprint = AllChem.GetMorganFingerprint(
            mol,
            self.fp_degree,
            bitInfo=bit_info,
            invariants=self._get_inv(mol),
        )
        graph.fingerprints = list(fingerprint.GetNonzeroElements().keys())
        graph.fp_info = bit_info
    return graph
Example #3
Source File: AtomTypes.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 6 votes |
def BuildPatts(rawV=None):
    """Internal Use Only.

    Compile ``(name, SMARTS)`` definitions into the module-level
    ``esPatterns`` list.

    Args:
        rawV: sized iterable of ``(name, smarts)`` pairs. Defaults to the
            module-level ``_rawD``.

    Side effects:
        Rebinds the global ``esPatterns`` to a list with one slot per entry
        of ``rawV``. A slot holds ``(name, pattern)`` when the SMARTS parsed,
        and stays ``None`` when it did not; a warning is written to stderr
        for each skipped entry.
    """
    global esPatterns, _rawD
    if rawV is None:
        rawV = _rawD

    esPatterns = [None] * len(rawV)
    for i, (name, sma) in enumerate(rawV):
        try:
            patt = Chem.MolFromSmarts(sma)
        except Exception:
            # Only real errors are treated as a parse failure; a bare
            # ``except`` would also swallow KeyboardInterrupt/SystemExit.
            patt = None

        # The parser can also signal failure by returning None instead of
        # raising, so both outcomes are reported the same way.
        if patt is None:
            sys.stderr.write(
                "WARNING: problems with pattern %s (name: %s), skipped.\n" % (sma, name)
            )
            continue

        esPatterns[i] = name, patt
Example #4
Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __init__(self):
    """Parse the SMARTS queries this object stores as private attributes.

    Four patterns are built: metal bonded to N/O/F (``_metal_nof``), metal
    bonded to other non-metals (``_metal_non``), a free-metal query
    (``_free_metal``) and a carboxylic acid query (``_carboxylic``).
    """
    log.debug("Initializing MetalDisconnector")
    # Initialize SMARTS to identify relevant substructures
    # TODO: Use atomic numbers instead of element symbols in SMARTS to allow for isotopes?
    metal_nof_smarts = "[Li,Na,K,Rb,Cs,F,Be,Mg,Ca,Sr,Ba,Ra,Sc,Ti,V,Cr,Mn,Fe,Co,Ni,Cu,Zn,Al,Ga,Y,Zr,Nb,Mo,Tc,Ru,Rh,Pd,Ag,Cd,In,Sn,Hf,Ta,W,Re,Os,Ir,Pt,Au,Hg,Tl,Pb,Bi]~[N,O,F]"
    metal_non_smarts = "[Al,Sc,Ti,V,Cr,Mn,Fe,Co,Ni,Cu,Zn,Y,Zr,Nb,Mo,Tc,Ru,Rh,Pd,Ag,Cd,Hf,Ta,W,Re,Os,Ir,Pt,Au]~[B,C,Si,P,As,Sb,S,Se,Te,Cl,Br,I,At]"
    free_metal_smarts = "[Li,Na,K,Mg,CaX0+0]"
    carboxylic_smarts = "[CX3](=O)[OX2H1]"

    # Each SMARTS is handed to the parser as UTF-8 encoded bytes.
    self._metal_nof = Chem.MolFromSmarts(metal_nof_smarts.encode("utf8"))
    self._metal_non = Chem.MolFromSmarts(metal_non_smarts.encode("utf8"))
    self._free_metal = Chem.MolFromSmarts(free_metal_smarts.encode("utf8"))
    self._carboxylic = Chem.MolFromSmarts(carboxylic_smarts.encode("utf8"))
Example #5
Source File: crossover.py From guacamol_baselines with MIT License | 6 votes |
def cut(mol):
    """Fragment ``mol`` at one randomly chosen acyclic single bond.

    Args:
        mol: molecule to fragment.

    Returns:
        The fragments returned by ``Chem.GetMolFrags`` (as sanitized
        molecules, with dummy atoms labelled 1 at the cut), or ``None`` when
        no acyclic single bond matches or ``GetMolFrags`` raises
        ``ValueError``.
    """
    # Parse the query once and reuse its matches; an empty match tuple is
    # equivalent to HasSubstructMatch() being False.
    acyclic_single_bond = Chem.MolFromSmarts('[*]-;!@[*]')
    matches = mol.GetSubstructMatches(acyclic_single_bond)
    if not matches:
        return None

    bis = random.choice(matches)  # single bond not in ring
    bs = [mol.GetBondBetweenAtoms(bis[0], bis[1]).GetIdx()]

    fragments_mol = Chem.FragmentOnBonds(mol, bs, addDummies=True, dummyLabels=[(1, 1)])

    try:
        return Chem.GetMolFrags(fragments_mol, asMols=True, sanitizeFrags=True)
    except ValueError:
        return None
Example #6
Source File: crossover.py From guacamol_baselines with MIT License | 6 votes |
def ring_OK(mol):
    """Return whether the rings of ``mol`` pass three structural filters.

    A molecule with no ring atoms passes trivially. Otherwise it is rejected
    if it contains a ring allene (``[R]=[R]=[R]``), a ring with more than six
    atoms, or a double bond between atoms in three/four-membered rings.
    """
    if not mol.HasSubstructMatch(Chem.MolFromSmarts('[R]')):
        return True

    has_ring_allene = mol.HasSubstructMatch(Chem.MolFromSmarts('[R]=[R]=[R]'))

    ring_sizes = [len(ring) for ring in mol.GetRingInfo().AtomRings()]
    has_macro_cycle = max(ring_sizes) > 6

    has_small_ring_double_bond = mol.HasSubstructMatch(Chem.MolFromSmarts('[r3,r4]=[r3,r4]'))

    if has_ring_allene or has_macro_cycle:
        return False
    return not has_small_ring_double_bond
# TODO: set from main? calculate for dataset?
Example #7
Source File: goal_directed_generation.py From guacamol_baselines with MIT License | 6 votes |
def add_atom(rdkit_mol, stats: Stats):
    """Apply one randomly chosen atom-adding reaction to ``rdkit_mol``.

    Args:
        rdkit_mol: molecule to extend.
        stats: provides the reaction SMARTS lists (``rxn_smarts_ring_list``,
            ``rxn_smarts_make_ring``, ``rxn_smarts_list``) and their sampling
            probabilities (``p_ring``, ``p``).

    Returns:
        The result of ``run_rxn`` if ``valences_not_too_large`` accepts it,
        otherwise a copy of the input molecule taken before the reaction.
    """
    # NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    # source line - confirm against the upstream file.
    old_mol = Chem.Mol(rdkit_mol)

    if np.random.random() < 0.63:  # probability of adding ring atom
        rxn_smarts = np.random.choice(stats.rxn_smarts_ring_list, p=stats.p_ring)
        if not rdkit_mol.HasSubstructMatch(Chem.MolFromSmarts('[r3,r4,r5]')) \
                or AllChem.CalcNumAliphaticRings(rdkit_mol) == 0:
            rxn_smarts = np.random.choice(stats.rxn_smarts_make_ring, p=stats.p_ring)
            if np.random.random() < 0.036:  # probability of starting a fused ring
                rxn_smarts = rxn_smarts.replace("!", "")
    elif rdkit_mol.HasSubstructMatch(Chem.MolFromSmarts('[*]1=[*]-[*]=[*]-1')):
        rxn_smarts = '[r4:1][r4:2]>>[*:1]C[*:2]'
    else:
        rxn_smarts = np.random.choice(stats.rxn_smarts_list, p=stats.p)

    rdkit_mol = run_rxn(rxn_smarts, rdkit_mol)
    return rdkit_mol if valences_not_too_large(rdkit_mol) else old_mol
Example #8
Source File: __init__.py From oddt with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _amide_bond(bond):
    """Return True when ``bond`` is a C-N bond that is part of an amide match.

    The bond must join one atom with atomic number 6 and one with atomic
    number 7, and both atoms must appear together in a match of ``C(=O)-N``
    on the bond's owning molecule.
    """
    begin = bond.GetBeginAtom()
    end = bond.GetEndAtom()

    # Only a carbon/nitrogen pair (in either order) can be an amide bond.
    if {begin.GetAtomicNum(), end.GetAtomicNum()} != {6, 7}:
        return False

    # https://github.com/rdkit/rdkit/blob/master/Data/FragmentDescriptors.csv
    amide = Chem.MolFromSmarts('C(=O)-N')
    matches = bond.GetOwningMol().GetSubstructMatches(amide)
    return any(
        begin.GetIdx() in match and end.GetIdx() in match
        for match in matches
    )
Example #9
Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self):
    """Parse the charge-related SMARTS queries stored as private attributes."""
    log.debug("Initializing Uncharger")

    pos_h_smarts = "[+!H0!$(*~[-])]"
    pos_quat_smarts = "[+H0!$(*~[-])]"
    neg_smarts = "[-!$(*~[+H0])]"
    neg_acid_smarts = "[$([O-][C,P,S]=O),$([n-]1nnnc1),$(n1[n-]nnc1)]"

    #: Neutralizable positive charge (with hydrogens attached)
    self._pos_h = Chem.MolFromSmarts(pos_h_smarts.encode("utf8"))
    #: Non-neutralizable positive charge (no hydrogens attached)
    self._pos_quat = Chem.MolFromSmarts(pos_quat_smarts.encode("utf8"))
    #: Negative charge, not bonded to a positive charge with no hydrogens
    self._neg = Chem.MolFromSmarts(neg_smarts.encode("utf8"))
    #: Negative oxygen bonded to [C,P,S]=O, negative aromatic nitrogen?
    self._neg_acid = Chem.MolFromSmarts(neg_acid_smarts.encode("utf8"))
Example #10
Source File: esol.py From solubility with MIT License | 5 votes |
def __init__(self):
    """Set up the aromatic-atom query and the ``Descriptor`` record type."""
    # SMARTS "a" is used as the aromatic query.
    aromatic = Chem.MolFromSmarts("a")
    self.aromatic_query = aromatic

    # Record type with fields mw, logp, rotors and ap.
    descriptor_type = namedtuple("Descriptor", "mw logp rotors ap")
    self.Descriptor = descriptor_type
Example #11
Source File: functional_groups.py From chemprop with MIT License | 5 votes |
def __init__(self, args: Namespace):
    """Load functional-group SMARTS patterns from a text file.

    Args:
        args: namespace whose ``functional_group_smarts`` attribute is the
            path of a file holding one SMARTS string per line.

    Every line is stripped of surrounding whitespace and parsed; the results
    are stored, in file order, in ``self.smarts``.
    """
    self.smarts = []
    with open(args.functional_group_smarts, 'r') as smarts_file:
        self.smarts.extend(
            Chem.MolFromSmarts(raw_line.strip()) for raw_line in smarts_file
        )
Example #12
Source File: metric.py From DrugEx with MIT License | 5 votes |
def substructure(fname, sub, is_active=False):
    """Calculate the percentage of molecules that contain a substructure.

    Arguments:
        fname (str): path of a tab-delimited table with a
            ``CANONICAL_SMILES`` column and optionally a ``SCORE`` or
            ``PCHEMBL_VALUE`` column. Rows with duplicate SMILES are dropped.
        sub (str): molecular substructure with SMARTS representation.
        is_active (bool, optional): selecting only active ligands (True) or
            all of the molecules (False). If it is true, the molecule with
            PCHEMBL_VALUE >= 6.5 or SCORE > 0.5 will be selected; otherwise
            the thresholds are PCHEMBL_VALUE >= 0.0 or SCORE > 0.0.
            (Default: False)

    Returns:
        percentage (float): percentage of molecules (xx.xx%) that contains
            the given substructure. ``0.0`` is returned when no rows remain
            after filtering. SMILES that cannot be parsed count as
            non-matching but stay in the denominator.
    """
    sub = Chem.MolFromSmarts(sub)
    df = pd.read_table(fname).drop_duplicates(subset='CANONICAL_SMILES')
    if 'SCORE' in df.columns:
        df = df[df.SCORE > (0.5 if is_active else 0.0)]
    elif 'PCHEMBL_VALUE' in df.columns:
        df = df[df.PCHEMBL_VALUE >= (6.5 if is_active else 0.0)]

    # Nothing left to inspect: avoid dividing by zero below.
    if len(df) == 0:
        return 0.0

    num = 0
    for smile in df.CANONICAL_SMILES:
        mol = Chem.MolFromSmiles(smile)
        # The SMILES parser yields None for invalid input; skip those rows
        # instead of failing on the attribute access.
        if mol is not None and mol.HasSubstructMatch(sub):
            num += 1
    percentage = num * 100 / len(df)
    return percentage
Example #13
Source File: xyz2mol.py From BCAI_kaggle_CHAMPS with MIT License | 5 votes |
def get_proto_mol(atomicNumList):
    """Build a bond-less molecule holding one atom per atomic number.

    The first atom comes from parsing the SMARTS ``[#<Z>]`` for the first
    entry; each remaining entry is added as a ``Chem.Atom`` to an editable
    copy.

    Args:
        atomicNumList: sequence of atomic numbers (must be non-empty).

    Returns:
        The molecule obtained from the editable molecule via ``GetMol()``.
    """
    seed = Chem.MolFromSmarts("[#" + str(atomicNumList[0]) + "]")
    editable = Chem.RWMol(seed)
    for atomic_num in atomicNumList[1:]:
        editable.AddAtom(Chem.Atom(atomic_num))
    return editable.GetMol()
Example #14
Source File: xyz2mol.py From BCAI_kaggle_CHAMPS with MIT License | 5 votes |
def clean_charges(mol):
    """Repeatedly apply charge-cleaning reactions to each fragment of ``mol``.

    The molecule is split into fragments (without sanitization). For every
    fragment each reaction is applied for as long as its reactant pattern
    still matches, taking the first product each time. The processed
    fragments are then recombined in order.

    Args:
        mol: molecule to clean.

    Returns:
        The recombined molecule, or ``mol`` unchanged when it yields no
        fragments.
    """
    # this hack should not be needed any more but is kept just in case
    #
    rxn_smarts = ['[N+:1]=[*:2]-[C-:3]>>[N+0:1]-[*:2]=[C-0:3]',
                  '[N+:1]=[*:2]-[O-:3]>>[N+0:1]-[*:2]=[O-0:3]',
                  '[N+:1]=[*:2]-[*:3]=[*:4]-[O-:5]>>[N+0:1]-[*:2]=[*:3]-[*:4]=[O-0:5]',
                  '[#8:1]=[#6:2]([!-:6])[*:3]=[*:4][#6-:5]>>[*-:1][*:2]([*:6])=[*:3][*:4]=[*+0:5]',
                  '[O:1]=[c:2][c-:3]>>[*-:1][*:2][*+0:3]',
                  '[O:1]=[C:2][C-:3]>>[*-:1][*:2]=[*+0:3]']

    # Parse every reactant pattern and reaction once up front rather than on
    # each fragment / each iteration of the while loop.
    rules = [
        (Chem.MolFromSmarts(smarts.split(">>")[0]), AllChem.ReactionFromSmarts(smarts))
        for smarts in rxn_smarts
    ]

    fragments = Chem.GetMolFrags(mol, asMols=True, sanitizeFrags=False)

    for i, fragment in enumerate(fragments):
        for patt, rxn in rules:
            while fragment.HasSubstructMatch(patt):
                ps = rxn.RunReactants((fragment,))
                fragment = ps[0][0]
        if i == 0:
            mol = fragment
        else:
            mol = Chem.CombineMols(mol, fragment)

    return mol
Example #15
Source File: common_scoring_functions.py From guacamol with MIT License | 5 votes |
def __init__(self, target: str, inverse=False) -> None:
    """
    :param target: The SMARTS string to match.
    :param inverse: Specifies whether the SMARTS is desired (False) or an
        antipattern, which we don't want to see in the molecules
        (inverse=True)
    :raises ValueError: if ``target`` cannot be parsed as SMARTS.
    """
    super().__init__()
    self.inverse = inverse
    self.smarts = target
    self.target = Chem.MolFromSmarts(target)

    # Validate the parsed pattern, not the input string: the parser returns
    # None for invalid SMARTS, which would otherwise only surface later.
    if self.target is None:
        raise ValueError(f"Could not parse SMARTS: {target!r}")
Example #16
Source File: rdk.py From oddt with BSD 3-Clause "New" or "Revised" License | 5 votes |
def isrotor(self):
    """Return True when the wrapped bond counts as rotatable.

    The bond must not be in a ring, must match the first bond of the
    ``SMARTS_DEF['rot_bond']`` query, and both of its atoms must have an
    atomic number above 1 and more than one neighbour with an atomic number
    above 1.
    """
    Chem.GetSSSR(self.Bond.GetOwningMol())
    if self.Bond.IsInRing():
        return False

    rot_mol = Chem.MolFromSmarts(SMARTS_DEF['rot_bond'])
    Chem.GetSSSR(rot_mol)  # MolFromSmarts don't initialize ring info
    if not self.Bond.Match(rot_mol.GetBondWithIdx(0)):
        return False

    a1, a2 = self.atoms
    if not (a1.atomicnum > 1 and a2.atomicnum > 1):
        return False

    heavy_neighbors_1 = sum(n.atomicnum > 1 for n in a1.neighbors)
    heavy_neighbors_2 = sum(n.atomicnum > 1 for n in a2.neighbors)
    return heavy_neighbors_1 > 1 and heavy_neighbors_2 > 1
Example #17
Source File: mutate.py From GB-GA with MIT License | 5 votes |
def change_atom(mol):
    """Return a reaction SMARTS that swaps one element of ``mol`` for another.

    An element present in ``mol`` is drawn at random (redrawing until one
    matches), then a different replacement element is drawn.

    Returns:
        A string of the form ``[<old>:1]>>[<new>:1]``.
    """
    choices = ['#6','#7','#8','#9','#16','#17','#35']
    p = [0.15,0.15,0.14,0.14,0.14,0.14,0.14]

    def draw():
        # One weighted draw from the element table.
        return np.random.choice(choices, p=p)

    # Source element: keep drawing until it occurs in the molecule.
    X = draw()
    while not mol.HasSubstructMatch(Chem.MolFromSmarts('['+X+']')):
        X = draw()

    # Target element: keep drawing until it differs from the source.
    Y = draw()
    while Y == X:
        Y = draw()

    template = '[X:1]>>[Y:1]'
    return template.replace('X',X).replace('Y',Y)
Example #18
Source File: connectivity.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def CalculateChiv4c(mol):
    """
    #################################################################
    Calculation of valence molecular connectivity chi index for cluster

    ---->Chiv4c

    Usage:

        result=CalculateChiv4c(mol)

        Input: mol is a molecule object.

        Output: result is a numeric value

    For every match of the pattern ``*~*(~*)(~*)~*`` the non-zero
    ``_AtomHKDeltas`` values of the matched atoms are multiplied together
    and ``1 / sqrt(product)`` is added to the result. Matches whose deltas
    are all zero contribute nothing.
    #################################################################
    """
    accum = 0.0
    patt = Chem.MolFromSmarts("*~*(~*)(~*)~*")
    for cluster in mol.GetSubstructMatches(patt):
        # Zero deltas are dropped so they cannot zero the product.
        deltas = [
            delta
            for delta in (_AtomHKDeltas(mol.GetAtomWithIdx(x)) for x in cluster)
            if delta != 0
        ]
        if deltas:
            # The ``numpy.float`` alias was removed from NumPy; the builtin
            # ``float`` selects the same dtype.
            deltas1 = numpy.array(deltas, float)
            accum = accum + 1.0 / numpy.sqrt(deltas1.prod())
    return accum
Example #19
Source File: crossover.py From GB-GA with MIT License | 5 votes |
def crossover_ring(parent_A,parent_B):
  """Try to build a child molecule by recombining ring fragments of two parents.

  Returns None immediately when neither parent has a ring atom. Otherwise up
  to ten attempts are made: both parents are cut with ``cut_ring``, the
  fragments are joined with the reactions in ``rxn_smarts1`` and then
  ``rxn_smarts2``, and the products are filtered with ``mol_OK`` and
  ``ring_OK``. A random surviving product is returned; None is returned when
  a cut fails or no attempt produces a valid product.

  NOTE(review): the block nesting below was reconstructed from a
  whitespace-collapsed source line - confirm against the upstream file.
  """
  ring_smarts = Chem.MolFromSmarts('[R]')
  # Nothing to do unless at least one parent contains a ring atom.
  if not parent_A.HasSubstructMatch(ring_smarts) and not parent_B.HasSubstructMatch(ring_smarts):
    return None

  # First stage: join one labelled dummy atom from each of two separate fragments.
  rxn_smarts1 = ['[*:1]~[1*].[1*]~[*:2]>>[*:1]-[*:2]','[*:1]~[1*].[1*]~[*:2]>>[*:1]=[*:2]']
  # Second stage: same transformation, with both ends grouped in parentheses.
  rxn_smarts2 = ['([*:1]~[1*].[1*]~[*:2])>>[*:1]-[*:2]','([*:1]~[1*].[1*]~[*:2])>>[*:1]=[*:2]']
  for i in range(10):
    fragments_A = cut_ring(parent_A)
    fragments_B = cut_ring(parent_B)
    #print [Chem.MolToSmiles(x) for x in list(fragments_A)+list(fragments_B)]
    # A failed cut on either parent aborts the whole crossover, not just this attempt.
    if fragments_A == None or fragments_B == None:
      return None

    new_mol_trial = []
    for rs in rxn_smarts1:
      rxn1 = AllChem.ReactionFromSmarts(rs)
      # NOTE(review): the list is reset for every reaction, so only the
      # products of the last entry in rxn_smarts1 appear to be kept - confirm
      # this is intended.
      new_mol_trial = []
      for fa in fragments_A:
        for fb in fragments_B:
          # Keep the first product set of each fragment pairing.
          new_mol_trial.append(rxn1.RunReactants((fa,fb))[0])

    new_mols = []
    for rs in rxn_smarts2:
      rxn2 = AllChem.ReactionFromSmarts(rs)
      for m in new_mol_trial:
        m = m[0]
        if mol_OK(m):
          new_mols += list(rxn2.RunReactants((m,)))

    # Keep only products that pass both the molecule and the ring checks.
    new_mols2 = []
    for m in new_mols:
      m = m[0]
      if mol_OK(m) and ring_OK(m):
        new_mols2.append(m)

    if len(new_mols2) > 0:
      return random.choice(new_mols2)

  return None
Example #20
Source File: crossover.py From GB-GA with MIT License | 5 votes |
def ring_OK(mol):
  """Return whether the rings of ``mol`` pass three structural filters.

  A molecule with no ring atoms passes trivially. Otherwise it is rejected
  if it contains a ring allene (``[R]=[R]=[R]``), a ring with more than six
  atoms, or a double bond between atoms in three/four-membered rings.
  """
  if not mol.HasSubstructMatch(Chem.MolFromSmarts('[R]')):
    return True

  has_ring_allene = mol.HasSubstructMatch(Chem.MolFromSmarts('[R]=[R]=[R]'))

  ring_sizes = [len(ring) for ring in mol.GetRingInfo().AtomRings()]
  has_macro_cycle = max(ring_sizes) > 6

  has_small_ring_double_bond = mol.HasSubstructMatch(Chem.MolFromSmarts('[r3,r4]=[r3,r4]'))

  if has_ring_allene or has_macro_cycle:
    return False
  return not has_small_ring_double_bond
Example #21
Source File: crossover.py From GB-GA with MIT License | 5 votes |
def cut_ring(mol):
  """Cut ``mol`` at two ring bonds and return the two resulting fragments.

  Up to ten attempts are made. Each attempt randomly picks one of two ring
  patterns, chooses a random match, and fragments the molecule on two bonds
  of that match (dummy atoms labelled 1 are added at the cuts).

  Returns:
    The fragments from ``Chem.GetMolFrags`` when exactly two are produced;
    ``None`` when the chosen pattern has no match, when ``GetMolFrags``
    raises, or when no attempt yields exactly two fragments.
  """
  for _ in range(10):
    if random.random() < 0.5:
      # Four consecutive ring atoms: cut the first and the last bond.
      matches = mol.GetSubstructMatches(Chem.MolFromSmarts('[R]@[R]@[R]@[R]'))
      if not matches:
        return None
      bis = random.choice(matches)
      bis = ((bis[0],bis[1]),(bis[2],bis[3]),)
    else:
      # Three ring atoms around a centre that is not degree 2: cut both bonds.
      matches = mol.GetSubstructMatches(Chem.MolFromSmarts('[R]@[R;!D2]@[R]'))
      if not matches:
        return None
      bis = random.choice(matches)
      bis = ((bis[0],bis[1]),(bis[1],bis[2]),)

    bs = [mol.GetBondBetweenAtoms(x,y).GetIdx() for x,y in bis]

    fragments_mol = Chem.FragmentOnBonds(mol,bs,addDummies=True,dummyLabels=[(1, 1),(1,1)])

    try:
      fragments = Chem.GetMolFrags(fragments_mol,asMols=True)
    except Exception:
      # Best-effort: any failure to split means no usable cut. Exception
      # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
      return None

    if len(fragments) == 2:
      return fragments

  return None
Example #22
Source File: crossover.py From GB-GA with MIT License | 5 votes |
def cut(mol):
  """Fragment ``mol`` at one randomly chosen acyclic single bond.

  Returns:
    The fragments from ``Chem.GetMolFrags`` (dummy atoms labelled 1 are
    added at the cut), or ``None`` when no acyclic single bond matches or
    ``GetMolFrags`` raises.
  """
  # Parse the query once; an empty match tuple means there is nothing to cut.
  matches = mol.GetSubstructMatches(Chem.MolFromSmarts('[*]-;!@[*]'))
  if not matches:
    return None

  bis = random.choice(matches) #single bond not in ring
  bs = [mol.GetBondBetweenAtoms(bis[0],bis[1]).GetIdx()]

  fragments_mol = Chem.FragmentOnBonds(mol,bs,addDummies=True,dummyLabels=[(1, 1)])

  try:
    return Chem.GetMolFrags(fragments_mol,asMols=True)
  except Exception:
    # Best-effort: any failure to split means no usable cut. Exception
    # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
    return None
Example #23
Source File: rd_filters.py From rd_filters with MIT License | 5 votes |
def build_rule_list(self, alert_name_list):
    """
    Select the rule sets named in alert_name_list and parse their SMARTS.

    ``self.rule_df`` is narrowed to the rows whose ``rule_set_name`` is in
    ``alert_name_list``. For every remaining row the SMARTS is parsed; on
    success ``[pattern, max, description]`` is appended to
    ``self.rule_list``, otherwise an error naming the rule id is printed to
    stderr.

    :param alert_name_list: list of alert sets to use
    :return: None
    """
    selected_rows = self.rule_df.rule_set_name.isin(alert_name_list)
    self.rule_df = self.rule_df[selected_rows]

    rule_columns = ["rule_id", "smarts", "max", "description"]
    for rule_id, smarts, max_val, desc in self.rule_df[rule_columns].values.tolist():
        smarts_mol = Chem.MolFromSmarts(smarts)
        if not smarts_mol:
            print(f"Error parsing SMARTS for rule {rule_id}", file=sys.stderr)
            continue
        self.rule_list.append([smarts_mol, max_val, desc])
Example #24
Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def base(self):
    """Return the query molecule parsed from ``self.base_str`` (UTF-8 encoded)."""
    log.debug("Loading AcidBasePair base: %s", self.name)
    encoded_smarts = self.base_str.encode("utf8")
    return Chem.MolFromSmarts(encoded_smarts)
Example #25
Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def acid(self):
    """Return the query molecule parsed from ``self.acid_str`` (UTF-8 encoded)."""
    log.debug("Loading AcidBasePair acid: %s", self.name)
    encoded_smarts = self.acid_str.encode("utf8")
    return Chem.MolFromSmarts(encoded_smarts)
Example #26
Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def smarts(self):
    """Return the query molecule parsed from ``self.smarts_str`` (UTF-8 encoded)."""
    encoded_smarts = self.smarts_str.encode("utf8")
    return Chem.MolFromSmarts(encoded_smarts)
Example #27
Source File: PyPretreatMolutil.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def tautomer(self):
    """Return the query molecule parsed from ``self.tautomer_str`` (UTF-8 encoded)."""
    encoded_smarts = self.tautomer_str.encode("utf8")
    return Chem.MolFromSmarts(encoded_smarts)
Example #28
Source File: PubChemFingerprints.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def InitKeys(keyList, keyDict):
    """ *Internal Use Only*

    generates SMARTS patterns for the keys, run once

    ``keyDict`` maps a 1-based key number to ``(pattern, count)``. Entries
    whose pattern is ``"?"`` are ignored. Every other pattern is parsed and
    ``(query, count)`` is stored at ``keyList[key - 1]``; a message is
    printed for patterns that fail to parse.
    """
    assert len(keyList) == len(keyDict.keys()), "length mismatch"
    for key, (patt, count) in keyDict.items():
        if patt == "?":
            continue
        sma = Chem.MolFromSmarts(patt)
        if sma:
            keyList[key - 1] = sma, count
        else:
            print("SMARTS parser error for key #%d: %s" % (key, patt))
Example #29
Source File: ghosecrippen.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _ReadPatts(fileName):
    """
    #################################################################
    *Internal Use Only*

    parses the pattern list from the data file

    Lines starting with ``#`` are skipped. Remaining lines are split on
    tabs; a line is used when it has at least four fields, a non-empty
    first field, and a second field other than the literal ``SMARTS``.
    Double quotes are removed from the SMARTS before parsing.

    Returns:
        ``(order, patts)``: ``order`` lists the first-field labels in order
        of first appearance; ``patts`` maps each label to a list of
        ``(smarts, pattern)`` tuples. SMARTS that fail to parse are reported
        with ``print`` and skipped.
    #################################################################
    """
    patts = {}
    order = []
    with open(fileName, "r") as f:
        for line in f:
            if line.startswith("#"):
                continue
            splitLine = line.split("\t")
            if len(splitLine) < 4 or splitLine[0] == "":
                continue
            sma = splitLine[1]
            if sma == "SMARTS":
                continue

            # str.replace returns a new string; the result must be kept for
            # the quotes to actually be removed.
            sma = sma.replace('"', "")
            p = Chem.MolFromSmarts(sma)
            if not p:
                print("Problems parsing smarts: %s" % (sma))
                continue

            # ``string.strip`` does not exist in Python 3; use the str method.
            cha = splitLine[0].strip()
            if cha not in order:
                order.append(cha)
            patts.setdefault(cha, []).append((sma, p))
    return order, patts
###########################################################################
Example #30
Source File: cats2d.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def AssignAtomType(mol):
    """
    #################################################################
    Assign the atoms in the mol object into each of the PPP type

    according to PPP list definition.

    Note: res is a dict form such as {'A': [2], 'P': [], 'N': [4]}

    For every PPP type, the first atom index of each match of each of its
    SMARTS patterns is collected. The "L" entry is then replaced by the
    result of ContructLFromGraphSearch(mol) followed by the SMARTS-derived
    "L" indices.
    #################################################################
    """
    res = {}
    for ppptype in PPP:
        matched_atoms = []
        for smarts in PPP[ppptype]:
            hits = mol.GetSubstructMatches(Chem.MolFromSmarts(smarts))
            # Only the first atom of each match is recorded.
            matched_atoms.extend(hit[0] for hit in hits)
        res[ppptype] = matched_atoms

    graph_search_atoms = ContructLFromGraphSearch(mol)
    graph_search_atoms.extend(res["L"])
    res["L"] = graph_search_atoms
    return res
###################################