Python rdkit.Chem.AllChem.GetMorganFingerprintAsBitVect() Examples
The following are 30 code examples of rdkit.Chem.AllChem.GetMorganFingerprintAsBitVect().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module rdkit.Chem.AllChem, or try the search function.
Example #1
Source File: properties.py From hgraph2graph with MIT License | 8 votes |
def similarity(a, b):
    """Return the Tanimoto similarity of two molecules given as SMILES.

    Both strings are parsed with RDKit and compared through 2048-bit Morgan
    fingerprints of radius 2 with chirality ignored.  ``0.0`` is returned
    when either argument is ``None`` or fails to parse into a molecule.
    """
    if a is None or b is None:
        return 0.0

    mols = [Chem.MolFromSmiles(smi) for smi in (a, b)]
    if any(m is None for m in mols):
        return 0.0

    fingerprints = [
        AllChem.GetMorganFingerprintAsBitVect(
            m, 2, nBits=2048, useChirality=False)
        for m in mols
    ]
    return DataStructs.TanimotoSimilarity(*fingerprints)
Example #2
Source File: rdkit_grid_featurizer.py From deepchem with MIT License | 7 votes |
def compute_ecfp_features(mol, ecfp_degree=2, ecfp_power=11):
    """Compute a Morgan (ECFP-style) bit array for an RDKit molecule.

    Parameters
    ----------
    mol: rdkit molecule
      Molecule to featurize.
    ecfp_degree: int
      Passed to RDKit as the Morgan fingerprint radius.
    ecfp_power: int
      Base-2 logarithm of the fingerprint length; the bit vector is
      requested with ``2**ecfp_power`` bits.

    Returns
    -------
    ecfp_array: np.ndarray
      ``np.array`` built from the RDKit bit vector: index i holds 1 when
      the corresponding bit is set for the molecule and 0 otherwise.
    """
    from rdkit.Chem import AllChem

    n_bits = 2**ecfp_power
    bit_vector = AllChem.GetMorganFingerprintAsBitVect(
        mol, ecfp_degree, nBits=n_bits)
    return np.array(bit_vector)
Example #3
Source File: outputfingerprints.py From qsar-tools with Apache License 2.0 | 6 votes |
def calcfingerprint(mol, args):
    '''Return a list of the bits/cnts of the fingerprint of mol.

    Uses fingerprint settings from args which should have been the result
    of a parse of addfpargs.

    args.fp selects the fingerprint:
      'rdkit'   - Chem.RDKFingerprint with fpSize=args.fpbits
      'ecfp<N>' - Morgan bit vector with radius N // 2 and nBits=args.fpbits
      'maccs'   - MACCSkeys.GenMACCSKeys
      'smarts'  - one 0/1 entry per pattern in args.smartsfile; writes an
                  error to stderr and exits with status -1 when
                  args.smartsfile is empty
      'fp2'     - OpenBabel FP2 (via pybel) as a list of 1021 0/1 values
    Any other value of args.fp yields an empty list.
    '''
    if args.fp == 'rdkit':
        fp = Chem.RDKFingerprint(mol, fpSize=args.fpbits)
        return [int(x) for x in fp.ToBitString()]

    if args.fp.startswith('ecfp'):
        diameter = int(args.fp.replace('ecfp', ''))
        # The name carries a diameter while RDKit expects an integer radius.
        # Floor division keeps the radius an int on Python 3, where "/"
        # would produce a float that RDKit rejects.
        r = diameter // 2
        fp = Chem.GetMorganFingerprintAsBitVect(mol, r, nBits=args.fpbits)
        return [int(x) for x in fp.ToBitString()]

    if args.fp == 'maccs':
        fp = MACCSkeys.GenMACCSKeys(mol)
        return [int(x) for x in fp.ToBitString()]

    if args.fp == 'smarts':
        if not args.smartsfile:
            sys.stderr.write("ERROR: Must provide SMARTS file with --smarts\n")
            sys.exit(-1)
        smarts = args.smartsfile
        ret = [0] * len(smarts)
        for (i, smart) in enumerate(smarts):
            if mol.HasSubstructMatch(smart):
                ret[i] = 1
        return ret

    if args.fp == 'fp2':
        smi = Chem.MolToSmiles(mol)
        obmol = pybel.readstring('smi', smi)
        fp = obmol.calcfp(fptype='FP2')
        ret = [0] * 1021  # FP2 are mod 1021
        for setbit in fp.bits:
            # but pybel makes the bits start at 1 for some reason
            assert(setbit > 0)
            ret[setbit - 1] = 1
        return ret

    return []
Example #4
Source File: morgan_fingerprint.py From chemprop with MIT License | 6 votes |
def morgan_fingerprint(smiles: str, radius: int = 2, num_bits: int = 2048, use_counts: bool = False) -> np.ndarray:
    """
    Generates a morgan fingerprint for a smiles string.

    :param smiles: A smiles string for a molecule. Any non-string value is
                   passed to RDKit unchanged, i.e. treated as an already
                   constructed molecule.
    :param radius: The radius of the fingerprint.
    :param num_bits: The number of bits to use in the fingerprint.
    :param use_counts: Whether to use counts or just a bit vector for the fingerprint
    :return: A 1-D numpy array containing the morgan fingerprint.
    """
    # isinstance (rather than an exact type comparison) also accepts str
    # subclasses such as numpy.str_, which would otherwise be mistaken for
    # molecule objects.
    if isinstance(smiles, str):
        mol = Chem.MolFromSmiles(smiles)
    else:
        mol = smiles

    if use_counts:
        fp_vect = AllChem.GetHashedMorganFingerprint(mol, radius, nBits=num_bits)
    else:
        fp_vect = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=num_bits)

    # Placeholder array that ConvertToNumpyArray fills with the fingerprint.
    fp = np.zeros((1,))
    DataStructs.ConvertToNumpyArray(fp_vect, fp)
    return fp
Example #5
Source File: util.py From DrugEx with MIT License | 6 votes |
def ECFP_from_SMILES(cls, smiles, radius=3, bit_len=4096, scaffold=0, index=None):
    """Build a table of Morgan bit fingerprints, one row per SMILES string.

    Args:
        cls: unused by the body (the function is written as a classmethod).
        smiles: sequence of SMILES strings.
        radius: Morgan fingerprint radius passed to RDKit.
        bit_len: number of bits per fingerprint (number of columns).
        scaffold: 0 fingerprints the molecule itself, 1 its Murcko scaffold,
            2 the generic form produced by ``MurckoScaffold.MakeScaffoldGeneric``.
        index: row labels for the result; defaults to ``smiles``.

    Returns:
        pandas.DataFrame of shape ``(len(smiles), bit_len)``.  A row whose
        fingerprint could not be computed is left as all zeros and the
        offending SMILES is printed.
    """
    fps = np.zeros((len(smiles), bit_len))
    for i, smile in enumerate(smiles):
        mol = Chem.MolFromSmiles(smile)
        arr = np.zeros((1,))
        try:
            if scaffold == 1:
                mol = MurckoScaffold.GetScaffoldForMol(mol)
            elif scaffold == 2:
                mol = MurckoScaffold.MakeScaffoldGeneric(mol)
            fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=bit_len)
            DataStructs.ConvertToNumpyArray(fp, arr)
            fps[i, :] = arr
        except Exception:
            # Best effort: report the failing SMILES and keep a zero row.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            print(smile)
            fps[i, :] = 0
    return pd.DataFrame(fps, index=(smiles if index is None else index))
Example #6
Source File: feature_utils.py From deep-molecular-massspec with Apache License 2.0 | 6 votes |
def make_circular_fingerprint(mol, circular_fp_key):
    """Compute the circular fingerprint described by circular_fp_key.

    Args:
      mol : rdkit.Mol
      circular_fp_key : A ms_constants.CircularFingerprintKey object; its
        fp_type, radius and fp_len attributes are read here.
    Returns:
      np.array filled by DataStructs.ConvertToNumpyArray from a fingerprint
      requested with nBits=circular_fp_key.fp_len.
    Raises:
      KeyError: if circular_fp_key.fp_type is not one of the two known
        base names.
    """
    # Base name -> rdkit function computing that kind of fingerprint.
    rdkit_fn_by_basename = {
        fmap_constants.CIRCULAR_FP_BASENAME:
            AllChem.GetMorganFingerprintAsBitVect,
        fmap_constants.COUNTING_CIRCULAR_FP_BASENAME:
            AllChem.GetHashedMorganFingerprint,
    }
    compute_fp = rdkit_fn_by_basename[circular_fp_key.fp_type]
    rdkit_fp = compute_fp(
        mol, circular_fp_key.radius, nBits=circular_fp_key.fp_len)

    result = np.zeros(1)
    DataStructs.ConvertToNumpyArray(rdkit_fp, result)
    return result
Example #7
Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0 | 6 votes |
def batch_mixed_diversity(smiles, set_smiles):
    """Score every generated SMILES against two reference fingerprint sets.

    For each entry of ``smiles`` that passes ``verify_sequence`` the score is
    ``diversity`` against a random sample of 100 ``set_smiles`` plus
    ``diversity`` against a random sample of 500 generated ``smiles``;
    entries failing ``verify_sequence`` score 0.0.

    The previous implementation stored both fingerprint lists in the same
    variable, so the set-based reference was discarded and the generated
    reference was counted twice; it also drew the 500-element sample without
    using it.  The two references are now kept separate.

    Raises:
        ValueError: from ``random.sample`` when ``set_smiles`` has fewer
            than 100 entries or ``smiles`` has fewer than 500.
    """
    # Reference fingerprints drawn from the comparison set.
    rand_smiles = random.sample(set_smiles, 100)
    set_fps = [
        Chem.GetMorganFingerprintAsBitVect(
            Chem.MolFromSmiles(s), 4, nBits=2048)
        for s in rand_smiles
    ]

    # Reference fingerprints drawn from the generated molecules themselves.
    rand_gen_smiles = random.sample(smiles, 500)
    gen_fps = [
        Chem.GetMorganFingerprintAsBitVect(
            Chem.MolFromSmiles(s), 4, nBits=2048)
        for s in rand_gen_smiles
    ]

    return [
        diversity(s, set_fps) + diversity(s, gen_fps)
        if verify_sequence(s) else 0.0
        for s in smiles
    ]
Example #8
Source File: policies.py From retrosynthesis_planner with GNU General Public License v3.0 | 6 votes |
def fingerprint_mols(mols, fp_dim):
    """Return one radius-2 Morgan bit vector per SMILES string in ``mols``.

    Each entry of ``mols`` is parsed with ``Chem.MolFromSmiles`` and
    fingerprinted with ``nBits=int(fp_dim)``.
    """
    # "When comparing the ECFP/FCFP fingerprints and
    # the Morgan fingerprints generated by the RDKit,
    # remember that the 4 in ECFP4 corresponds to the
    # diameter of the atom environments considered,
    # while the Morgan fingerprints take a radius parameter.
    # So the examples above, with radius=2, are roughly
    # equivalent to ECFP4 and FCFP4."
    # <http://www.rdkit.org/docs/GettingStartedInPython.html>
    return [
        AllChem.GetMorganFingerprintAsBitVect(
            Chem.MolFromSmiles(smi), 2, nBits=int(fp_dim))
        for smi in mols
    ]
Example #9
Source File: chemistry.py From guacamol with MIT License | 6 votes |
def highest_tanimoto_precalc_fps(mol, fps):
    """Return the largest Tanimoto similarity between ``mol`` and ``fps``.

    Args:
        mol: Rdkit molecule
        fps: precalculated ECFP4 bitvectors

    Returns:
        0 when ``fps`` is None or empty; otherwise the maximum of the bulk
        Tanimoto similarities between the 4096-bit, radius-2 Morgan
        fingerprint of ``mol`` and every entry of ``fps``.
    """
    nothing_to_compare = fps is None or len(fps) == 0
    if nothing_to_compare:
        return 0

    query_fp = AllChem.GetMorganFingerprintAsBitVect(mol, 2, 4096)
    similarities = DataStructs.BulkTanimotoSimilarity(query_fp, fps)
    return np.array(similarities).max()
Example #10
Source File: fingerprint.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def CalculateECFP2Fingerprint(mol, radius=1, nBits=1024):
    """
    #################################################################
    Calculate ECFP2

    Usage:
        result=CalculateECFP2Fingerprint(mol)

    Input:
        mol is a molecule object.
        radius is the Morgan radius.
        nBits is the length of the bit vector (default 1024, the value
        that used to be hard-coded; mirrors the FCFP functions).

    Output:
        result is a 3-tuple:
        the first item is the bit vector converted to a tuple;
        the second is the dict returned by GetNonzeroElements() of the
        unfolded Morgan fingerprint;
        the third is the unfolded Morgan fingerprint object itself, which
        can be used for calculating similarity.
    #################################################################
    """
    res = AllChem.GetMorganFingerprint(mol, radius)
    fp = tuple(AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits))
    return fp, res.GetNonzeroElements(), res
Example #11
Source File: stereo.py From hgraph2graph with MIT License | 5 votes |
def similarity(a, b, chiral=False):
    """Return the Tanimoto similarity of two molecules given as SMILES.

    Both strings are parsed with RDKit and compared through 2048-bit Morgan
    fingerprints of radius 2; ``chiral`` is forwarded as ``useChirality``.
    ``0.0`` is returned when either argument is ``None`` or fails to parse
    into a molecule.
    """
    if a is None or b is None:
        return 0.0

    first_mol = Chem.MolFromSmiles(a)
    second_mol = Chem.MolFromSmiles(b)
    if first_mol is None or second_mol is None:
        return 0.0

    fp_options = dict(nBits=2048, useChirality=chiral)
    first_fp = AllChem.GetMorganFingerprintAsBitVect(first_mol, 2, **fp_options)
    second_fp = AllChem.GetMorganFingerprintAsBitVect(second_mol, 2, **fp_options)
    return DataStructs.TanimotoSimilarity(first_fp, second_fp)
Example #12
Source File: fingerprint.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def CalculateECFP4Fingerprint(mol, radius=2, nBits=1024):
    """
    #################################################################
    Calculate ECFP4

    Usage:
        result=CalculateECFP4Fingerprint(mol)

    Input:
        mol is a molecule object.
        radius is the Morgan radius.
        nBits is the length of the bit vector (default 1024, the value
        that used to be hard-coded; mirrors the FCFP functions).

    Output:
        result is a 3-tuple:
        the first item is the bit vector converted to a tuple;
        the second is the dict returned by GetNonzeroElements() of the
        unfolded Morgan fingerprint;
        the third is the unfolded Morgan fingerprint object itself, which
        can be used for calculating similarity.
    #################################################################
    """
    res = AllChem.GetMorganFingerprint(mol, radius)
    fp = tuple(AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits))
    return fp, res.GetNonzeroElements(), res
Example #13
Source File: fingerprint.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def CalculateECFP6Fingerprint(mol, radius=3, nBits=1024):
    """
    #################################################################
    Calculate ECFP6

    Usage:
        result=CalculateECFP6Fingerprint(mol)

    Input:
        mol is a molecule object.
        radius is the Morgan radius.
        nBits is the length of the bit vector (default 1024, the value
        that used to be hard-coded; mirrors the FCFP functions).

    Output:
        result is a 3-tuple:
        the first item is the bit vector converted to a tuple;
        the second is the dict returned by GetNonzeroElements() of the
        unfolded Morgan fingerprint;
        the third is the unfolded Morgan fingerprint object itself, which
        can be used for calculating similarity.
    #################################################################
    """
    res = AllChem.GetMorganFingerprint(mol, radius)
    fp = tuple(AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits))
    return fp, res.GetNonzeroElements(), res
Example #14
Source File: fingerprint.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def CalculateFCFP2Fingerprint(mol, radius=1, nBits=1024):
    """
    #################################################################
    Calculate FCFP2 (Morgan fingerprint computed with useFeatures=True).

    Usage:
        result=CalculateFCFP2Fingerprint(mol)

    Input:
        mol is a molecule object.
        radius is the Morgan radius.
        nBits is the length of the bit vector.

    Output:
        result is a 3-tuple:
        the bit vector converted to a tuple;
        the dict returned by GetNonzeroElements() of the unfolded
        fingerprint;
        the unfolded fingerprint object itself, usable for similarity.
    #################################################################
    """
    sparse_fp = AllChem.GetMorganFingerprint(mol, radius, useFeatures=True)
    bit_vect = AllChem.GetMorganFingerprintAsBitVect(
        mol, radius, nBits, useFeatures=True
    )
    bits = tuple(bit_vect)
    return bits, sparse_fp.GetNonzeroElements(), sparse_fp
Example #15
Source File: fingerprint.py From PyBioMed with BSD 3-Clause "New" or "Revised" License | 5 votes |
def CalculateFCFP4Fingerprint(mol, radius=2, nBits=1024):
    """
    #################################################################
    Calculate FCFP4 (Morgan fingerprint computed with useFeatures=True).

    Usage:
        result=CalculateFCFP4Fingerprint(mol)

    Input:
        mol is a molecule object.
        radius is the Morgan radius.
        nBits is the length of the bit vector.

    Output:
        result is a 3-tuple:
        the bit vector converted to a tuple;
        the dict returned by GetNonzeroElements() of the unfolded
        fingerprint;
        the unfolded fingerprint object itself, usable for similarity.
    #################################################################
    """
    sparse_fp = AllChem.GetMorganFingerprint(mol, radius, useFeatures=True)
    bit_vect = AllChem.GetMorganFingerprintAsBitVect(
        mol, radius, nBits, useFeatures=True
    )
    bits = tuple(bit_vect)
    return bits, sparse_fp.GetNonzeroElements(), sparse_fp
Example #16
Source File: utils.py From moses with MIT License | 5 votes |
def fingerprint(smiles_or_mol, fp_type='maccs', dtype=None, morgan__r=2, morgan__n=1024, *args, **kwargs):
    """
    Generates a fingerprint as a numpy array of bits.

    Returns None when ``get_mol`` cannot produce a molecule.

    Parameters:
        smiles_or_mol: input forwarded to ``get_mol`` together with
            ``*args`` and ``**kwargs``
        fp_type: type of fingerprint, case-insensitive: [MACCS|morgan]
        dtype: if not None, specifies the dtype of returned array
        morgan__r: radius passed to ``Morgan``
        morgan__n: number of bits passed to ``Morgan``

    Raises:
        ValueError: for any other ``fp_type``.
    """
    fp_type = fp_type.lower()
    molecule = get_mol(smiles_or_mol, *args, **kwargs)
    if molecule is None:
        return None

    if fp_type == 'maccs':
        on_bits = np.array(MACCSkeys.GenMACCSKeys(molecule).GetOnBits())
        bits = np.zeros(166, dtype='uint8')
        if len(on_bits) > 0:
            # We drop 0-th key that is always zero
            bits[on_bits - 1] = 1
    elif fp_type == 'morgan':
        morgan_fp = Morgan(molecule, morgan__r, nBits=morgan__n)
        bits = np.asarray(morgan_fp, dtype='uint8')
    else:
        raise ValueError("Unknown fingerprint type {}".format(fp_type))

    return bits if dtype is None else bits.astype(dtype)
Example #17
Source File: chemistry.py From guacamol with MIT License | 5 votes |
def get_fingerprints(mols: Iterable[Chem.Mol], radius=2, length=4096):
    """
    Converts molecules to ECFP bitvectors.

    Args:
        mols: RDKit molecules
        radius: ECFP fingerprint radius
        length: number of bits

    Returns:
        a list with one Morgan bit vector per molecule, in input order
    """
    fingerprints = []
    for molecule in mols:
        bit_vector = AllChem.GetMorganFingerprintAsBitVect(molecule, radius, length)
        fingerprints.append(bit_vector)
    return fingerprints
Example #18
Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0 | 5 votes |
def batch_diversity(smiles, set_smiles):
    """Score each SMILES by its diversity against a random reference sample.

    Draws 100 entries of ``set_smiles`` at random, fingerprints them
    (radius 4, 2048 bits) and returns, for every entry of ``smiles``,
    ``diversity(entry, reference)`` when ``verify_sequence`` accepts it and
    0.0 otherwise.
    """
    reference_smiles = random.sample(set_smiles, 100)
    reference_mols = [Chem.MolFromSmiles(smi) for smi in reference_smiles]
    reference_fps = [
        Chem.GetMorganFingerprintAsBitVect(mol, 4, nBits=2048)
        for mol in reference_mols
    ]

    scores = []
    for candidate in smiles:
        if verify_sequence(candidate):
            scores.append(diversity(candidate, reference_fps))
        else:
            scores.append(0.0)
    return scores
Example #19
Source File: mol_metrics.py From ORGAN with GNU General Public License v2.0 | 5 votes |
def diversity(smile, fps):
    """Return a diversity score in [0, 1] for ``smile`` relative to ``fps``.

    The molecule is fingerprinted (radius 4, 2048 bits), its mean Tanimoto
    distance to the reference fingerprints ``fps`` is computed, and that
    mean is passed through ``remap`` with bounds 0.9 and 0.945 before being
    clipped to [0.0, 1.0].
    """
    low_rand_dst = 0.9
    mean_div_dst = 0.945

    ref_mol = Chem.MolFromSmiles(smile)
    ref_fp = Chem.GetMorganFingerprintAsBitVect(ref_mol, 4, nBits=2048)
    distances = DataStructs.BulkTanimotoSimilarity(
        ref_fp, fps, returnDistance=True)

    mean_dist = np.mean(np.array(distances))
    remapped = remap(mean_dist, low_rand_dst, mean_div_dst)
    return np.clip(remapped, 0.0, 1.0)

#==============
Example #20
Source File: mol_distance.py From ORGAN with GNU General Public License v2.0 | 5 votes |
def mol_diversity(smiles):
    """Return the mean of ``tanimoto_1d`` over the fingerprints of ``smiles``.

    The SMILES are loaded into a DataFrame, converted to molecules with
    ``PandasTools.AddMoleculeColumnToFrame`` and fingerprinted (radius 4,
    2048 bits).  The result is ``np.mean`` of whatever ``tanimoto_1d``
    returns for those fingerprints (presumably pairwise Tanimoto
    distances; confirm against ``tanimoto_1d``).
    """
    df = pd.DataFrame({'smiles': smiles})
    PandasTools.AddMoleculeColumnToFrame(df, 'smiles', 'mol')
    fps = [Chem.GetMorganFingerprintAsBitVect(
        m, 4, nBits=2048) for m in df['mol']]
    dist_1d = tanimoto_1d(fps)
    # NOTE: a normalisation step, (mean - 0.91549) / (0.94170 - 0.91549),
    # used to follow the return statement and could never run.  It has been
    # removed; the raw mean is still what callers receive.
    return np.mean(dist_1d)
Example #21
Source File: predict_enriched.py From PIDGINv3 with GNU General Public License v3.0 | 5 votes |
def calcFingerprints(input,qtype='smiles'):
    """Compute the 2048-bit, radius-2 Morgan fingerprint of one molecule.

    Args:
        input: a SMILES string when ``qtype == 'smiles'``; otherwise used
            directly as the molecule object.
        qtype: 'smiles', 'sdf', or any other value.

    Returns:
        ``(bits, fp)`` where ``bits`` is a list of 0/1 ints and ``fp`` the
        RDKit bit vector; for ``qtype == 'sdf'`` the tuple is
        ``(smiles, bits, fp)`` with ``smiles`` from ``Chem.MolToSmiles``.

    Raises:
        MolFromSmilesError: if the molecule is falsy before preprocessing.
        PreprocessViolation: if it is falsy after preprocessing.
    """
    if qtype == 'smiles':
        m = Chem.MolFromSmiles(input)
    else:
        m = input
    if not m:
        raise MolFromSmilesError(' None mol in function')
    if options.preproc:
        m = preprocessMolecule(m)
        if not m:
            raise PreprocessViolation(' Molecule preprocessing violation')
    fp = AllChem.GetMorganFingerprintAsBitVect(m,2, nBits=2048)
    # Build a real list: on Python 3, map() would hand callers a one-shot
    # iterator instead of the list that Python 2 returned.
    bits = [int(bit) for bit in fp.ToBitString()]
    if qtype == 'sdf':
        return Chem.MolToSmiles(m), bits, fp
    return bits, fp

#calculate fingerprints for chunked array of smiles
Example #22
Source File: predict.py From PIDGINv3 with GNU General Public License v3.0 | 5 votes |
def calcFingerprints(input,qtype='smiles'):
    """Compute the 2048-bit, radius-2 Morgan fingerprint of one molecule.

    Args:
        input: a SMILES string when ``qtype == 'smiles'``; otherwise used
            directly as the molecule object.
        qtype: 'smiles', 'sdf', or any other value.

    Returns:
        ``(bits, fp)`` where ``bits`` is a list of 0/1 ints and ``fp`` the
        RDKit bit vector; for ``qtype == 'sdf'`` the tuple is
        ``(smiles, bits, fp)`` with ``smiles`` from ``Chem.MolToSmiles``.

    Raises:
        MolFromSmilesError: if the molecule is falsy before preprocessing.
        PreprocessViolation: if it is falsy after preprocessing.
    """
    if qtype == 'smiles':
        m = Chem.MolFromSmiles(input)
    else:
        m = input
    if not m:
        raise MolFromSmilesError(' None mol in function')
    if options.preproc:
        m = preprocessMolecule(m)
        if not m:
            raise PreprocessViolation(' Molecule preprocessing violation')
    fp = AllChem.GetMorganFingerprintAsBitVect(m,2, nBits=2048)
    # Build a real list: on Python 3, map() would hand callers a one-shot
    # iterator instead of the list that Python 2 returned.
    bits = [int(bit) for bit in fp.ToBitString()]
    if qtype == 'sdf':
        return Chem.MolToSmiles(m), bits, fp
    return bits, fp

#calculate fingerprints for chunked array of smiles
Example #23
Source File: 2_to_fingerprint.py From mhfp with MIT License | 5 votes |
def convert(subset):
    """Write three fingerprint files for one ChEMBL subset.

    Reads the first column of ``/cluster/chembl/chembl.<subset>.smi`` as
    SMILES and writes, one comma-separated line per parsable molecule:

    * ``.mhfp6``   - ``MHFPEncoder.encode_mol`` output
    * ``.mhecfp4`` - ``MHFPEncoder.from_sparse_array`` applied to the
      non-zero element ids of the radius-2 Morgan fingerprint
    * ``.ecfp4``   - the 2048-bit, radius-2 Morgan bit vector

    Molecules that fail to parse are skipped.
    """
    base_path = '/cluster/chembl/chembl.' + str(subset)
    actives = pd.read_csv(base_path + '.smi', sep=' ', usecols=[0], header=None)
    mh = MHFPEncoder()

    def mhfp6_values(mol):
        return mh.encode_mol(mol)

    def mhecfp4_values(mol):
        sparse_ids = [*AllChem.GetMorganFingerprint(mol, 2).GetNonzeroElements()]
        return mh.from_sparse_array(sparse_ids)

    def ecfp4_values(mol):
        return AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048)

    outputs = (
        ('.mhfp6', mhfp6_values),
        ('.mhecfp4', mhecfp4_values),
        ('.ecfp4', ecfp4_values),
    )
    # One full pass over the molecules per output file, in the order above.
    for suffix, compute_values in outputs:
        with open(base_path + suffix, 'w+') as f:
            for _, row in actives.iterrows():
                mol = AllChem.MolFromSmiles(row[0])
                if mol:
                    fp_vals = ','.join(map(str, compute_values(mol)))
                    f.write(fp_vals + '\n')
Example #24
Source File: properties.py From iclr19-graph2graph with MIT License | 5 votes |
def similarity(a, b):
    """Return the Tanimoto similarity of two molecules given as SMILES.

    Both strings are parsed with RDKit and compared through 2048-bit Morgan
    fingerprints of radius 2 with chirality ignored.  ``0.0`` is returned
    when either argument is ``None`` or fails to parse into a molecule.
    """
    if a is None or b is None:
        return 0.0

    mol_a = Chem.MolFromSmiles(a)
    mol_b = Chem.MolFromSmiles(b)
    if mol_a is None or mol_b is None:
        return 0.0

    def _fingerprint(mol):
        return AllChem.GetMorganFingerprintAsBitVect(
            mol, 2, nBits=2048, useChirality=False)

    fp_a = _fingerprint(mol_a)
    fp_b = _fingerprint(mol_b)
    return DataStructs.TanimotoSimilarity(fp_a, fp_b)
Example #25
Source File: fingerprinting.py From ASKCOS with Mozilla Public License 2.0 | 5 votes |
def create_rxn_Morgan2FP_separately(rsmi, psmi, rxnfpsize=gc.fingerprint_bits, pfpsize=gc.fingerprint_bits, useFeatures=False, calculate_rfp=True, useChirality=False):
    """Build radius-2 Morgan fingerprints for reactant and product SMILES.

    Takes the two SMILES separately and returns the fingerprints
    separately.

    Args:
        rsmi: reactant SMILES; encoded to UTF-8 before parsing.
        psmi: product SMILES; encoded to UTF-8 before parsing.
        rxnfpsize: number of bits of the reactant fingerprint.
        pfpsize: number of bits of the product fingerprint.
        useFeatures: forwarded to RDKit.
        calculate_rfp: not read by this function.
        useChirality: forwarded to RDKit.

    Returns:
        ``[pfp, rfp]`` (product first), each a float32 numpy array, or
        ``None`` when parsing or fingerprinting raised an exception.
    """
    reactant_bytes = rsmi.encode('utf-8')
    product_bytes = psmi.encode('utf-8')

    # --- reactant side ---
    try:
        reactant_mol = Chem.MolFromSmiles(reactant_bytes)
    except Exception as e:
        print(e)
        return None
    try:
        reactant_bits = AllChem.GetMorganFingerprintAsBitVect(
            mol=reactant_mol, radius=2, nBits=rxnfpsize,
            useFeatures=useFeatures, useChirality=useChirality)
        rfp = np.empty(rxnfpsize, dtype='float32')
        DataStructs.ConvertToNumpyArray(reactant_bits, rfp)
    except Exception as e:
        print("Cannot build reactant fp due to {}".format(e))
        return None

    # --- product side ---
    try:
        product_mol = Chem.MolFromSmiles(product_bytes)
    except Exception:
        # A product parsing error is not reported, unlike the reactant one.
        return None
    try:
        product_bits = AllChem.GetMorganFingerprintAsBitVect(
            mol=product_mol, radius=2, nBits=pfpsize,
            useFeatures=useFeatures, useChirality=useChirality)
        pfp = np.empty(pfpsize, dtype='float32')
        DataStructs.ConvertToNumpyArray(product_bits, pfp)
    except Exception as e:
        print("Cannot build product fp due to {}".format(e))
        return None

    return [pfp, rfp]
Example #26
Source File: utilsFP.py From CheTo with BSD 3-Clause "New" or "Revised" License | 5 votes |
def getMorganEnvironment(mol, bitInfo, fp=None, minRad=0):
    """
    Collect, per fingerprint bit, the atom environments that set it.

    bitInfo maps a bit id to (atomID, radius) pairs.  Every pair whose
    radius is at least minRad contributes the result of
    Chem.FindAtomEnvironmentOfRadiusN, as a list, to that bit's entry.
    Pairs below minRad are skipped and, when fp is given, the bit is
    cleared in fp in place.

    >>> m = Chem.MolFromSmiles('CC(O)C')
    >>> bi = {}
    >>> fp = AllChem.GetMorganFingerprintAsBitVect(m,2,2048,bitInfo=bi)
    >>> getMorganEnvironment(m,bi)
    defaultdict(<class 'list'>, {1057: [[], []], 227: [[1]], 709: [[0, 1, 2]], 1: [[]], 283: [[0], [2]], 807: [[]]})
    >>> getMorganEnvironment(m,bi,minRad=1)
    defaultdict(<class 'list'>, {283: [[0], [2]], 227: [[1]], 709: [[0, 1, 2]]})
    >>> list(fp.GetOnBits())
    [1, 227, 283, 709, 807, 1057]
    >>> getMorganEnvironment(m,bi,minRad=1,fp=fp)
    defaultdict(<class 'list'>, {283: [[0], [2]], 227: [[1]], 709: [[0, 1, 2]]})
    >>> list(fp.GetOnBits())
    [227, 283, 709]
    """
    bitPaths = defaultdict(list)
    for bit, info in bitInfo.items():
        for atomID, radius in info:
            if radius < minRad:
                # Identity test: "!= None" would run an equality comparison,
                # which is ambiguous or unsupported for array-like fp.
                if fp is not None:
                    fp[bit] = 0
                continue
            env = Chem.FindAtomEnvironmentOfRadiusN(mol, radius, atomID)
            bitPaths[bit].append(list(env))
    return bitPaths
Example #27
Source File: predict_enriched_two_libraries_decision_tree.py From PIDGINv2 with MIT License | 5 votes |
def calcFingerprints(smiles):
    """Return the 2048-bit, radius-2 Morgan fingerprint of a SMILES string
    as a list of '0'/'1' characters."""
    molecule = Chem.MolFromSmiles(smiles)
    bit_vector = AllChem.GetMorganFingerprintAsBitVect(molecule, 2, nBits=2048)
    return list(bit_vector.ToBitString())

#calculate fingerprints for chunked array of smiles
Example #28
Source File: sim_to_train.py From PIDGINv2 with MIT License | 5 votes |
def calcFingerprints(smiles):
    """Return the 2048-bit, radius-2 Morgan bit vector of a SMILES string."""
    molecule = Chem.MolFromSmiles(smiles)
    return AllChem.GetMorganFingerprintAsBitVect(molecule, 2, nBits=2048)

#calculate fingerprints for chunked array of smiles
Example #29
Source File: predict_per_comp.py From PIDGINv2 with MIT License | 5 votes |
def calcFingerprints(smiles):
    """Return the 2048-bit, radius-2 Morgan fingerprint of a SMILES string
    as a list of '0'/'1' characters."""
    molecule = Chem.MolFromSmiles(smiles)
    bit_vector = AllChem.GetMorganFingerprintAsBitVect(molecule, 2, nBits=2048)
    return list(bit_vector.ToBitString())

#calculate fingerprints for chunked array of smiles
Example #30
Source File: predict_binary.py From PIDGINv2 with MIT License | 5 votes |
def calcFingerprints(smiles):
    """Return the 2048-bit, radius-2 Morgan fingerprint of a SMILES string
    as a list of '0'/'1' characters."""
    molecule = Chem.MolFromSmiles(smiles)
    bit_vector = AllChem.GetMorganFingerprintAsBitVect(molecule, 2, nBits=2048)
    return list(bit_vector.ToBitString())

#calculate fingerprints for chunked array of smiles