Python rdkit.Chem.rdMolDescriptors.GetMorganFingerprintAsBitVect() Examples
The following are 10 code examples of rdkit.Chem.rdMolDescriptors.GetMorganFingerprintAsBitVect().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module rdkit.Chem.rdMolDescriptors, or try the search function.
Example #1
Source File: molecule.py From rl_graph_generation with BSD 3-Clause "New" or "Revised" License | 6 votes |
def reward_target_molecule_similarity(mol, target, radius=2, nBits=2048,
                                      useChirality=True):
    """
    Score a molecule by how closely it resembles a target molecule.

    Both molecules are turned into Morgan bit-vector fingerprints with the
    same settings, and the Tanimoto similarity of the two fingerprints is
    returned.
    :param mol: rdkit mol object being scored
    :param target: rdkit mol object to compare against
    :param radius: radius forwarded to the Morgan fingerprint call
    :param nBits: bit-vector length forwarded to the Morgan fingerprint call
    :param useChirality: chirality flag forwarded to the Morgan fingerprint call
    :return: float, [0.0, 1.0]
    """
    # One options dict guarantees both fingerprints use identical settings.
    fingerprint_options = dict(radius=radius, nBits=nBits,
                               useChirality=useChirality)
    mol_fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(
        mol, **fingerprint_options)
    target_fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(
        target, **fingerprint_options)
    return DataStructs.TanimotoSimilarity(mol_fp, target_fp)


### TERMINAL VALUE REWARDS ###
Example #2
Source File: fingerprints.py From deepchem with MIT License | 5 votes |
def _featurize(self, mol):
    """
    Calculate a circular (Morgan) fingerprint for one molecule.

    Parameters
    ----------
    mol : RDKit Mol
        Molecule.

    Returns
    -------
    When ``self.sparse`` is false, the bit vector returned by
    ``GetMorganFingerprintAsBitVect`` with ``self.size`` bits.
    When ``self.sparse`` is true, a dict keyed by fragment id: the values
    are counts, or, if ``self.smiles`` is also true, dicts of the form
    ``{'smiles': <fragment SMILES>, 'count': <count>}``.
    """
    from rdkit import Chem
    from rdkit.Chem import rdMolDescriptors

    # Dense case: a fixed-length bit vector, nothing more to do.
    if not self.sparse:
        return rdMolDescriptors.GetMorganFingerprintAsBitVect(
            mol,
            self.radius,
            nBits=self.size,
            useChirality=self.chiral,
            useBondTypes=self.bonds,
            useFeatures=self.features)

    # Sparse case: bit_info is filled in by RDKit and is needed below to
    # locate the atom environment behind each fragment id.
    bit_info = {}
    sparse_fp = rdMolDescriptors.GetMorganFingerprint(
        mol,
        self.radius,
        useChirality=self.chiral,
        useBondTypes=self.bonds,
        useFeatures=self.features,
        bitInfo=bit_info)
    counts = sparse_fp.GetNonzeroElements()  # convert to a dict
    if not self.smiles:
        return counts

    # Generate a SMILES string for the fragment behind every fragment id.
    annotated = {}
    for fragment_id, count in counts.items():
        # Only the first recorded (atom, radius) occurrence is used.
        center_atom, env_radius = bit_info[fragment_id][0]
        atom_env = Chem.FindAtomEnvironmentOfRadiusN(mol, env_radius, center_atom)
        fragment = Chem.PathToSubmol(mol, atom_env)
        annotated[fragment_id] = {
            'smiles': Chem.MolToSmiles(fragment),
            'count': count,
        }
    return annotated
Example #3
Source File: fingerprints.py From PADME with MIT License | 5 votes |
def __init__(self, mol, radius, nBits=2048, useChirality=False,
             useBondTypes=True, useFeatures=False, smiles=None):
    """Compute and store the Morgan bit-vector fingerprint of ``mol``.

    Sets three attributes:
      * ``fingerprint``: the object returned by
        ``GetMorganFingerprintAsBitVect`` for the given options,
      * ``fingerprint_array``: ``np.asarray`` of that fingerprint,
      * ``smiles``: the ``smiles`` argument, stored unchanged.
    """
    morgan_options = {
        'nBits': nBits,
        'useChirality': useChirality,
        'useBondTypes': useBondTypes,
        'useFeatures': useFeatures,
    }
    self.fingerprint = rdMolDescriptors.GetMorganFingerprintAsBitVect(
        mol, radius, **morgan_options)
    self.fingerprint_array = np.asarray(self.fingerprint)
    self.smiles = smiles
Example #4
Source File: preprocess.py From PADME with MIT License | 5 votes |
def get_highest_similarity(input_file, output_file,
                           comparison_file='../full_toxcast/restructured.csv',
                           top_compounds_only=True, num_compounds=1500):
    """Write, for each scored compound, its best similarity to a reference set.

    Reads ``input_file`` (CSV with ``smiles`` and ``avg_score`` columns),
    optionally keeps only the first ``num_compounds`` rows, fingerprints
    every compound with ``FingerprintMols.FingerprintMol`` and compares it
    against the fingerprints of all ``smiles`` in ``comparison_file``.
    The result is written to ``output_file`` as CSV with the columns
    ``smiles``, ``avg_score`` and ``max_similarity``.

    The per-compound value comes from ``get_highest_similarity_for_mol``,
    which is defined outside this block (presumably the maximum similarity
    over the reference fingerprints; confirm against its definition).
    """
    scored = pd.read_csv(input_file, header=0, index_col=False)
    if top_compounds_only:
        scored = scored.head(num_compounds)
    smiles_list = scored['smiles']
    avg_scores = scored['avg_score']

    # NOTE: Morgan bit-vector fingerprints (radius 2, 1024 bits) were tried
    # here at some point; the code currently uses FingerprintMols instead.
    reference = pd.read_csv(comparison_file, header=0, index_col=False)
    comparison_fp_list = [
        FingerprintMols.FingerprintMol(Chem.MolFromSmiles(reference_smiles))
        for reference_smiles in reference['smiles']
    ]

    similarity_list = []
    for position, smiles in enumerate(smiles_list):
        query_fp = FingerprintMols.FingerprintMol(Chem.MolFromSmiles(smiles))
        best = get_highest_similarity_for_mol(query_fp, comparison_fp_list)
        similarity_list.append(best)
        # Progress indicator: print the row number every 500 compounds.
        if position % 500 == 0:
            print(position)

    columns = ['smiles', 'avg_score', 'max_similarity']
    with open(output_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=columns)
        writer.writeheader()
        for position, smiles in enumerate(smiles_list):
            writer.writerow({
                'smiles': smiles,
                'avg_score': avg_scores[position],
                'max_similarity': similarity_list[position],
            })
Example #5
Source File: ecfp_preprocessor.py From chainer-chemistry with MIT License | 5 votes |
def get_input_features(self, mol):
    """Return the Morgan bit-vector fingerprint of ``mol`` as a float32 array.

    The fingerprint is computed with radius ``self.radius``; the bit-vector
    length is left at the RDKit default.

    Args:
        mol: Molecule passed to
            ``rdMolDescriptors.GetMorganFingerprintAsBitVect``.

    Returns:
        ``numpy.ndarray`` of dtype ``float32`` built from the bit vector.

    Raises:
        MolFeatureExtractionError: If RDKit raises any exception while
            computing the fingerprint. The original exception is written
            to the module logger at DEBUG level.
    """
    try:
        fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, self.radius)
    except Exception as e:
        logger = getLogger(__name__)
        # The exception goes in as a lazy %-argument. Without the %s
        # placeholder, logging fails to format the record and reports a
        # formatting error instead of this message.
        logger.debug('exception caught at ECFPPreprocessor: %s', e)
        # Extracting feature failed
        raise MolFeatureExtractionError
    # TODO(Nakago): Test it.
    return numpy.asarray(fp, numpy.float32)
Example #6
Source File: organic_lorentz_lorenz.py From chemml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __represent(self, smiles):
    """Convert a SMILES string into a binary Morgan fingerprint array.

    The descriptor must be a binary Morgan fingerprint with radius 2 and
    1024 bits.

    Surrounding whitespace is stripped from ``smiles`` before parsing.

    Returns:
        ``np.array`` built from the fingerprint bit vector.

    Raises:
        ValueError: If RDKit cannot parse the SMILES string.
    """
    mol = Chem.MolFromSmiles(smiles.strip())
    if mol is not None:
        fingerprint = GetMorganFingerprintAsBitVect(mol, radius=2, nBits=1024)
        return np.array(fingerprint)
    msg = '%s is not a valid SMILES representation' % smiles
    raise ValueError(msg)
Example #7
Source File: fingerprint.py From XenonPy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def featurize(self, x):
    """Return the feature-based Morgan fingerprint of ``x`` as a list of bits.

    ``self.input_type`` controls how ``x`` is interpreted:
      * ``'smiles'``: ``x`` is always parsed as a SMILES string;
      * ``'any'``: ``x`` is parsed as SMILES unless it is already a
        ``Chem.rdchem.Mol``;
      * any other value: ``x`` is passed to RDKit unchanged.

    The fingerprint uses ``self.radius``, ``self.n_bits`` bits and
    ``useFeatures=True``.

    Raises:
        ValueError: If ``x`` had to be parsed and RDKit returned ``None``.
    """
    mode = self.input_type
    must_parse = mode == 'smiles' or (
        mode == 'any' and not isinstance(x, Chem.rdchem.Mol))
    if must_parse:
        # Keep the raw input so the error message can show it.
        raw_input = x
        x = Chem.MolFromSmiles(raw_input)
        if x is None:
            raise ValueError('can not convert Mol from SMILES %s' % raw_input)
    bit_vector = rdMol.GetMorganFingerprintAsBitVect(
        x, self.radius, nBits=self.n_bits, useFeatures=True)
    return list(bit_vector)
Example #8
Source File: fingerprint.py From XenonPy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def featurize(self, x):
    """Return the Morgan fingerprint of ``x`` as a list of bits.

    ``self.input_type`` controls how ``x`` is interpreted:
      * ``'smiles'``: ``x`` is always parsed as a SMILES string;
      * ``'any'``: ``x`` is parsed as SMILES unless it is already a
        ``Chem.rdchem.Mol``;
      * any other value: ``x`` is passed to RDKit unchanged.

    The fingerprint uses ``self.radius`` and ``self.n_bits`` bits.

    Raises:
        ValueError: If ``x`` had to be parsed and RDKit returned ``None``.
    """
    if self.input_type == 'smiles':
        parse_as_smiles = True
    elif self.input_type == 'any':
        # Objects that are already Mol instances are used as they are.
        parse_as_smiles = not isinstance(x, Chem.rdchem.Mol)
    else:
        parse_as_smiles = False

    if parse_as_smiles:
        # Keep the original text so the error message can show it.
        original = x
        x = Chem.MolFromSmiles(original)
        if x is None:
            raise ValueError('can not convert Mol from SMILES %s' % original)

    return list(
        rdMol.GetMorganFingerprintAsBitVect(x, self.radius, nBits=self.n_bits))
Example #9
Source File: splitters.py From PADME with MIT License | 4 votes |
def split(self, dataset, seed=None, frac_train=.8, frac_valid=.1, frac_test=.1,
          log_every_n=None):
    """
    Splits internal compounds into train/validation/test index lists.

    Every id in ``dataset.ids`` is parsed as SMILES and fingerprinted with
    ``AllChem.GetMorganFingerprintAsBitVect(mol, 2, 1024)``. The test
    indices are chosen with ``MaxMinPicker.LazyPick`` using
    ``1 - DiceSimilarity`` as the distance. A second pick, seeded with the
    test indices, selects the validation indices. Everything left over is
    the training set.

    The three fractions must sum to 1 (checked with
    ``np.testing.assert_almost_equal``). If ``seed`` is None a random one
    is drawn. The seed is also applied to NumPy's global random state.
    ``log_every_n`` is accepted but not used here.

    Returns three sorted lists of indices: train, valid, test.
    """
    np.testing.assert_almost_equal(frac_train + frac_valid + frac_test, 1.)
    if seed is None:
        seed = random.randint(0, 2**30)
    np.random.seed(seed)

    total = len(dataset)
    train_end = int(frac_train * total)
    valid_end = int((frac_train + frac_valid) * total)
    n_valid = valid_end - train_end
    n_test = total - valid_end

    molecules = [Chem.MolFromSmiles(smiles) for smiles in dataset.ids]
    fps = [AllChem.GetMorganFingerprintAsBitVect(m, 2, 1024) for m in molecules]

    def dice_distance(i, j):
        return 1 - DataStructs.DiceSimilarity(fps[i], fps[j])

    picker = MaxMinPicker()
    test_picks = picker.LazyPick(
        distFunc=dice_distance,
        poolSize=total,
        pickSize=n_test,
        seed=seed)
    # The second pick starts from the test picks, so its result contains
    # them; they are subtracted again below to get the validation set.
    valid_and_test_picks = picker.LazyPick(
        distFunc=dice_distance,
        poolSize=total,
        pickSize=n_valid + n_test,
        firstPicks=test_picks,
        seed=seed)

    every_index = set(range(total))
    test_set = set(test_picks)
    valid_set = set(valid_and_test_picks) - test_set
    train_set = every_index - test_set - valid_set

    # Sanity checks: the three sets partition the whole index range.
    assert test_set.isdisjoint(valid_set)
    assert test_set.isdisjoint(train_set)
    assert valid_set.isdisjoint(train_set)
    assert (valid_set | train_set | test_set) == every_index

    return sorted(train_set), sorted(valid_set), sorted(test_set)
Example #10
Source File: splitters.py From PADME with MIT License | 4 votes |
def split(self, dataset, frac_train=None, frac_valid=None, frac_test=None,
          log_every_n=1000, cutoff=0.18):
    """
    Splits internal compounds into train and validation based on the butina
    clustering algorithm. This splitting algorithm has an O(N^2) run time, where N
    is the number of elements in the dataset. The dataset is expected to be a classification
    dataset.

    This algorithm is designed to generate validation data that are novel chemotypes.

    Note that this function entirely disregards the ratios for frac_train, frac_valid,
    and frac_test. Furthermore, it does not generate a test set, only a train and valid set.

    Setting a small cutoff value will generate smaller, finer clusters of high similarity,
    whereas setting a large cutoff value will generate larger, coarser clusters of low similarity.

    Clusters are visited from largest to smallest and added to the validation
    set until every task has at least one active among the validation points.
    All remaining clusters form the training set. If that condition is never
    met, every cluster ends up in the validation set and the training set is
    empty.

    Returns a tuple ``(train_inds, valid_inds, [])``. The third element is
    always an empty list because no test set is produced.
    """
    print("Performing butina clustering with cutoff of", cutoff)
    mols = [Chem.MolFromSmiles(smiles) for smiles in dataset.ids]
    fps = [AllChem.GetMorganFingerprintAsBitVect(x, 2, 1024) for x in mols]

    scaffold_sets = ClusterFps(fps, cutoff=cutoff)
    # Largest clusters first; ties keep their original relative order.
    scaffold_sets = sorted(scaffold_sets, key=len, reverse=True)

    ys = dataset.y
    valid_inds = []
    # Start at -1 so the slice below is well-defined (and empty) when there
    # are no clusters at all. Otherwise c_idx would be unbound after the loop.
    c_idx = -1
    for c_idx, cluster in enumerate(scaffold_sets):
        valid_inds.extend(cluster)
        # continue until we find an active in all the tasks, otherwise we can't
        # compute a meaningful AUC
        # TODO (ytz): really, we want at least one active and inactive in both scenarios.
        # TODO (Ytz): for regression tasks we'd stop after only one cluster.
        active_populations = np.sum(ys[valid_inds], axis=0)
        if np.all(active_populations):
            print("# of actives per task in valid:", active_populations)
            print("Total # of validation points:", len(valid_inds))
            break

    # Every cluster after the last one consumed goes to training.
    train_inds = list(itertools.chain.from_iterable(scaffold_sets[c_idx + 1:]))
    return train_inds, valid_inds, []