Python rdkit.DataStructs.FingerprintSimilarity() Examples
The following are 2 code examples of rdkit.DataStructs.FingerprintSimilarity().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module rdkit.DataStructs, or try the search function.
Example #1
Source File: preprocess.py From PADME with MIT License | 5 votes |
def get_highest_similarity_for_mol(fp, fp_list_to_compare):
    """Return the best similarity between ``fp`` and any candidate fingerprint.

    Each candidate in ``fp_list_to_compare`` is scored against ``fp`` with
    ``DataStructs.FingerprintSimilarity`` (called with its default metric).

    Args:
        fp: The query fingerprint.
        fp_list_to_compare: Iterable of fingerprints to score against ``fp``.

    Returns:
        The largest score seen, or ``0`` when there are no candidates or no
        score exceeds zero. A score of exactly ``1.0`` is returned
        immediately without examining the remaining candidates.
    """
    best_so_far = 0
    for candidate_fp in fp_list_to_compare:
        score = DataStructs.FingerprintSimilarity(fp, candidate_fp)
        # An exact 1.0 ends the search early; later candidates are not scored.
        if score == 1.0:
            return score
        # max() keeps the current best unless the new score is strictly larger.
        best_so_far = max(best_so_far, score)
    return best_so_far
Example #2
Source File: preprocess.py From PADME with MIT License | 5 votes |
def _fingerprint_from_smiles(smiles):
    """Parse one SMILES string and return its ``FingerprintMols`` fingerprint.

    Raises:
        ValueError: If RDKit cannot parse ``smiles`` into a molecule.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles reports a parse failure by returning None. Fail here,
        # naming the offending string, instead of letting the None reach the
        # fingerprinting call.
        raise ValueError('Could not parse SMILES: {!r}'.format(smiles))
    return FingerprintMols.FingerprintMol(mol)


def get_highest_similarity(input_file, output_file,
                           comparison_file='../full_toxcast/restructured.csv',
                           top_compounds_only=True, num_compounds=1500):
    """Write each input compound's highest similarity to a comparison set.

    Reads ``input_file`` (CSV with ``smiles`` and ``avg_score`` columns),
    fingerprints every compound, and scores each one against all compounds in
    ``comparison_file`` (CSV with a ``smiles`` column) via
    ``get_highest_similarity_for_mol``. The results are written to
    ``output_file`` as CSV with the columns ``smiles``, ``avg_score`` and
    ``max_similarity``. The row index is printed every 500 compounds as a
    progress indicator.

    Args:
        input_file: Path of the CSV holding the compounds to evaluate.
        output_file: Path of the CSV to create (overwritten if present).
        comparison_file: Path of the CSV holding the reference compounds.
        top_compounds_only: When true, only the first ``num_compounds`` rows
            of ``input_file`` are evaluated.
        num_compounds: Number of leading rows kept when
            ``top_compounds_only`` is true.

    Raises:
        ValueError: If a SMILES string in either file cannot be parsed.
    """
    df_avg = pd.read_csv(input_file, header=0, index_col=False)
    if top_compounds_only:
        df_avg = df_avg.head(num_compounds)
    smiles_list = df_avg['smiles']
    avg_scores = df_avg['avg_score']

    df_comparison = pd.read_csv(comparison_file, header=0, index_col=False)
    # Fingerprint the reference set once; it is reused for every input compound.
    comparison_fp_list = [
        _fingerprint_from_smiles(c_smiles)
        for c_smiles in df_comparison['smiles']
    ]

    similarity_list = []
    for i, smiles in enumerate(smiles_list):
        fp_to_test = _fingerprint_from_smiles(smiles)
        similarity_list.append(
            get_highest_similarity_for_mol(fp_to_test, comparison_fp_list))
        if i % 500 == 0:
            print(i)

    with open(output_file, 'w', newline='') as csvfile:
        fieldnames = ['smiles', 'avg_score', 'max_similarity']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for i, smiles in enumerate(smiles_list):
            writer.writerow({
                'smiles': smiles,
                'avg_score': avg_scores[i],
                'max_similarity': similarity_list[i],
            })