Python rdkit.DataStructs.FingerprintSimilarity() Examples

The following are 2 code examples of rdkit.DataStructs.FingerprintSimilarity(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module rdkit.DataStructs, or try the search function.
Example #1
Source File: preprocess.py    From PADME with MIT License 5 votes vote down vote up
def get_highest_similarity_for_mol(fp, fp_list_to_compare):
  """Return the highest similarity between `fp` and any candidate fingerprint.

  Every entry of `fp_list_to_compare` is scored against `fp` with
  `DataStructs.FingerprintSimilarity`. The scan stops at the first score
  that equals 1.0 and returns it right away. When the list is empty, or no
  score is greater than 0, the result is 0.
  """
  best = 0
  for candidate in fp_list_to_compare:
    score = DataStructs.FingerprintSimilarity(fp, candidate)
    if score == 1.0:
      # Short-circuit: no need to look at the remaining candidates.
      return score
    best = score if score > best else best
  return best
Example #2
Source File: preprocess.py    From PADME with MIT License 5 votes vote down vote up
def get_highest_similarity(input_file, output_file, comparison_file='../full_toxcast/restructured.csv',
  top_compounds_only=True, num_compounds=1500):
  """Write, for each input compound, its best similarity to a comparison set.

  Reads `input_file` as CSV and uses its 'smiles' and 'avg_score' columns.
  When `top_compounds_only` is true, only the first `num_compounds` rows are
  kept. Reads `comparison_file` as CSV and fingerprints every entry of its
  'smiles' column with `FingerprintMols.FingerprintMol`. Each input compound
  is fingerprinted the same way and scored against the whole comparison set
  via `get_highest_similarity_for_mol`.

  The result is written to `output_file` as CSV with the columns
  'smiles', 'avg_score' and 'max_similarity', one row per input compound.
  The row index is printed every 500 compounds as a progress indicator.
  """
  scored = pd.read_csv(input_file, header=0, index_col=False)
  if top_compounds_only:
    scored = scored.head(num_compounds)
  smiles_list = scored['smiles']
  avg_scores = scored['avg_score']

  # Fingerprint the comparison set once, up front, so it can be reused for
  # every input compound.
  reference = pd.read_csv(comparison_file, header=0, index_col=False)
  reference_fps = [
    FingerprintMols.FingerprintMol(Chem.MolFromSmiles(ref_smiles))
    for ref_smiles in reference['smiles']
  ]

  best_similarities = []
  for idx, query_smiles in enumerate(smiles_list):
    query_fp = FingerprintMols.FingerprintMol(Chem.MolFromSmiles(query_smiles))
    best_similarities.append(
      get_highest_similarity_for_mol(query_fp, reference_fps))
    if idx % 500 == 0:
      # Progress output.
      print(idx)

  columns = ['smiles', 'avg_score', 'max_similarity']
  with open(output_file, 'w', newline='') as out_handle:
    out_writer = csv.DictWriter(out_handle, fieldnames=columns)
    out_writer.writeheader()
    out_writer.writerows(
      {
        'smiles': query_smiles,
        'avg_score': avg_scores[idx],
        'max_similarity': best_similarities[idx],
      }
      for idx, query_smiles in enumerate(smiles_list)
    )