Python Examples of rdkit.Chem.SDWriter

Source File: converter.py From 3DGCN with MIT License

7 votes

def rotate_molecule(path, target_path, count=10):
    """Write ``count`` randomly rotated copies of every molecule in an SDF file.

    Each copy is made from the molecule as it was loaded and transformed with
    a single matrix obtained from ``random_rotation_matrix()``, so all atoms
    of one copy share the same transform.

    Parameters
    ----------
    path : str
        SDF file the molecules are read from.
    target_path : str
        SDF file the rotated copies are written to.
    count : int, optional
        Number of rotated copies produced per input molecule.
    """
    # Load dataset
    mols = Chem.SDMolSupplier(path)
    rotated_mols = []

    print("Loaded {} Molecules from {}".format(len(mols), path))

    print("Rotating Molecules...")
    for mol in mols:
        # Skip entries the supplier could not turn into a molecule; the
        # original only guarded against None at write time, after it had
        # already dereferenced the molecule.
        if mol is None:
            continue

        for _ in range(count):
            # Work on a copy: appending the same object repeatedly would store
            # ``count`` references that all end up in the last pose.
            rotated = Chem.Mol(mol)
            conformer = rotated.GetConformer()

            # Draw the matrix once per copy so every atom gets the same
            # transform instead of an independent one per atom.
            rotation = random_rotation_matrix()

            for atom in rotated.GetAtoms():
                atom_idx = atom.GetIdx()

                pos = list(conformer.GetAtomPosition(atom_idx))
                pos_rotated = np.matmul(rotation, pos)

                conformer.SetAtomPosition(atom_idx, pos_rotated)

            rotated_mols.append(rotated)

    w = Chem.SDWriter(target_path)
    try:
        for m in rotated_mols:
            w.write(m)
    finally:
        # Closing flushes buffered records to disk.
        w.close()
    print("Saved {} Molecules to {}".format(len(rotated_mols), target_path))

Source File: operations.py From ScaffoldGraph with MIT License

6 votes

def __init__(self, args):
        """Keep the parsed arguments and open the requested output streams.

        ``args`` must provide ``input``, ``output``, ``sdf``, ``map_mols`` and
        ``map_annotations``.
        """
        self.args = args
        self.inputs = args.input

        # Main output: an SDWriter when args.sdf is set, a text file otherwise.
        if not args.sdf:
            self.output = open(args.output, 'w')
        else:
            rdlogger.setLevel(4)
            self.output = SDWriter(args.output)

        # Optional molecule-to-scaffold map; the header row is written at once.
        if args.map_mols:
            self.mol_map = open(args.map_mols, 'w')
            self.mol_map.write('MOLECULE_ID\tSCAFFOLD_ID\n')
        else:
            self.mol_map = None

        # Optional scaffold-annotation map, handled the same way.
        if args.map_annotations:
            self.ann_map = open(args.map_annotations, 'w')
            self.ann_map.write('SCAFFOLD_ID\tANNOTATIONS\n')
        else:
            self.ann_map = None

        # Bookkeeping state, all starting empty.
        self.current_id = 0
        self.duplicates = 0
        self.table = {}

Source File: sdf.py From ScaffoldGraph with MIT License

6 votes

def write_sdf_file(scaffold_graph, output_file):
    """Write an SDF file from a scaffoldgraph

    Scaffolds are written in order of increasing hierarchy. Each record is
    named after the scaffold's position in that order and carries the
    properties ``HIERARCHY``, ``SMILES`` and ``SUBSCAFFOLDS`` (the positions
    of the scaffold's predecessors in the graph). Scaffolds whose SMILES
    cannot be parsed are skipped.

    Parameters
    ----------
    scaffold_graph (sg.ScaffoldGraph): graph to be converted
    output_file (str): path to output file
    """

    N = scaffold_graph.num_scaffold_nodes
    sorted_scaffolds = sorted(
        scaffold_graph.get_scaffold_nodes(data=True),
        key=lambda x: x[1]['hierarchy'],
    )
    mapping = dict(zip((s[0] for s in sorted_scaffolds), range(N)))
    writer = SDWriter(output_file)
    try:
        for scaffold, data in sorted_scaffolds:
            molecule = MolFromSmiles(scaffold)
            if molecule is None:
                continue
            subscaffolds = scaffold_graph.predecessors(scaffold)
            # SetProp only accepts string values, so the index is converted.
            molecule.SetProp('_Name', str(mapping[scaffold]))
            # Read the same lower-case key the sort above relies on; the
            # upper-case lookup used previously did not match it.
            molecule.SetIntProp('HIERARCHY', data['hierarchy'])
            molecule.SetProp('SMILES', scaffold)
            molecule.SetProp('SUBSCAFFOLDS', ', '.join(str(mapping[s]) for s in subscaffolds))
            writer.write(molecule)
    finally:
        # Always close so the file is flushed even if a record fails.
        writer.close()

Source File: rdk.py From oddt with BSD 3-Clause "New" or "Revised" License

6 votes

def __init__(self, format, filename, overwrite=False):
        """Open ``filename`` for writing molecules in ``format``.

        Raises IOError when the file already exists and ``overwrite`` is
        false, and ValueError when ``format`` is not handled here.
        """
        self.format = format
        self.filename = filename

        # Refuse to clobber an existing file unless explicitly allowed.
        if not overwrite:
            if os.path.isfile(self.filename):
                raise IOError("%s already exists. Use 'overwrite=True' to overwrite it." % self.filename)

        if format == "sdf":
            writer = Chem.SDWriter(self.filename)
        elif format == "smi":
            writer = Chem.SmilesWriter(self.filename, isomericSmiles=True, includeHeader=False)
        elif format in ('inchi', 'inchikey') and Chem.INCHI_AVAILABLE:
            writer = open(filename, 'w')
        elif format in ('mol2', 'pdbqt'):
            # A trailing "gz" component selects gzip for these formats.
            opener = gzip.open if filename.split('.')[-1] == 'gz' else open
            writer = opener(filename, 'w')
        elif format == "pdb":
            writer = Chem.PDBWriter(self.filename)
        else:
            raise ValueError("%s is not a recognised RDKit format" % format)

        self._writer = writer
        self.total = 0  # The total number of molecules written to the file

Source File: dataset.py From 3DGCN with MIT License

5 votes

def save_dataset(self, path, pred=None, target="test", filename=None):
        """Save the molecules of one split to an SDF file.

        Every molecule gets a ``true`` property holding ``y * self.std +
        self.mean`` and, when ``pred`` is given, a ``pred`` property computed
        the same way from ``pred[idx][0]``.

        Parameters
        ----------
        path : str
            Prefix the output file name is appended to.
        pred : indexable, optional
            Predictions aligned with the split; only element ``[idx][0]`` of
            each entry is used.
        target : str, optional
            Key of the split in ``self.x`` / ``self.c`` / ``self.y``; also the
            default output file name.
        filename : str, optional
            Output file name without extension; defaults to ``target``.
        """
        mols = []
        # self.c stays in the zip so the iteration length is unchanged, even
        # though its values are not used here.
        for idx, (x, _, y) in enumerate(zip(self.x[target], self.c[target], self.y[target])):
            x.SetProp("true", str(y * self.std + self.mean))
            if pred is not None:
                x.SetProp("pred", str(pred[idx][0] * self.std + self.mean))
            mols.append(x)

        out_name = filename if filename is not None else target
        w = Chem.SDWriter(path + out_name + ".sdf")
        try:
            for mol in mols:
                if mol is not None:
                    w.write(mol)
        finally:
            # Without an explicit close the records may never be flushed.
            w.close()

Source File: scatter_plot.py From 3DGCN with MIT License

5 votes

def find_confusion(dataset, base_path):
    """Save the most and least accurately predicted molecules of each trial.

    For ``trial_1`` to ``trial_10`` under ``base_path`` this reads
    ``test.sdf``, computes ``true - pred`` for every molecule and writes five
    molecules to ``confusion_examples_<dataset>_trial<i>.sdf`` in the same
    directory: the two largest differences, the two smallest differences and
    the one with the smallest absolute difference.

    Parameters
    ----------
    dataset : str
        Name inserted into the output file name.
    base_path : str
        Directory prefix containing the ``trial_<i>/`` folders.
    """
    for i in range(1, 11):
        path = base_path + "trial_{}/".format(i)

        # Load true, pred value
        mols = Chem.SDMolSupplier(path + "test.sdf")
        diff_y = np.array(
            [float(mol.GetProp("true")) - float(mol.GetProp("pred")) for mol in mols],
            dtype=float,
        )

        # Find largest, smallest error molecules
        idx = np.argsort(diff_y)
        top_1 = mols[int(idx[-1])]
        top_2 = mols[int(idx[-2])]
        btm_1 = mols[int(idx[0])]
        btm_2 = mols[int(idx[1])]

        best_idx = np.argsort(np.abs(diff_y))
        best = mols[int(best_idx[0])]

        # Save example molecules
        writer = Chem.SDWriter(path + "confusion_examples_" + dataset + "_trial" + str(i) + ".sdf")
        try:
            for mol in [top_1, top_2, btm_1, btm_2, best]:
                writer.write(mol)
        finally:
            # One writer is opened per trial; close it so the file is flushed
            # and the handle is released before the next iteration.
            writer.close()

Source File: operations.py From ScaffoldGraph with MIT License

5 votes

def __init__(self, args):
        """Keep the parsed arguments and open the graph input and the output.

        ``args`` must provide ``input_query``, ``input_graph``, ``output`` and
        ``sdf``.
        """
        self.args = args
        self.q_input = args.input_query
        self.g_input = open(args.input_graph, 'r')

        # Output is an SDWriter when args.sdf is set, a text file otherwise.
        if not args.sdf:
            self.output = open(args.output, 'w')
        else:
            rdlogger.setLevel(4)
            self.output = SDWriter(args.output)

        # Result state, all starting empty.
        self.query = set()
        self.matching_parents = set()
        self.count = 0

Source File: __init__.py From ScaffoldGraph with MIT License

5 votes

def mock_sdf(tmp_path):
    """Create ``test_data/test.sdf`` with two molecules and return its path."""
    data_dir = tmp_path / "test_data"
    data_dir.mkdir()
    sdf_path = str(data_dir / "test.sdf")
    smiles_list = (
        'CN1C(=O)CN=C(C2=C1C=CC(=C2)Cl)C3=CC=CC=C3',
        'CCC1=CC2=C(S1)N(C(=O)CN=C2C3=CC=CC=C3Cl)C',
    )
    writer = Chem.SDWriter(sdf_path)
    for smiles in smiles_list:
        writer.write(Chem.MolFromSmiles(smiles))
    writer.close()
    return sdf_path

Source File: __init__.py From ScaffoldGraph with MIT License

5 votes

def mock_sdf_2(tmp_path):
    """Create ``test_data/test_2.sdf`` with two molecules and return its path.

    An already existing ``test_data`` entry is tolerated.
    """
    data_dir = tmp_path / "test_data"
    try:
        data_dir.mkdir()
    except FileExistsError:
        # Something already created it; reuse what is there.
        pass
    sdf_path = str(data_dir / "test_2.sdf")
    smiles_list = (
        'C1C(=O)NC2=C(C=C(C=C2)Br)C(=N1)C3=CC=CC=N3',
        'CC1=NN(C2=C1C(=NCC(=O)N2C)C3=CC=CC=C3F)C',
    )
    writer = Chem.SDWriter(sdf_path)
    for smiles in smiles_list:
        writer.write(Chem.MolFromSmiles(smiles))
    writer.close()
    return sdf_path

Source File: test_sdf_file_parser.py From chainer-chemistry with MIT License

5 votes

def sdf_file(tmpdir, mols):
    """Write ``mols`` to ``test.sdf`` inside ``tmpdir`` and return the path."""
    # Chem.AllChem.Compute2DCoords(mol1)
    fname = os.path.join(str(tmpdir), 'test.sdf')
    writer = Chem.SDWriter(fname)
    try:
        for mol in mols:
            writer.write(mol)
    finally:
        # Close before handing out the path so readers see complete content.
        writer.close()
    return fname

Source File: test_sdf_file_parser.py From chainer-chemistry with MIT License

5 votes

def sdf_file_long(tmpdir):
    """SDFFile with long smiles (ccc...)

    Writes five molecules built from fixed SMILES strings to
    ``test_long.sdf`` inside ``tmpdir`` and returns the file path.
    """
    fname = os.path.join(str(tmpdir), 'test_long.sdf')
    writer = Chem.SDWriter(fname)
    try:
        for smiles in ['CCCCCCCCCCCC', 'CN=C=O', 'CCCCCCCCCCCCCCCC',
                       'Cc1ccccc1', 'CC1=CC2CC(CC1)O2']:
            mol = Chem.MolFromSmiles(smiles)
            writer.write(mol)
    finally:
        # Close before handing out the path so readers see complete content.
        writer.close()
    return fname

Source File: converter.py From 3DGCN with MIT License

4 votes

def converter(path, target_path, name, target_name, process=20):
    """Optimize conformers for a dataset in parallel and save them as SDF.

    Molecules are loaded from a CSV or SDF file, passed together with their
    index and property to ``optimize_conformer`` in a worker pool, and every
    molecule that comes back non-None is written to ``target_path`` with a
    ``target`` property holding its value.

    Parameters
    ----------
    path : str
        Input file; must contain ".csv" or ".sdf" in its name.
    target_path : str
        SDF file the optimized molecules are written to.
    name : str
        Forwarded to ``load_csv`` (presumably the SMILES column name;
        confirm against ``load_csv``). Unused for SDF input.
    target_name : str
        Forwarded to ``load_csv`` for CSV input; for SDF input the molecule
        property the target value is read from.
    process : int, optional
        Number of worker processes.

    Raises
    ------
    ValueError
        If ``path`` is neither a CSV nor an SDF file.
    """
    # Load dataset
    print("Loading Dataset...")
    if ".csv" in path:
        x, y = load_csv(path, name, target_name)
        mols, props = [], []
        for smi, prop in zip(x, y):
            mol = Chem.MolFromSmiles(smi)
            # Drop SMILES that cannot be parsed, together with their property.
            if mol is not None:
                mols.append(mol)
                props.append(prop)

    elif ".sdf" in path:
        mols = Chem.SDMolSupplier(path)
        props = [mol.GetProp(target_name) for mol in mols]

    else:
        raise ValueError("Unsupported file type.")
    mol_idx = list(range(len(mols)))
    print("Loaded {} Molecules from {}".format(len(mols), path))

    # Optimize coordinate using multiprocessing
    print("Optimizing Conformers...")
    # starmap blocks until all results are in, so the pool can be released as
    # soon as the block exits instead of leaking worker processes.
    with mp.Pool(process) as pool:
        results = pool.starmap(optimize_conformer, zip(mol_idx, mols, props))

    # Remove None and add properties
    mol_list_filtered = []
    for mol, prop in results:
        if mol is not None:
            mol.SetProp("target", str(prop))
            mol_list_filtered.append(mol)
    print("{} Molecules Optimized".format(len(mol_list_filtered)))

    # Save molecules
    print("Saving File...")
    w = Chem.SDWriter(target_path)
    try:
        for m in mol_list_filtered:
            w.write(m)
    finally:
        # Closing flushes buffered records to disk.
        w.close()
    print("Saved {} Molecules to {}".format(len(mol_list_filtered), target_path))

Source File: rdkit_util.py From deepchem with MIT License

4 votes

def write_molecule(mol, outfile, is_protein=False):
  """Save a molecule to disk in the format implied by the file name.

  The format is chosen by substring match on `outfile`: ".pdbqt", ".pdb"
  or ".sdf", tested in that order. For ".pdbqt" the molecule is first
  written with the PDB writer and the file is then converted with the
  matching `pdbqt_utils` helper. Nothing is returned.

  Parameters
  ----------
  mol: rdkit Mol
    Molecule to write
  outfile: str
    Filename to write mol to
  is_protein: bool, optional
    Selects the protein conversion helper for ".pdbqt" output.

  Note
  ----
  This function requires RDKit to be installed.

  Raises
  ------
  ValueError: if `outfile` isn't of a supported format.
  """
  from rdkit import Chem

  # ".pdbqt" has to be tested before ".pdb" because the latter is a
  # substring of the former.
  needs_pdbqt = False
  if ".pdbqt" in outfile:
    writer_cls = Chem.PDBWriter
    needs_pdbqt = True
  elif ".pdb" in outfile:
    writer_cls = Chem.PDBWriter
  elif ".sdf" in outfile:
    writer_cls = Chem.SDWriter
  else:
    raise ValueError("Unsupported Format")

  writer = writer_cls(outfile)
  writer.write(mol)
  writer.close()

  if needs_pdbqt:
    if is_protein:
      pdbqt_utils.convert_protein_to_pdbqt(mol, outfile)
    else:
      pdbqt_utils.convert_mol_to_pdbqt(mol, outfile)

Python rdkit.Chem.SDWriter() Examples