Completed
Push — master ( 971a6f...39262f )
by Rich
04:27
created

write_smiles()   A

Complexity

Conditions 3

Size

Total Lines 21

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 1
CRAP Score 9.762

Importance

Changes 2
Bugs 1 Features 1
Metric Value
c 2
b 1
f 1
dl 0
loc 21
ccs 1
cts 11
cp 0.0909
rs 9.3142
cc 3
crap 9.762
1
#! /usr/bin/env python
2
#
3
# Copyright (C) 2015-2016 Rich Lewis <[email protected]>
4
# License: 3-clause BSD
5
6 1
"""
7
# skchem.io.smiles
8
9
Defining input and output operations for smiles files.
10
"""
11
12 1
import warnings
13 1
from functools import wraps
14
15 1
import pandas as pd
0 ignored issues
show
Configuration introduced by
The import pandas could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint; when installing your modules, make sure to use the command for the correct Python version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
16
17 1
from ..utils import Suppressor
18 1
from ..core import Mol
19
20 1
def read_smiles(smiles_file, smiles_column=0, name_column=None, delimiter='\t',
                title_line=False, error_bad_mol=False, warn_bad_mol=True,
                drop_bad_mol=True, *args, **kwargs):

    """Read a smiles file into a pandas dataframe.

    The function wraps the pandas read_csv function.

    Args:
        smiles_file (str, file-like):
            Location of data to load, specified as a string or passed directly
            as a file-like object.  URLs may also be used, see the
            pandas.read_csv documentation.
        smiles_column (int):
            The column index at which SMILES are provided.
            Defaults to `0`.
        name_column (int):
            The column index at which compound names are provided, for use as
            the index in the DataFrame.  If None, use the default index.
            Defaults to `None`.
        delimiter (str):
            The delimiter used.
            Defaults to `\\t`.
        title_line (bool):
            Whether a title line is provided, to use as column titles.
            Defaults to `False`.
        error_bad_mol (bool):
            Whether an error should be raised when a molecule fails to parse.
            Defaults to `False`.
        warn_bad_mol (bool):
            Whether a warning should be raised when a molecule fails to parse.
            Only consulted when `error_bad_mol` is false.
            Defaults to `True`.
        drop_bad_mol (bool):
            If true, drop any row with smiles that failed to parse. Otherwise,
            the field is None. Defaults to `True`.
        args, kwargs:
            Arguments will be passed to pandas read_csv arguments.  A `header`
            keyword is honoured unless `title_line` is True.

    Returns:
        pandas.DataFrame:
            The loaded data frame, with Mols supplied in the `structure` field,
            which is placed as the first column.

    Raises:
        ValueError:
            If `error_bad_mol` is true and a molecule fails to parse.

    See Also:
        pandas.read_csv
        skchem.Mol.from_smiles
        skchem.io.sdf
    """

    # NOTE(review): Suppressor is a project helper; presumably it silences
    # toolkit output while parsing -- confirm against ..utils.
    with Suppressor():

        # `header` is popped so that it is never passed to read_csv twice.
        # title_line=True means "line zero holds the column titles" and takes
        # precedence over any header the caller supplied.
        header = kwargs.pop('header', None)
        if title_line is True:
            header = 0

        # read the smiles file
        data = pd.read_csv(smiles_file, *args, delimiter=delimiter,
                           header=header, **kwargs)

        # replace the smiles column with the structure column
        columns = list(data.columns)
        columns[smiles_column] = 'structure'
        data.columns = columns

        def parse(name, smiles):
            """ Parse one smiles string; `name` is the row's index label. """
            try:
                return Mol.from_smiles(smiles)
            except ValueError:
                msg = 'Molecule {} could not be decoded.'.format(name)
                if error_bad_mol:
                    raise ValueError(msg)
                elif warn_bad_mol:
                    warnings.warn(msg)

                return None

        # Iterate over (index label, smiles) pairs rather than using a
        # row-wise DataFrame.apply: this avoids building a Series per row and
        # always yields exactly one value per row, even for an empty frame.
        smiles = data['structure'].apply(str)
        mols = [parse(name, smi) for name, smi in zip(data.index, smiles)]
        data['structure'] = pd.Series(mols, index=data.index, dtype=object)

        if drop_bad_mol:
            data = data[data['structure'].notnull()]

        # set index if passed
        if name_column is not None:
            data = data.set_index(data.columns[name_column])

        # move the structure column to the front
        cols = data.columns.tolist()
        cols.remove('structure')
        return data[['structure'] + cols]
117
118
119 1
def write_smiles(data, smiles_path):

    """ Write a dataframe to a smiles file.

    The output is tab separated, without a header line.  Each line holds the
    SMILES string first, followed by the index value(s) and then any remaining
    columns.  The object passed in is not modified.

    Args:
        data (pd.Series or pd.DataFrame):
            The dataframe to write.  A Series is treated as the `structure`
            column; a DataFrame must contain a `structure` column.  Every
            structure must provide a `to_smiles()` method.
        smiles_path (str):
            The path to write the dataframe to.
    """

    if isinstance(data, pd.Series):
        data = data.to_frame(name='structure')

    # work on a copy so the caller's object keeps its molecule objects
    data = data.copy()
    data['structure'] = data['structure'].apply(lambda m: m.to_smiles())

    # the index becomes ordinary column(s) so that it is written after the
    # smiles string
    data = data.reset_index()
    cols = ['structure'] + [col for col in data.columns if col != 'structure']
    data[cols].to_csv(smiles_path, sep='\t', header=False, index=False)
140
141
142 1
# Classmethod adaptor around read_smiles: the class it is bound to is
# ignored and all arguments are forwarded unchanged.  `wraps` copies
# read_smiles' metadata (name, docstring) onto the adaptor.
@classmethod
@wraps(read_smiles)
def _from_smiles_df(_, *args, **kwargs):
    frame = read_smiles(*args, **kwargs)
    return frame
146
147 1
# Classmethod adaptor around read_smiles that returns only the `structure`
# column of the loaded frame.  The class it is bound to is ignored.
@classmethod
@wraps(read_smiles)
def _from_smiles_series(_, *args, **kwargs):
    frame = read_smiles(*args, **kwargs)
    return frame.structure
151
152 1
# Method adaptor around write_smiles: the instance it is called on is passed
# as the data to write, followed by the caller's arguments.
@wraps(write_smiles)
def _to_smiles_df(self, *args, **kwargs):
    result = write_smiles(self, *args, **kwargs)
    return result
155
156 1
# Attach the smiles readers/writers to the pandas containers at import time.
# Readers differ per container (frame vs. structure series); the writer is
# shared by both.
pd.DataFrame.from_smiles = _from_smiles_df
pd.Series.from_smiles = _from_smiles_series
for _container in (pd.Series, pd.DataFrame):
    _container.to_smiles = _to_smiles_df
del _container
160