1 | #! /usr/bin/env python |
||
2 | # |
||
3 | # Copyright (C) 2015-2016 Rich Lewis <[email protected]> |
||
4 | # License: 3-clause BSD |
||
5 | |||
6 | 1 | """ |
|
7 | # skchem.io.smiles |
||
8 | |||
9 | Defining input and output operations for smiles files. |
||
10 | """ |
||
11 | |||
12 | 1 | import warnings |
|
13 | 1 | from functools import wraps |
|
14 | |||
15 | 1 | import pandas as pd |
|
0 ignored issues
–
show
|
|||
16 | |||
17 | 1 | from ..utils import Suppressor, squeeze |
|
18 | 1 | from ..core import Mol |
|
19 | |||
20 | |||
def read_smiles(smiles_file, smiles_column=0, name_column=None, delimiter='\t',
                title_line=False, error_bad_mol=False, warn_bad_mol=True,
                drop_bad_mol=True, *args, **kwargs):

    """Read a smiles file into a pandas dataframe.

    This function wraps the pandas read_csv function.

    Args:
        smiles_file (str, file-like):
            Location of data to load, specified as a string or passed
            directly as a file-like object.  URLs may also be used, see the
            pandas.read_csv documentation.
        smiles_column (int):
            The column index at which SMILES are provided.
            Defaults to `0`.
        name_column (int):
            The column index at which compound names are provided, for use
            as the index in the DataFrame.  The column is renamed to `batch`
            before it becomes the index.  If None, use the default index.
            Defaults to `None`.
        delimiter (str):
            The delimiter used.
            Defaults to `\\t`.
        title_line (bool):
            Whether a title line is provided, to use as column titles.  When
            True, this takes precedence over any `header` keyword argument.
            Defaults to `False`.
        error_bad_mol (bool):
            Whether an error should be raised when a molecule fails to parse.
            Defaults to `False`.
        warn_bad_mol (bool):
            Whether a warning should be raised when a molecule fails to
            parse.  Only consulted when `error_bad_mol` is false.
            Defaults to `True`.
        drop_bad_mol (bool):
            If true, drop any row with smiles that failed to parse.
            Otherwise, the field is None.  Defaults to `True`.
        args, kwargs:
            Arguments will be passed to pandas read_csv arguments.

    Returns:
        pandas.DataFrame:
            The loaded data frame, with Mols supplied in the `structure`
            field, which is always the first column.  The frame is passed
            through `squeeze(..., axis=1)` before being returned.

    Raises:
        ValueError:
            If a molecule fails to parse and `error_bad_mol` is true.

    See Also:
        pandas.read_csv
        skchem.Mol.from_smiles
        skchem.io.sdf
    """

    with Suppressor():

        # Work out the header line to hand to the pandas parser.  A `header`
        # keyword supplied by the caller is popped from kwargs so that it is
        # not passed to read_csv twice; `title_line=True` means line zero, as
        # is usual for smiles files, and wins over an explicit header.
        header = kwargs.pop('header', None)
        if title_line is True:
            header = 0

        # read the smiles file
        data = pd.read_csv(smiles_file, delimiter=delimiter, header=header,
                           *args, **kwargs)

        # replace the smiles column with the structure column
        column_names = list(data.columns)
        column_names[smiles_column] = 'structure'
        # `is not None` rather than truthiness: column 0 is a valid choice
        if name_column is not None:
            column_names[name_column] = 'batch'
        data.columns = column_names

        def parse(row):
            """ Parse smiles for row """
            try:
                return Mol.from_smiles(row['structure'])
            except ValueError:
                msg = 'Molecule {} could not be decoded.'.format(row.name)
                if error_bad_mol:
                    raise ValueError(msg)
                elif warn_bad_mol:
                    warnings.warn(msg)

                return None

        # coerce every entry to str before parsing, so non-string cells
        # (e.g. missing values) reach the parser as text
        data['structure'] = data['structure'].apply(str)
        data['structure'] = data.apply(parse, axis=1)

        if drop_bad_mol:
            data = data[data['structure'].notnull()]

        # set index if passed
        if name_column is not None:
            data = data.set_index(data.columns[name_column])

        # move the structure column to the front
        remaining = data.columns.tolist()
        remaining.remove('structure')
        data = data[['structure'] + remaining]
        return squeeze(data, axis=1)
|
120 | |||
121 | |||
def write_smiles(data, smiles_path):

    """ Write a dataframe to a smiles file.

    The output is tab-delimited and has no header row.  The SMILES string is
    written as the first column, followed by the index (turned into ordinary
    columns by `reset_index`) and then any remaining columns.

    Args:
        data (pd.Series or pd.DataFrame):
            The data to write.  A Series is treated as the `structure`
            column; a DataFrame must contain a `structure` column.  Every
            entry of that column must provide a `to_smiles()` method.  The
            object passed in is not modified.
        smiles_path (str):
            The path to write the dataframe to.  It is handed straight to
            `DataFrame.to_csv`.
    """

    if isinstance(data, pd.Series):
        data = data.to_frame(name='structure')

    # work on a copy so the caller's frame keeps its original objects
    table = data.copy()
    table['structure'] = table['structure'].apply(lambda mol: mol.to_smiles())

    # the index is written out as regular column(s) after the SMILES
    table = table.reset_index()
    trailing = [col for col in table.columns if col != 'structure']
    table[['structure'] + trailing].to_csv(smiles_path, sep='\t',
                                           header=False, index=False)
||
143 | |||
144 | |||
@classmethod
@wraps(read_smiles)
def _from_smiles_df(_, *args, **kwargs):
    # Classmethod adapter around read_smiles: the class it is called on is
    # ignored and every other argument is forwarded unchanged.  `wraps`
    # copies read_smiles' name and docstring onto this function.
    loaded = read_smiles(*args, **kwargs)
    return loaded
||
149 | |||
150 | |||
@classmethod
@wraps(read_smiles)
def _from_smiles_series(_, *args, **kwargs):
    # Classmethod adapter around read_smiles that returns only the structures.
    # The class it is called on is ignored; all other arguments are forwarded.
    loaded = read_smiles(*args, **kwargs)

    # read_smiles returns `squeeze(data, axis=1)`; presumably that collapses a
    # single-column frame into a Series (TODO confirm against utils.squeeze).
    # In that case the result already is the structure column, and attribute
    # access for `structure` would fail on it.
    if isinstance(loaded, pd.Series):
        return loaded
    return loaded['structure']
||
155 | |||
156 | |||
@wraps(write_smiles)
def _to_smiles_df(self, *args, **kwargs):
    # Method adapter around write_smiles: the pandas object it is called on
    # becomes the `data` argument and everything else is forwarded.  `wraps`
    # copies write_smiles' name and docstring onto this function.
    outcome = write_smiles(self, *args, **kwargs)
    return outcome
||
160 | |||
# Attach the SMILES reader and writer to the pandas classes as a side effect
# of importing this module.

# readers: classmethod objects, so they are called on the class itself
pd.DataFrame.from_smiles = _from_smiles_df
pd.Series.from_smiles = _from_smiles_series

# writers: a plain function, so it binds to the instance it is called on
pd.DataFrame.to_smiles = _to_smiles_df
pd.Series.to_smiles = _to_smiles_df
||
165 |
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue with Pylint. Make sure that your libraries are available by adding the necessary install commands.
2. Missing __init__.py files
This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder.