1 | #! /usr/bin/env python |
||
2 | # |
||
3 | # Copyright (C) 2016 Rich Lewis <[email protected]> |
||
4 | # License: 3-clause BSD |
||
5 | |||
6 | 1 | """ |
|
7 | ## skchem.descriptors.atom |
||
8 | |||
9 | Module specifying atom based descriptor generators. |
||
10 | """ |
||
11 | |||
12 | 1 | import logging |
|
13 | 1 | import subprocess |
|
14 | 1 | import re |
|
15 | 1 | from abc import ABCMeta |
|
16 | |||
17 | 1 | import pandas as pd |
|
0 ignored issues
–
show
|
|||
18 | 1 | import numpy as np |
|
0 ignored issues
–
show
The import
numpy could not be resolved.
This can be caused by one of the following: 1. Missing Dependencies: This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml
before_commands:
- sudo pip install abc # Python2
- sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use
the command for the correct version.
2. Missing __init__.py files: This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder. |
|||
19 | |||
20 | 1 | from ..utils import line_count, nanarray |
|
21 | 1 | from ..base import ( |
|
22 | CLIWrapper, Transformer, AtomTransformer, BatchTransformer, Featurizer |
||
23 | ) |
||
24 | |||
# Name the logger after the module's dotted import path rather than its
# filesystem path, so it takes part in the package logger hierarchy.
LOGGER = logging.getLogger(__name__)
|
26 | |||
27 | # TODO: fix averagemicrospeciescharge |
||
0 ignored issues
–
show
|
|||
28 | # TODO: fix logd logp logs |
||
0 ignored issues
–
show
|
|||
29 | # TODO: oen (orbital electronegativity) - sigma + pi |
||
0 ignored issues
–
show
|
|||
30 | # TODO: water accessible surface area |
||
0 ignored issues
–
show
|
|||
31 | |||
32 | # TODO: these don't produce csv |
||
0 ignored issues
–
show
|
|||
33 | # ['doublebondstereoisomers', 'conformers', 'stereoisomers', |
||
34 | # 'moleculardynamics', 'stereoanalysis', 'lowestenergyconformer', 'msdistr2', |
||
35 | # 'conformations', 'dominanttautomerdistribution', 'hnmr', 'moldyn', 'cnmr', |
||
36 | # 'frameworks', 'microspeciesdistribution', 'nmr', 'leconformer', 'msdistr', |
||
37 | # 'tetrahedralstereoisomers'] |
||
38 | # |
||
39 | |||
40 | 1 | CHEMAXON_HINT = """ Install ChemAxon from https://www.chemaxon.com. |
|
41 | It requires a license, which can be freely obtained for academics. """ |
||
42 | |||
43 | |||
44 | 1 | class ChemAxonBaseFeaturizer(CLIWrapper, Featurizer): |
|
0 ignored issues
–
show
|
|||
45 | |||
46 | """ Base class for ChemAxonFeaturizers (using cxcalc). |
||
47 | |||
48 | Concrete subclasses inheriting from this should override |
||
49 | `_optimal_feats` (and `_feat_columns` where the default table does not apply). |
||
50 | """ |
||
51 | |||
52 | 1 | __metaclass__ = ABCMeta |
|
53 | |||
54 | 1 | install_hint = CHEMAXON_HINT |
|
55 | |||
56 | 1 | _feat_columns = {'averagepol': ['a_avg'], 'name': ['preferred_iupac_name'], |
|
57 | 'aromaticbondcount': ['aromatic_bond_count'], |
||
58 | 'maximalprojectionradius': ['maximal_projection_radius'], |
||
59 | 'tpolarizability': ['a_avg', 'a_xx', 'a_yy', 'a_zz'], |
||
60 | 'distance': ['distance'], |
||
61 | 'acceptor': ['acceptor_count', 'acceptor_site_count'], |
||
62 | 'fusedringcount': ['fused_ring_count'], |
||
63 | 'charge': ['total_charge'], |
||
64 | 'donor': ['donor_count', 'donor_site_count'], |
||
65 | 'ringcount': ['ring_count'], |
||
66 | 'chainbond': ['chain_bond'], |
||
67 | 'mmff94energy': ['mmff94_energy'], |
||
68 | 'huckel': ['aromatic_e+/nu-_order', |
||
69 | 'localization_energy_l_+/l-', 'pi_energy', |
||
70 | 'electron_density', 'charge_density'], |
||
71 | 'chainatom': ['chain_atom'], |
||
72 | 'shortestpath': ['shortest_path'], |
||
73 | 'resonantcount': ['resonant_count'], |
||
74 | 'tpol': ['a_avg', 'a_xx', 'a_yy', 'a_zz'], |
||
75 | 'moststabletautomer': ['most_stable_tautomer'], |
||
76 | 'generictautomer': ['generictautomer'], |
||
77 | 'hmoelectrophilicityorder': ['hmo_aromatic_e+_order'], |
||
78 | 'ringsystemcountofsize': ['ring_system_count_of_size'], |
||
79 | 'largestatomringsize': ['largest_ring_size_of_atom'], |
||
80 | 'tetrahedralstereoisomercount': [ |
||
81 | 'tetrahedral_stereoisomer_count'], |
||
82 | 'enumerations': ['enumerations'], |
||
83 | 'ringatom': ['ring_atom'], 'connected': ['connected'], |
||
84 | 'hmolocalizationenergy': [ |
||
85 | 'hmo_localization_energy_l+/l-'], |
||
86 | 'averagemolecularpolarizability': ['a_avg'], |
||
87 | 'donorsitecount': ['donor_site_count'], |
||
88 | 'donorcount': ['donor_count'], |
||
89 | 'asymmetricatom': ['asymmetric_atom'], |
||
90 | 'pienergy': ['pi_energy'], |
||
91 | 'bondcount': ['bond_count'], |
||
92 | 'chiralcenters': ['chiral_centers'], |
||
93 | 'hmohuckel': ['hmo_aromatic_e+/nu-_order', |
||
94 | 'hmo_localization_energy_l+/l-', |
||
95 | 'hmo_pi_energy', |
||
96 | 'hmo_electron_density', |
||
97 | 'hmo_charge_density'], |
||
98 | 'huckeleigenvector': ['eigenvector'], |
||
99 | 'ringcountofsize': ['ring_count_of_size'], |
||
100 | 'heteroaliphaticringcount': [ |
||
101 | 'heteroaliphatic_ring_count'], |
||
102 | 'markushenumerations': ['enumerations'], |
||
103 | 'minimalprojectionradius': ['minimal_projection_radius'], |
||
104 | 'dipole': ['dipole'], |
||
105 | 'balabanindex': ['balaban_index'], |
||
106 | 'aromaticnucleophilicityorder': ['aromatic_nu-_order'], |
||
107 | 'tautomercount': ['tautomer_count'], |
||
108 | 'cyclomaticnumber': ['cyclomatic_number'], |
||
109 | 'psa': ['polar_surface_area'], 'isoelectricpoint': ['pi'], |
||
110 | 'hmopienergy': ['hmo_pi_energy'], |
||
111 | 'ayypol': ['a_yy'], 'fragmentcount': ['fragment_count'], |
||
112 | 'acceptormultiplicity': ['acceptor_multiplicity'], |
||
113 | 'topologyanalysistable': ['atom_count', |
||
114 | 'aliphatic_atom_count', |
||
115 | 'aromatic_atom_count', |
||
116 | 'bond_count', |
||
117 | 'aliphatic_bond_count', |
||
118 | 'aromatic_bond_count', |
||
119 | 'rotatable_bond_count', |
||
120 | 'ring_count', |
||
121 | 'aliphatic_ring_count', |
||
122 | 'aromatic_ring_count', |
||
123 | 'hetero_ring_count', |
||
124 | 'heteroaliphatic_ring_count', |
||
125 | 'heteroaromatic_ring_count', |
||
126 | 'ring_atom_count', |
||
127 | 'ring_bond_count', |
||
128 | 'chain_atom_count', |
||
129 | 'chain_bond_count', |
||
130 | 'smallest_ring_size', |
||
131 | 'largest_ring_size'], |
||
132 | 'ioncharge': ['charge'], |
||
133 | 'asymmetricatoms': ['asymmetric_atoms'], |
||
134 | 'wateraccessiblesurfacearea': ['asa', 'asa+', 'asa-', |
||
135 | 'asa_h', 'asa_p'], |
||
136 | 'avgpol': ['a_avg'], |
||
137 | 'carboaliphaticringcount': ['carboaliphatic_ring_count'], |
||
138 | 'aliphaticringcount': ['aliphatic_ring_count'], |
||
139 | 'donormultiplicity': ['donor_multiplicity'], |
||
140 | 'minimalprojectionarea': ['minimal_projection_area'], |
||
141 | 'nucleophiliclocalizationenergy': [ |
||
142 | 'localization_energy_l-'], 'dihedral': ['dihedral'], |
||
143 | 'heteroringcount': ['hetero_ring_count'], |
||
144 | 'azzpol': ['a_zz'], |
||
145 | 'molecularsurfacearea': ['van_der_waals_surface_area_3d'], |
||
146 | 'hmonucleophiliclocalizationenergy': [ |
||
147 | 'hmo_localization_energy_l-'], |
||
148 | 'chargedistribution': ['charge_distribution'], |
||
149 | 'pol': ['a_mol', 'a_atom'], |
||
150 | 'hmoelectrondensity': ['hmo_electron_density'], |
||
151 | 'carboaromaticringcount': ['carboaromatic_ring_count'], |
||
152 | 'acceptorsitecount': ['acceptor_site_count'], |
||
153 | 'markushenumerationcount': ['markush_library_size'], |
||
154 | 'localizationenergy': ['localization_energy_l+/l-'], |
||
155 | 'hararyindex': ['harary_index'], |
||
156 | 'asa': ['asa', 'asa+', 'asa-', 'asa_h', 'asa_p'], |
||
157 | 'acc': ['acc'], 'majortautomer': ['major_tautomer'], |
||
158 | 'majormicrospecies': ['major-ms'], |
||
159 | 'aliphaticatomcount': ['aliphatic_atom_count'], |
||
160 | 'angle': ['angle'], 'huckeleigenvalue': ['eigenvalue'], |
||
161 | 'axxpol': ['a_xx'], |
||
162 | 'chiralcenter': ['chiral_center'], |
||
163 | 'aliphaticbondcount': ['aliphatic_bond_count'], |
||
164 | 'smallestatomringsize': ['smallest_ring_size_of_atom'], |
||
165 | 'dreidingenergy': ['dreiding_energy'], |
||
166 | 'maximalprojectionsize': [ |
||
167 | 'length_perpendicular_to_the_max_area'], |
||
168 | 'largestringsystemsize': ['largest_ring_system_size'], |
||
169 | 'accsitecount': ['acceptor_site_count'], |
||
170 | 'refractivity': ['refractivity'], |
||
171 | 'bondtype': ['bond_type'], |
||
172 | 'chargedensity': ['charge_density'], |
||
173 | 'resonants': ['resonants'], |
||
174 | 'aromaticatomcount': ['aromatic_atom_count'], |
||
175 | 'distancedegree': ['distance_degree'], |
||
176 | 'hasvalidconformer': ['has_valid_conformer'], |
||
177 | 'electrondensity': ['electron_density'], |
||
178 | 'asymmetricatomcount': ['asymmetric_atom_count'], |
||
179 | 'fsp3': ['fsp3'], 'don': ['don'], |
||
180 | 'fusedaliphaticringcount': ['fused_aliphatic_ring_count'], |
||
181 | 'pkat': ['pkat'], |
||
182 | 'fusedaromaticringcount': ['fused_aromatic_ring_count'], |
||
183 | 'majorms2': ['majorms2'], |
||
184 | 'maximalprojectionarea': ['maximal_projection_area'], |
||
185 | 'hbonddonoracceptor': ['acceptor_count', 'donor_count', |
||
186 | 'acceptor_site_count', |
||
187 | 'donor_site_count'], |
||
188 | 'acceptorcount': ['acceptor_count'], |
||
189 | 'molecularpolarizability': ['a_mol'], |
||
190 | 'huckeltable': ['aromatic_e+/nu-_order', |
||
191 | 'localization_energy_l+/l-', 'pi_energy', |
||
192 | 'electron_density', 'charge_density'], |
||
193 | 'rotatablebondcount': ['rotatable_bond_count'], |
||
194 | 'minimalprojectionsize': [ |
||
195 | 'length_perpendicular_to_the_min_area'], |
||
196 | 'polarizability': ['a_mol', 'a_atom'], |
||
197 | 'acceptortable': ['acceptor_count', |
||
198 | 'acceptor_site_count'], |
||
199 | 'aliphaticringcountofsize': [ |
||
200 | 'aliphatic_ring_count_of_size'], 'hlb': ['hlb'], |
||
201 | 'eccentricity': ['eccentricity'], |
||
202 | 'hmochargedensity': ['hmo_charge_density'], |
||
203 | 'hmohuckeleigenvalue': ['hmo_eigenvalue'], |
||
204 | 'totalchargedensity': ['total_charge_density'], |
||
205 | 'hmonucleophilicityorder': ['hmo_aromatic_nu-_order'], |
||
206 | 'aromaticringcountofsize': [ |
||
207 | 'aromatic_ring_count_of_size'], |
||
208 | 'electrophilicityorder': ['aromatic_e+_order'], |
||
209 | 'connectedgraph': ['connected_graph'], |
||
210 | 'plattindex': ['platt_index'], 'logp': ['logp'], |
||
211 | 'topanal': ['atom_count', 'aliphatic_atom_count', |
||
212 | 'aromatic_atom_count', 'bond_count', |
||
213 | 'aliphatic_bond_count', 'aromatic_bond_count', |
||
214 | 'rotatable_bond_count', 'ring_count', |
||
215 | 'aliphatic_ring_count', 'aromatic_ring_count', |
||
216 | 'hetero_ring_count', |
||
217 | 'heteroaliphatic_ring_count', |
||
218 | 'heteroaromatic_ring_count', |
||
219 | 'ring_atom_count', |
||
220 | 'ring_bond_count', 'chain_atom_count', |
||
221 | 'chain_bond_count', 'smallest_ring_size', |
||
222 | 'largest_ring_size'], |
||
223 | 'logdcalculator': ['ph=0', 'ph=1', 'ph=2', 'ph=3', 'ph=4', |
||
224 | 'ph=5', 'ph=6', 'ph=7', 'ph=8', 'ph=9', |
||
225 | 'ph=10', 'ph=11', 'ph=12', 'ph=13', |
||
226 | 'ph=14', 'unnamed:_16'], |
||
227 | 'logs': ['ph=0.0', 'ph=1.0', 'ph=2.0', 'ph=3.0', 'ph=4.0', |
||
228 | 'ph=5.0', 'ph=6.0', 'ph=7.0', 'ph=8.0', |
||
229 | 'ph=9.0', 'ph=10.0', 'ph=11.0', 'ph=12.0', |
||
230 | 'ph=13.0', 'ph=14.0', 'unnamed:_16'], |
||
231 | 'atompol': ['a_atom'], 'canonicalresonant': ['structure'], |
||
232 | 'ringbond': ['ring_bond'], |
||
233 | 'ringatomcount': ['ring_atom_count'], |
||
234 | 'donortable': ['donor_count', 'donor_site_count'], |
||
235 | 'randicindex': ['randic_index'], |
||
236 | 'rotatablebond': ['rotatable_bond'], |
||
237 | 'hyperwienerindex': ['hyper_wiener_index'], |
||
238 | 'hmohuckeleigenvector': ['hmo_eigenvector'], |
||
239 | 'carboringcount': ['carbo_ring_count'], |
||
240 | 'logpcalculator': ['logp', 'unnamed:_2'], |
||
241 | 'ringsystemcount': ['ring_system_count'], |
||
242 | 'largestringsize': ['largest_ring_size'], |
||
243 | 'stereodoublebondcount': ['stereo_double_bond_count'], |
||
244 | 'pi': ['pi'], |
||
245 | 'stericeffectindex': ['steric_effect_index'], |
||
246 | 'volume': ['van_der_waals_volume'], |
||
247 | 'averagemicrospeciescharge': ['charge'], |
||
248 | 'pka': ['apka1', 'apka2', 'bpka1', 'bpka2', 'atoms'], |
||
249 | 'hmohuckeltable': ['hmo_aromatic_e+/nu-_order', |
||
250 | 'hmo_localization_energy_l+/l-', |
||
251 | 'hmo_pi_energy', |
||
252 | 'hmo_electron_density', |
||
253 | 'hmo_charge_density'], |
||
254 | 'ringcountofatom': ['ring_count_of_atom'], |
||
255 | 'aromaticelectrophilicityorder': ['aromatic_e+_order'], |
||
256 | 'hindrance': ['steric_hindrance'], |
||
257 | 'chainatomcount': ['chain_atom_count'], |
||
258 | 'pkacalculator': ['apka1', 'apka2', 'bpka1', 'bpka2', |
||
259 | 'atoms'], |
||
260 | 'heteroaromaticringcount': ['heteroaromatic_ring_count'], |
||
261 | 'sterichindrance': ['steric_hindrance'], |
||
262 | 'hbda': ['acceptor_count', 'donor_count', |
||
263 | 'acceptor_site_count', 'donor_site_count'], |
||
264 | 'molpol': ['a_mol'], |
||
265 | 'atomicpolarizability': ['a_atom'], |
||
266 | 'msdon': ['ph=0.00', 'ph=1.00', 'ph=2.00', 'ph=3.00', |
||
267 | 'ph=4.00', 'ph=5.00', 'ph=6.00', 'ph=7.00', |
||
268 | 'ph=8.00', 'ph=9.00', 'ph=10.00', 'ph=11.00', |
||
269 | 'ph=12.00', 'ph=13.00', 'ph=14.00'], |
||
270 | 'enumerationcount': ['markush_library_size'], |
||
271 | 'vdwsa': ['van_der_waals_surface_area_3d'], |
||
272 | 'orbitalelectronegativity': [ |
||
273 | 'sigma_orbital_electronegativity', |
||
274 | 'pi_orbital_electronegativity'], |
||
275 | 'hmoelectrophiliclocalizationenergy': [ |
||
276 | 'hmo_localization_energy_l+'], |
||
277 | 'smallestringsize': ['smallest_ring_size'], |
||
278 | 'szegedindex': ['szeged_index'], |
||
279 | 'nucleophilicityorder': ['aromatic_nu-_order'], |
||
280 | 'canonicaltautomer': ['canonical_tautomer'], |
||
281 | 'stereoisomercount': ['stereoisomer_count'], |
||
282 | 'msa': ['van_der_waals_surface_area_3d'], |
||
283 | 'donsitecount': ['donor_site_count'], |
||
284 | 'randommarkushenumerations': [ |
||
285 | 'randommarkushenumerations'], |
||
286 | 'wienerindex': ['wiener_index'], |
||
287 | 'huckelorbitals': ['orbitals'], |
||
288 | 'doublebondstereoisomercount': [ |
||
289 | 'double_bond_stereoisomer_count'], |
||
290 | 'tautomers': ['tautomers'], |
||
291 | 'polarsurfacearea': ['polar_surface_area'], |
||
292 | 'chiralcentercount': ['chiral_center_count'], |
||
293 | 'electrophiliclocalizationenergy': [ |
||
294 | 'localization_energy_l+'], |
||
295 | 'aliphaticatom': ['aliphatic_atom'], |
||
296 | 'ringbondcount': ['ring_bond_count'], |
||
297 | 'wienerpolarity': ['wiener_polarity'], |
||
298 | 'msacc': ['ph=0.00', 'ph=1.00', 'ph=2.00', 'ph=3.00', |
||
299 | 'ph=4.00', 'ph=5.00', 'ph=6.00', 'ph=7.00', |
||
300 | 'ph=8.00', 'ph=9.00', 'ph=10.00', 'ph=11.00', |
||
301 | 'ph=12.00', 'ph=13.00', 'ph=14.00'], |
||
302 | 'formalcharge': ['formal_charge'], |
||
303 | 'smallestringsystemsize': ['smallest_ring_system_size'], |
||
304 | 'majorms': ['major-ms'], |
||
305 | 'tholepolarizability': ['a_avg', 'a_xx', 'a_yy', 'a_zz'], |
||
306 | 'aromaticatom': ['aromatic_atom'], |
||
307 | 'oen': ['sigma_orbital_electronegativity', |
||
308 | 'pi_orbital_electronegativity'], |
||
309 | 'chainbondcount': ['chain_bond_count'], |
||
310 | 'logd': ['ph=0.00', 'ph=1.00', 'ph=2.00', 'ph=3.00', |
||
311 | 'ph=4.00', 'ph=5.00', 'ph=6.00', 'ph=7.00', |
||
312 | 'ph=8.00', 'ph=9.00', 'ph=10.00', 'ph=11.00', |
||
313 | 'ph=12.00', 'ph=13.00', 'ph=14.00'], |
||
314 | 'hmohuckelorbitals': ['hmo_orbitals'], |
||
315 | 'aromaticringcount': ['aromatic_ring_count'], |
||
316 | 'pichargedensity': ['pi_charge_density']} |
||
317 | |||
318 | 1 | _optimal_feats = [] # override this |
|
319 | |||
def __init__(self, features='optimal', verbose=True):
    """ Set up the featurizer.

    Args:
        features (str or list or tuple):
            Feature selection, assigned to `self.features` once the parent
            initializer has run.
        verbose (bool):
            Forwarded unchanged to the parent class initializer.
    """
    # Create the backing attribute first so it exists while the parent
    # initializer runs; the requested value is assigned afterwards.
    self._features = None
    super(ChemAxonBaseFeaturizer, self).__init__(verbose=verbose)
    self.features = features
||
324 | |||
@property
def features(self):
    """ list: names of the currently selected features. """
    return self._features

@features.setter
def features(self, features):
    """ Select the features to calculate.

    Args:
        features (str or list or tuple):
            `'optimal'` or `'all'` selects the entries of
            `_optimal_feats` (both keywords currently map to the same
            list).  Any other string is treated as a single feature name.
            A list or tuple is taken as a collection of feature names.

    Raises:
        NotImplementedError:
            If any requested name is not a key of `_feat_columns`, or if
            `features` is not a string, list or tuple.
    """
    if isinstance(features, str):
        if features in ('optimal', 'all'):
            # Store a copy: handing out the class-level list itself would
            # let in-place edits on one instance leak into the class
            # default shared by every other instance.
            self._features = list(self._optimal_feats)
            return
        # A single name goes through the same validation as a sequence.
        features = [features]

    if not isinstance(features, (list, tuple)):
        raise NotImplementedError('Feature set {} not available.'.format(
            features))

    valid = np.array([feat in self._feat_columns for feat in features],
                     dtype=bool)
    if not valid.all():
        msg = 'Descriptor "{}" not available.'.format(
            np.array(features)[~valid])
        raise NotImplementedError(msg)
    self._features = list(features)
||
347 | |||
348 | 1 | def _feature_index(self): |
|
349 | return pd.Index(sum((self._feat_columns[feat] |
||
350 | for feat in self.features), []), |
||
351 | name='features') |
||
352 | |||
@staticmethod
def validate_install():
    """ Check whether the `cxcalc` command can be run.

    Returns:
        bool: True if `cxcalc` could be launched and exited with status 0,
        False if it exited with another status or could not be launched.
    """
    try:
        exit_code = subprocess.call(['cxcalc'],
                                    stderr=subprocess.DEVNULL,
                                    stdout=subprocess.DEVNULL)
    except OSError:
        # Covers FileNotFoundError (not on PATH) as well as other launch
        # failures such as PermissionError (present but not executable).
        return False
    return exit_code == 0
||
361 | |||
def monitor_progress(self, filename):
    """ Report progress from the number of lines written to `filename`.

    Args:
        filename (str):
            Path handed to `line_count`.

    Returns:
        int: the line count minus one (presumably discounting a header
        row - confirm against the cxcalc output format), or 0 when the
        count is falsy.
    """
    n_lines = line_count(filename)
    if not n_lines:
        return 0
    return n_lines - 1
||
365 | |||
366 | 1 | def _cli_args(self, infile, outfile): |
|
367 | return ['cxcalc', infile, '-o', outfile] + self.features |
||
368 | |||
369 | 1 | def _parse_outfile(self, outfile): |
|
370 | res = pd.read_table(outfile, engine='python').drop('id', axis=1) |
||
371 | return res |
||
372 | |||
def _parse_errors(self, errs):
    """ Log the captured stderr at debug level.

    Args:
        errs:
            The stderr content to log.

    Returns:
        list: always empty.
    """
    LOGGER.debug('stderr: %s', errs)
    return []  # instances are not skipped ever, so don't return anything
||
376 | |||
377 | |||
378 | 1 | class ChemAxonFeaturizer(ChemAxonBaseFeaturizer, BatchTransformer, |
|
0 ignored issues
–
show
This class should have a docstring.
The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass:
def some_method(self):
"""Do x and return foo."""
If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. ![]() |
|||
379 | Transformer): |
||
380 | |||
381 | 1 | _optimal_feats = ['acceptorcount', 'accsitecount', 'aliphaticatomcount', |
|
382 | 'aliphaticbondcount', 'aliphaticringcount', |
||
383 | 'aromaticatomcount', 'aromaticbondcount', |
||
384 | 'aromaticringcount', 'asymmetricatomcount', |
||
385 | 'averagemolecularpolarizability', 'axxpol', 'ayypol', |
||
386 | 'azzpol', 'balabanindex', 'bondcount', |
||
387 | 'carboaliphaticringcount', 'carboaromaticringcount', |
||
388 | 'carboringcount', 'chainatomcount', |
||
389 | 'chainbondcount', 'chiralcentercount', 'connectedgraph', |
||
390 | 'cyclomaticnumber', 'dipole', |
||
391 | 'donorcount', 'donorsitecount', |
||
392 | 'doublebondstereoisomercount', 'dreidingenergy', |
||
393 | 'formalcharge', |
||
394 | 'fragmentcount', 'fsp3', 'fusedaliphaticringcount', |
||
395 | 'fusedaromaticringcount', 'fusedringcount', |
||
396 | 'hararyindex', 'heteroaliphaticringcount', |
||
397 | 'heteroaromaticringcount', 'heteroringcount', 'hlb', |
||
398 | 'hmopienergy', 'hyperwienerindex', 'largestringsize', |
||
399 | 'largestringsystemsize', |
||
400 | 'markushenumerationcount', 'maximalprojectionarea', |
||
401 | 'maximalprojectionradius', |
||
402 | 'maximalprojectionsize', 'minimalprojectionarea', |
||
403 | 'minimalprojectionradius', |
||
404 | 'minimalprojectionsize', 'mmff94energy', 'molpol', |
||
405 | 'pienergy', 'plattindex', 'psa', 'randicindex', |
||
406 | 'refractivity', 'resonantcount', 'ringatomcount', |
||
407 | 'ringbondcount', 'ringcount', 'ringsystemcount', |
||
408 | 'rotatablebondcount', 'smallestringsize', |
||
409 | 'smallestringsystemsize', |
||
410 | 'stereodoublebondcount', 'stereoisomercount', |
||
411 | 'szegedindex', 'tetrahedralstereoisomercount', |
||
412 | 'vdwsa', 'volume', 'wateraccessiblesurfacearea', |
||
413 | 'wienerindex', 'wienerpolarity'] |
||
414 | |||
@property
def name(self):
    """ str: the fixed identifier 'cx_mol'. """
    return 'cx_mol'
||
418 | |||
@property
def columns(self):
    """ The column labels, as produced by `self._feature_index()`. """
    return self._feature_index()
||
422 | |||
def _parse_outfile(self, outfile):
    """ Parse the output file into a table of floats.

    The table returned by the parent class is converted cell by cell:
    string cells containing 'FAILED' become NaN, every other cell is
    converted with `float`.

    Args:
        outfile (str):
            Path of the output file, forwarded to the parent parser.

    Returns:
        The element-wise converted table.
    """
    res = super(ChemAxonFeaturizer, self)._parse_outfile(outfile)

    def fix_failed(inp):
        """ Return NaN for a 'FAILED' marker, otherwise `float(inp)`. """
        if isinstance(inp, str) and 'FAILED' in inp:
            return np.nan
        return float(inp)

    # `applymap` is deprecated since pandas 2.1 in favour of `map`; older
    # releases only have `applymap`.  Look the method up on the class so a
    # data column that happens to be called 'map' cannot shadow it.
    frame_type = type(res)
    elementwise = getattr(frame_type, 'map', None) or frame_type.applymap
    return elementwise(res, fix_failed)
||
433 | |||
434 | |||
435 | 1 | class ChemAxonAtomFeaturizer(ChemAxonBaseFeaturizer, AtomTransformer, |
|
0 ignored issues
–
show
This class should have a docstring.
The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass:
def some_method(self):
"""Do x and return foo."""
If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. ![]() |
|||
436 | BatchTransformer): |
||
437 | 1 | _optimal_feats = ['acceptormultiplicity', 'aliphaticatom', 'aromaticatom', |
|
438 | 'aromaticelectrophilicityorder', 'asymmetricatom', |
||
439 | 'atomicpolarizability', 'chainatom', 'chargedensity', |
||
440 | 'chiralcenter', 'distancedegree', 'donormultiplicity', |
||
441 | 'eccentricity', 'electrondensity', |
||
442 | 'electrophiliclocalizationenergy', 'hindrance', |
||
443 | 'hmochargedensity', 'hmoelectrondensity', |
||
444 | 'hmoelectrophilicityorder', |
||
445 | 'hmoelectrophiliclocalizationenergy', |
||
446 | 'hmonucleophilicityorder', |
||
447 | 'hmonucleophiliclocalizationenergy', 'ioncharge', |
||
448 | 'largestatomringsize', 'nucleophilicityorder', |
||
449 | 'nucleophiliclocalizationenergy', 'oen', |
||
450 | 'pichargedensity', 'ringatom', 'ringcountofatom', |
||
451 | 'stericeffectindex', 'totalchargedensity'] |
||
452 | |||
453 | 1 | _h_inc_feats = ['acc', 'atomicpolarizability', 'charge', 'distancedegree', |
|
454 | 'don', 'eccentricity', 'hindrance', 'largestatomringsize', |
||
455 | 'oen', 'ringcountofatom', 'smallestatomringsize', |
||
456 | 'stericeffectindex'] |
||
457 | |||
@property
def name(self):
    """ str: the fixed identifier 'cx_atom'. """
    return 'cx_atom'
||
461 | |||
@property
def minor_axis(self):
    """ The minor axis labels, as produced by `self._feature_index()`. """
    return self._feature_index()
||
465 | |||
def _transform_atom(self, atom):
    """ Unsupported: always raises.

    Args:
        atom:
            Ignored.

    Raises:
        NotImplementedError: unconditionally.
    """
    raise NotImplementedError('Cannot calculate per atom with ChemAxon')
||
468 | |||
def _parse_outfile(self, outfile):
    """ Parse the output file into padded per-atom arrays.

    Every cell of the table returned by the parent parser is read as a
    ';'-separated list of values and expanded to a float array of length
    `self.max_atoms`, with NaN in the unused trailing positions.

    Args:
        outfile (str):
            Path of the output file, forwarded to the parent parser.

    Returns:
        pd.Panel: built from the nested list of padded arrays, with axes
        1 and 2 swapped.
    """
    table = super(ChemAxonAtomFeaturizer, self)._parse_outfile(outfile)

    # Tokens with a fixed meaning; anything else is tried as a float.
    literals = {'': np.nan, 'false': 0, 'true': 1}

    def parse_string(token):
        """ Convert one token to a number, NaN if it cannot be read. """
        if token in literals:
            return literals[token]
        try:
            return float(token)
        except ValueError:
            return np.nan

    def to_padded(cell):
        """ Expand one cell to a NaN-padded array of `max_atoms` values. """
        padded = np.repeat(np.nan, self.max_atoms)
        values = np.array([parse_string(token)
                           for token in str(cell).split(';')])
        padded[:len(values)] = values
        return padded

    padded_table = table.applymap(to_padded)
    return pd.Panel(padded_table.values.tolist()).swapaxes(1, 2)
||
493 | |||
494 | |||
495 | 1 | class ChemAxonNMRPredictor(ChemAxonBaseFeaturizer, BatchTransformer, |
|
0 ignored issues
–
show
This class should have a docstring.
The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass:
def some_method(self):
"""Do x and return foo."""
If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. ![]() |
|||
496 | AtomTransformer): |
||
497 | |||
498 | 1 | _feat_columns = {'cnmr': ['cnmr'], 'hnmr': ['hnmr']} |
|
499 | 1 | _optimal_feats = ['cnmr'] |
|
500 | |||
@property
def name(self):
    """ str: the fixed identifier 'cx_nmr'. """
    return 'cx_nmr'
||
504 | |||
def _transform_atom(self, atom):
    """ Unsupported: always raises.

    Args:
        atom:
            Ignored.

    Raises:
        NotImplementedError: unconditionally.
    """
    raise NotImplementedError('ChemAxon cannot predict for atoms.')
||
507 | |||
def monitor_progress(self, filename):
    """ Count the peak assignment blocks present in `filename`.

    Args:
        filename (str):
            Path of the file to scan; it is read in binary mode.

    Returns:
        int: the number of lines that are exactly the CRLF-terminated
        `##PEAKASSIGNMENTS=(XYMA)` header.
    """
    marker = b'##PEAKASSIGNMENTS=(XYMA)\r\n'
    # Use a context manager so the handle is closed even if reading fails.
    with open(filename, 'rb') as handle:
        return sum(1 for line in handle if line == marker)
||
511 | |||
@property
def minor_axis(self):
    """ pd.Index: `self.features` wrapped in an index named 'shift'. """
    return pd.Index(self.features, name='shift')
||
515 | |||
@property
def features(self):
    """ list: the selected NMR calculation, as a one-element list. """
    return self._features

@features.setter
def features(self, val):
    """ Select which NMR prediction to run.

    Args:
        val (str):
            `'c'` selects `cnmr`, `'h'` selects `hnmr`, and `'optimal'`
            selects the entries of `_optimal_feats`.

    Raises:
        NotImplementedError:
            For any other value.
    """
    if val == 'c':
        self._features = ['cnmr']
    elif val == 'h':
        self._features = ['hnmr']
    elif val == 'optimal':
        # 'optimal' is accepted so that a caller passing that keyword
        # (e.g. as a constructor default) gets the class default rather
        # than an error.  A copy keeps the class-level list unshared.
        self._features = list(self._optimal_feats)
    else:
        raise NotImplementedError('Feature {} not implemented'.format(val))
||
528 | |||
def _parse_outfile(self, outfile):
    """ Parse predicted shifts from the peak assignment blocks of `outfile`.

    The file is read in binary mode.  Each block starts with a
    `##PEAKASSIGNMENTS=(XYMA)` line and ends with a `##END=` line; every
    row in between is matched against a pattern capturing a shift value
    and a comma-separated list of atom indices, and the shift is stored
    for each of those atoms.  Blocks are numbered in file order.

    Args:
        outfile (str):
            Path of the file to parse.

    Returns:
        pd.Panel: wraps an array of shape
        (number of blocks, `self.max_atoms`, 1) created by `nanarray`.
    """
    # NOTE(review): pd.Panel is not available in recent pandas releases -
    # confirm the supported pandas version before relying on this.
    start_marker = b'##PEAKASSIGNMENTS=(XYMA)\r\n'
    end_marker = b'##END=\r\n'

    n_mols = self.monitor_progress(outfile)
    res = nanarray((n_mols, self.max_atoms, 1))

    # Raw bytes literal: same pattern as before, but without relying on
    # unrecognised string escapes such as `\(` and `\d`.
    regex = re.compile(br'\((-?\d+.\d+),\d+,[A-Z],<([0-9\,]+)>\)\r\n')

    mol_idx = 0
    with open(outfile, 'rb') as handle:
        # The inner loop consumes lines from the same handle, so the outer
        # loop resumes after the end of each block.
        for line in handle:
            if line != start_marker:
                continue
            for row in handle:
                if row == end_marker:
                    break
                LOGGER.debug('Row to parse: %s', row)
                shift, idxs = regex.match(row).groups()
                shift = float(shift)
                idxs = [int(idx) for idx in idxs.split(b',')]
                for atom_idx in idxs:
                    res[mol_idx, atom_idx] = shift
            mol_idx += 1
    return pd.Panel(res)
||
553 | |||
def transform(self, inp):
    """ Run the parent class `transform` and return its transpose.

    Args:
        inp:
            Forwarded unchanged to the parent `transform`.

    Returns:
        The `.T` attribute of the parent's result.
    """
    return super(ChemAxonNMRPredictor, self).transform(inp).T
||
556 |
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing
__init__.py
files in your module folders. Make sure that you place one file in each sub-folder.