1 | #! /usr/bin/env python |
||
2 | # |
||
3 | # Copyright (C) 2016 Rich Lewis <[email protected]> |
||
4 | # License: 3-clause BSD |
||
5 | |||
6 | 1 | """ |
|
7 | # skchem.data.converters.chembl |
||
8 | |||
9 | Dataset constructor for ChEMBL |
||
10 | """ |
||
11 | 1 | import logging |
|
12 | 1 | import pandas as pd |
|
0 ignored issues
–
show
|
|||
13 | 1 | import os |
|
14 | |||
15 | 1 | from .base import Converter, default_pipeline, contiguous_order, Feature |
|
16 | 1 | from ...cross_validation import SimThresholdSplit |
|
17 | 1 | from ... import features |
|
18 | |||
19 | 1 | LOGGER = logging.getLogger(__name__) |
|
20 | |||
21 | |||
22 | 1 | class ChEMBLConverter(Converter): |
|
23 | |||
24 | """ Converter for the ChEMBL dataset. """ |
||
25 | |||
26 | 1 | def __init__(self, directory, output_directory, output_filename='chembl.h5'): |
|
0 ignored issues
–
show
The `__init__` method of the super-class `Converter` is not called.
It is generally advisable to initialize the super-class by calling its `__init__` method:

    class SomeParent:
        def __init__(self):
            self.x = 1

    class SomeChild(SomeParent):
        def __init__(self):
            # Initialize the super class
            SomeParent.__init__(self)
|
|||
27 | |||
28 | output_path = os.path.join(output_directory, output_filename) |
||
29 | |||
30 | infile = os.path.join(directory, 'chembl_raw.h5') |
||
31 | ms, y = self.parse_infile(infile) |
||
0 ignored issues
–
show
The name `ms` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{2,30}$`).
This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site.

The name `y` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{2,30}$`).
This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site.
|
|||
32 | |||
33 | pipeline = default_pipeline() |
||
34 | |||
35 | ms, y = pipeline.transform_filter(ms, y) |
||
0 ignored issues
–
show
The name `ms` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{2,30}$`).
This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site.

The name `y` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{2,30}$`).
This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site.
|
|||
36 | |||
37 | cv = SimThresholdSplit(min_threshold=0.6, n_jobs=-1).fit(ms) |
||
0 ignored issues
–
show
The name `cv` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{2,30}$`).
This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site.
|
|||
38 | train, valid, test = cv.split((70, 15, 15)) |
||
39 | (ms, y, train, valid, test) = contiguous_order((ms, y, train, valid, test), (train, valid, test)) |
||
0 ignored issues
–
show
The name `ms` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{2,30}$`).
This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site.

The name `y` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{2,30}$`).
This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site.
|
|||
40 | splits = (('train', train), ('valid', valid), ('test', test)) |
||
41 | |||
42 | feats = ( |
||
43 | Feature(fper=features.MorganFeaturizer(), |
||
0 ignored issues
–
show
|
|||
44 | key='X_morg', |
||
45 | axis_names=['batch', 'features']), |
||
46 | Feature(fper=features.PhysicochemicalFeaturizer(), |
||
0 ignored issues
–
show
|
|||
47 | key='X_pc', |
||
48 | axis_names=['batch', 'features']), |
||
49 | Feature(fper=features.AtomFeaturizer(max_atoms=100), |
||
0 ignored issues
–
show
|
|||
50 | key='A', |
||
51 | axis_names=['batch', 'atom_idx', 'features']), |
||
52 | Feature(fper=features.GraphDistanceTransformer(max_atoms=100), |
||
0 ignored issues
–
show
|
|||
53 | key='G', |
||
54 | axis_names=['batch', 'atom_idx', 'atom_idx']), |
||
55 | Feature(fper=features.SpacialDistanceTransformer(max_atoms=100), |
||
0 ignored issues
–
show
|
|||
56 | key='G_d')) |
||
57 | |||
58 | self.run(ms, y, output_path, features=feats, splits=splits) |
||
59 | |||
60 | |||
61 | 1 | def parse_infile(self, filename): |
|
0 ignored issues
–
show
This method should have a docstring.
The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:

    class SomeClass:
        def some_method(self):
            """Do x and return foo."""

If you would like to know more about docstrings, we recommend reading PEP-257: Docstring Conventions.

This method could be written as a function/class method.
If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example

    class Foo:
        def some_method(self, x, y):
            return x + y;

could be written as

    class Foo:
        @classmethod
        def some_method(cls, x, y):
            return x + y;
|
|||
62 | |||
63 | ms = pd.read_hdf(filename, 'structure') |
||
0 ignored issues
–
show
The name `ms` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{2,30}$`).
This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site.
|
|||
64 | y = pd.read_hdf(filename, 'targets/Y') |
||
0 ignored issues
–
show
The name `y` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{2,30}$`).
This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site.
|
|||
65 | return ms, y |
||
66 | |||
67 | 1 | if __name__ == '__main__': |
|
68 | logging.basicConfig(level=logging.DEBUG) |
||
69 | LOGGER.info('Converting ChEMBL...') |
||
70 | ChEMBLConverter.convert() |
||
71 |
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one such file in each sub-folder.