| Conditions | 17 |
| Total Lines | 83 |
| Lines | 0 |
| Ratio | 0 % |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign that the commented part should be extracted into a new method; the comment itself is a good starting point when coming up with a name for that new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
Complex classes like read_sdf() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | #! /usr/bin/env python |
||
def read_sdf(sdf, error_bad_mol=False, warn_bad_mol=True, nmols=None,
             skipmols=None, skipfooter=None, read_props=True, mol_props=False,
             *args, **kwargs):

    """
    Read an sdf file into a pandas dataframe.
    The function wraps the RDKit ForwardSDMolSupplier object.

    @param sdf           A file path provided as a :str:, or a :file-like:
                         object.  A path is opened (and closed again) by this
                         function; a file-like object is left open for the
                         caller to manage.
    @param error_bad_mol A :bool: specifying if an error should be raised if
                         a molecule fails to parse.
    @param warn_bad_mol  A :bool: specifying if a warning should be output
                         if a molecule fails to parse.
    @param nmols         An :int: specifying number of molecules to read.
                         If None, read all molecules.
    @param skipmols      An :int: specifying number of molecules to skip at
                         start.
    @param skipfooter    An :int: specifying number of molecules to skip
                         from the end.
    @param read_props    A :bool: specifying whether the molecule properties
                         are added as columns of the returned dataframe.
    @param mol_props     A :bool: specifying whether to keep properties in
                         the molecule dictionary.
    Additionally, ForwardSDMolSupplier arguments will be passed.

    @returns df          A dataframe of type :pandas.core.frame.DataFrame:,
                         indexed by molecule name, with the molecules in the
                         'structure' column.

    @raises ValueError   If a molecule fails to parse and error_bad_mol is
                         set.
    """

    # nmols counts molecules to read, but the loop below compares against the
    # running position in the file, so the cutoff must be shifted by the number
    # of molecules skipped at the start.  Only None means "no limit"; this also
    # avoids adding an int to None when skipmols is given without nmols.
    stop = None if nmols is None else nmols + (skipmols or 0)

    # Remember whether we own the handle so that we only close what we opened.
    opened_here = isinstance(sdf, str)
    if opened_here:
        sdf = open(sdf, 'rb')  # use read bytes for python 3 compatibility

    mols = []

    try:
        # use the suppression context manager to not pollute our stdout with
        # rdkit errors and warnings.
        # perhaps this should be captured better by Mol etc.
        with Suppressor():

            mol_supp = Chem.ForwardSDMolSupplier(sdf, *args, **kwargs)

            # single loop through sdf
            for i, mol in enumerate(mol_supp):

                if skipmols and i < skipmols:
                    continue

                if stop is not None and i >= stop:
                    break

                # rdkit returns None if it fails to parse a molecule.  Either
                # raise, warn, or silently skip depending on the flags.
                if mol is None:
                    msg = 'Molecule {} could not be decoded.'.format(i + 1)
                    if error_bad_mol:
                        raise ValueError(msg)
                    elif warn_bad_mol:
                        warnings.warn(msg)
                    continue

                mols.append(skchem.Mol(mol))
    finally:
        # Release the file handle even when a bad molecule raised above.
        if opened_here:
            sdf.close()

    if skipfooter:
        mols = mols[:-skipfooter]

    idx = pd.Index([m.name for m in mols], name='name')
    data = pd.DataFrame(mols, columns=['structure'])

    if read_props:
        props = pd.DataFrame([mol.props for mol in mols])
        data = pd.concat([data, props], axis=1)

    # now we have extracted the props, we can delete if required
    # NOTE(review): _drop_props is defined outside this block; presumably it
    # clears the properties on each row's molecule in place -- confirm.
    if not mol_props:
        data.apply(_drop_props, axis=1)

    data.index = idx
    return data
||
| 114 | |||
| 172 |