| 1 |  |  | #! /usr/bin/env python | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | # | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | # Copyright (C) 2016 Rich Lewis <[email protected]> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | # License: 3-clause BSD | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 | 1 |  | """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | ## skchem.descriptors.atom | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | Module specifying atom based descriptor generators. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 | 1 |  | import logging | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 | 1 |  | import subprocess | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 | 1 |  | import re | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 | 1 |  | from abc import ABCMeta | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 | 1 |  | import pandas as pd | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 | 1 |  | import numpy as np | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 | 1 |  | from ..utils import line_count, nanarray | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 | 1 |  | from ..base import ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |     CLIWrapper, Transformer, AtomTransformer, BatchTransformer, Featurizer | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  | ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 | 1 |  | LOGGER = logging.getLogger(__file__) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  | # TODO: fix averagemicrospeciescharge | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  | # TODO: fix logd logp logs | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  | # TODO: oen (orbital electronegativity) - sigma + pi | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  | # TODO: water accessible surface area | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  | # TODO: the calculations listed below don't produce csv output | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  | # ['doublebondstereoisomers', 'conformers', 'stereoisomers', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  | # 'moleculardynamics', 'stereoanalysis', 'lowestenergyconformer', 'msdistr2', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  | # 'conformations', 'dominanttautomerdistribution', 'hnmr', 'moldyn', 'cnmr', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  | # 'frameworks', 'microspeciesdistribution', 'nmr', 'leconformer', 'msdistr', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  | # 'tetrahedralstereoisomers'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  | # | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 | 1 |  | CHEMAXON_HINT = """ Install ChemAxon from https://www.chemaxon.com. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  | It requires a license, which can be freely obtained for academics. """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 | 1 |  | class ChemAxonBaseFeaturizer(CLIWrapper, Featurizer): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                            
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |     """ Base class for ChemAxonFeaturizers (using cxcalc). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |     Concrete subclasses inheriting from this should override | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |     `_all_feats`, `_optimal_features`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 | 1 |  |     __metaclass__ = ABCMeta | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 | 1 |  |     install_hint = CHEMAXON_HINT | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 | 1 |  |     _feat_columns = {'averagepol': ['a_avg'], 'name': ['preferred_iupac_name'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |                      'aromaticbondcount': ['aromatic_bond_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |                      'maximalprojectionradius': ['maximal_projection_radius'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |                      'tpolarizability': ['a_avg', 'a_xx', 'a_yy', 'a_zz'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |                      'distance': ['distance'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |                      'acceptor': ['acceptor_count', 'acceptor_site_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |                      'fusedringcount': ['fused_ring_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |                      'charge': ['total_charge'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |                      'donor': ['donor_count', 'donor_site_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |                      'ringcount': ['ring_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |                      'chainbond': ['chain_bond'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |                      'mmff94energy': ['mmff94_energy'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |                      'huckel': ['aromatic_e+/nu-_order', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |                                 'localization_energy_l_+/l-', 'pi_energy', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |                                 'electron_density', 'charge_density'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |                      'chainatom': ['chain_atom'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |                      'shortestpath': ['shortest_path'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |                      'resonantcount': ['resonant_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |                      'tpol': ['a_avg', 'a_xx', 'a_yy', 'a_zz'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |                      'moststabletautomer': ['most_stable_tautomer'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |                      'generictautomer': ['generictautomer'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |                      'hmoelectrophilicityorder': ['hmo_aromatic_e+_order'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |                      'ringsystemcountofsize': ['ring_system_count_of_size'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |                      'largestatomringsize': ['largest_ring_size_of_atom'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |                      'tetrahedralstereoisomercount': [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |                          'tetrahedral_stereoisomer_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |                      'enumerations': ['enumerations'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |                      'ringatom': ['ring_atom'], 'connected': ['connected'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |                      'hmolocalizationenergy': [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |                          'hmo_localization_energy_l+/l-'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |                      'averagemolecularpolarizability': ['a_avg'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |                      'donorsitecount': ['donor_site_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |                      'donorcount': ['donor_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |                      'asymmetricatom': ['asymmetric_atom'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |                      'pienergy': ['pi_energy'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |                      'bondcount': ['bond_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |                      'chiralcenters': ['chiral_centers'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |                      'hmohuckel': ['hmo_aromatic_e+/nu-_order', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |                                    'hmo_localization_energy_l+/l-', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |                                    'hmo_pi_energy', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |                                    'hmo_electron_density', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |                                    'hmo_charge_density'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |                      'huckeleigenvector': ['eigenvector'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |                      'ringcountofsize': ['ring_count_of_size'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |                      'heteroaliphaticringcount': [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |                          'heteroaliphatic_ring_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |                      'markushenumerations': ['enumerations'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |                      'minimalprojectionradius': ['minimal_projection_radius'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |                      'dipole': ['dipole'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |                      'balabanindex': ['balaban_index'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |                      'aromaticnucleophilicityorder': ['aromatic_nu-_order'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |                      'tautomercount': ['tautomer_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |                      'cyclomaticnumber': ['cyclomatic_number'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |                      'psa': ['polar_surface_area'], 'isoelectricpoint': ['pi'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |                      'hmopienergy': ['hmo_pi_energy'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |                      'ayypol': ['a_yy'], 'fragmentcount': ['fragment_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |                      'acceptormultiplicity': ['acceptor_multiplicity'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |                      'topologyanalysistable': ['atom_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |                                                'aliphatic_atom_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |                                                'aromatic_atom_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |                                                'bond_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |                                                'aliphatic_bond_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |                                                'aromatic_bond_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |                                                'rotatable_bond_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |                                                'ring_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |                                                'aliphatic_ring_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |                                                'aromatic_ring_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  |                                                'hetero_ring_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  |                                                'heteroaliphatic_ring_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  |                                                'heteroaromatic_ring_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  |                                                'ring_atom_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  |                                                'ring_bond_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 |  |  |                                                'chain_atom_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 129 |  |  |                                                'chain_bond_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 130 |  |  |                                                'smallest_ring_size', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 131 |  |  |                                                'largest_ring_size'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 132 |  |  |                      'ioncharge': ['charge'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 133 |  |  |                      'asymmetricatoms': ['asymmetric_atoms'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  |                      'wateraccessiblesurfacearea': ['asa', 'asa+', 'asa-', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  |                                                     'asa_h', 'asa_p'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  |                      'avgpol': ['a_avg'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  |                      'carboaliphaticringcount': ['carboaliphatic_ring_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 |  |  |                      'aliphaticringcount': ['aliphatic_ring_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  |                      'donormultiplicity': ['donor_multiplicity'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 140 |  |  |                      'minimalprojectionarea': ['minimal_projection_area'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 141 |  |  |                      'nucleophiliclocalizationenergy': [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 142 |  |  |                          'localization_energy_l-'], 'dihedral': ['dihedral'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 143 |  |  |                      'heteroringcount': ['hetero_ring_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 144 |  |  |                      'azzpol': ['a_zz'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 145 |  |  |                      'molecularsurfacearea': ['van_der_waals_surface_area_3d'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 146 |  |  |                      'hmonucleophiliclocalizationenergy': [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 147 |  |  |                          'hmo_localization_energy_l-'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 148 |  |  |                      'chargedistribution': ['charge_distribution'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 149 |  |  |                      'pol': ['a_mol', 'a_atom'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 150 |  |  |                      'hmoelectrondensity': ['hmo_electron_density'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 151 |  |  |                      'carboaromaticringcount': ['carboaromatic_ring_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 152 |  |  |                      'acceptorsitecount': ['acceptor_site_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 153 |  |  |                      'markushenumerationcount': ['markush_library_size'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 154 |  |  |                      'localizationenergy': ['localization_energy_l+/l-'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 155 |  |  |                      'hararyindex': ['harary_index'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 |  |  |                      'asa': ['asa', 'asa+', 'asa-', 'asa_h', 'asa_p'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  |                      'acc': ['acc'], 'majortautomer': ['major_tautomer'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 |  |  |                      'majormicrospecies': ['major-ms'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  |                      'aliphaticatomcount': ['aliphatic_atom_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  |                      'angle': ['angle'], 'huckeleigenvalue': ['eigenvalue'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 161 |  |  |                      'axxpol': ['a_xx'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 162 |  |  |                      'chiralcenter': ['chiral_center'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 163 |  |  |                      'aliphaticbondcount': ['aliphatic_bond_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 164 |  |  |                      'smallestatomringsize': ['smallest_ring_size_of_atom'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 165 |  |  |                      'dreidingenergy': ['dreiding_energy'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 166 |  |  |                      'maximalprojectionsize': [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 167 |  |  |                          'length_perpendicular_to_the_max_area'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 168 |  |  |                      'largestringsystemsize': ['largest_ring_system_size'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 169 |  |  |                      'accsitecount': ['acceptor_site_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 170 |  |  |                      'refractivity': ['refractivity'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 171 |  |  |                      'bondtype': ['bond_type'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 172 |  |  |                      'chargedensity': ['charge_density'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 173 |  |  |                      'resonants': ['resonants'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 174 |  |  |                      'aromaticatomcount': ['aromatic_atom_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 175 |  |  |                      'distancedegree': ['distance_degree'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 176 |  |  |                      'hasvalidconformer': ['has_valid_conformer'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 177 |  |  |                      'electrondensity': ['electron_density'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 178 |  |  |                      'asymmetricatomcount': ['asymmetric_atom_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 179 |  |  |                      'fsp3': ['fsp3'], 'don': ['don'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 180 |  |  |                      'fusedaliphaticringcount': ['fused_aliphatic_ring_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 181 |  |  |                      'pkat': ['pkat'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 182 |  |  |                      'fusedaromaticringcount': ['fused_aromatic_ring_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 183 |  |  |                      'majorms2': ['majorms2'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 184 |  |  |                      'maximalprojectionarea': ['maximal_projection_area'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 185 |  |  |                      'hbonddonoracceptor': ['acceptor_count', 'donor_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 186 |  |  |                                             'acceptor_site_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 187 |  |  |                                             'donor_site_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 188 |  |  |                      'acceptorcount': ['acceptor_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 189 |  |  |                      'molecularpolarizability': ['a_mol'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 190 |  |  |                      'huckeltable': ['aromatic_e+/nu-_order', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 191 |  |  |                                      'localization_energy_l+/l-', 'pi_energy', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 192 |  |  |                                      'electron_density', 'charge_density'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 193 |  |  |                      'rotatablebondcount': ['rotatable_bond_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 194 |  |  |                      'minimalprojectionsize': [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 195 |  |  |                          'length_perpendicular_to_the_min_area'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 196 |  |  |                      'polarizability': ['a_mol', 'a_atom'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 197 |  |  |                      'acceptortable': ['acceptor_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 198 |  |  |                                        'acceptor_site_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 199 |  |  |                      'aliphaticringcountofsize': [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 200 |  |  |                          'aliphatic_ring_count_of_size'], 'hlb': ['hlb'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 201 |  |  |                      'eccentricity': ['eccentricity'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 202 |  |  |                      'hmochargedensity': ['hmo_charge_density'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 203 |  |  |                      'hmohuckeleigenvalue': ['hmo_eigenvalue'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 204 |  |  |                      'totalchargedensity': ['total_charge_density'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 205 |  |  |                      'hmonucleophilicityorder': ['hmo_aromatic_nu-_order'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 206 |  |  |                      'aromaticringcountofsize': [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 207 |  |  |                          'aromatic_ring_count_of_size'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 208 |  |  |                      'electrophilicityorder': ['aromatic_e+_order'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 209 |  |  |                      'connectedgraph': ['connected_graph'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 210 |  |  |                      'plattindex': ['platt_index'], 'logp': ['logp'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 211 |  |  |                      'topanal': ['atom_count', 'aliphatic_atom_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 212 |  |  |                                  'aromatic_atom_count', 'bond_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 213 |  |  |                                  'aliphatic_bond_count', 'aromatic_bond_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 214 |  |  |                                  'rotatable_bond_count', 'ring_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 215 |  |  |                                  'aliphatic_ring_count', 'aromatic_ring_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 216 |  |  |                                  'hetero_ring_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 217 |  |  |                                  'heteroaliphatic_ring_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 218 |  |  |                                  'heteroaromatic_ring_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 219 |  |  |                                  'ring_atom_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 220 |  |  |                                  'ring_bond_count', 'chain_atom_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 221 |  |  |                                  'chain_bond_count', 'smallest_ring_size', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 222 |  |  |                                  'largest_ring_size'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 223 |  |  |                      'logdcalculator': ['ph=0', 'ph=1', 'ph=2', 'ph=3', 'ph=4', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 224 |  |  |                                         'ph=5', 'ph=6', 'ph=7', 'ph=8', 'ph=9', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 225 |  |  |                                         'ph=10', 'ph=11', 'ph=12', 'ph=13', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 226 |  |  |                                         'ph=14', 'unnamed:_16'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 227 |  |  |                      'logs': ['ph=0.0', 'ph=1.0', 'ph=2.0', 'ph=3.0', 'ph=4.0', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 228 |  |  |                               'ph=5.0', 'ph=6.0', 'ph=7.0', 'ph=8.0', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 229 |  |  |                               'ph=9.0', 'ph=10.0', 'ph=11.0', 'ph=12.0', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 230 |  |  |                               'ph=13.0', 'ph=14.0', 'unnamed:_16'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 231 |  |  |                      'atompol': ['a_atom'], 'canonicalresonant': ['structure'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 232 |  |  |                      'ringbond': ['ring_bond'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 233 |  |  |                      'ringatomcount': ['ring_atom_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 234 |  |  |                      'donortable': ['donor_count', 'donor_site_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 235 |  |  |                      'randicindex': ['randic_index'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 236 |  |  |                      'rotatablebond': ['rotatable_bond'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 237 |  |  |                      'hyperwienerindex': ['hyper_wiener_index'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 238 |  |  |                      'hmohuckeleigenvector': ['hmo_eigenvector'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 239 |  |  |                      'carboringcount': ['carbo_ring_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 240 |  |  |                      'logpcalculator': ['logp', 'unnamed:_2'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 241 |  |  |                      'ringsystemcount': ['ring_system_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 242 |  |  |                      'largestringsize': ['largest_ring_size'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 243 |  |  |                      'stereodoublebondcount': ['stereo_double_bond_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 244 |  |  |                      'pi': ['pi'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 245 |  |  |                      'stericeffectindex': ['steric_effect_index'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 246 |  |  |                      'volume': ['van_der_waals_volume'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 247 |  |  |                      'averagemicrospeciescharge': ['charge'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 248 |  |  |                      'pka': ['apka1', 'apka2', 'bpka1', 'bpka2', 'atoms'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 249 |  |  |                      'hmohuckeltable': ['hmo_aromatic_e+/nu-_order', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 250 |  |  |                                         'hmo_localization_energy_l+/l-', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 251 |  |  |                                         'hmo_pi_energy', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 252 |  |  |                                         'hmo_electron_density', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 253 |  |  |                                         'hmo_charge_density'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 254 |  |  |                      'ringcountofatom': ['ring_count_of_atom'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 255 |  |  |                      'aromaticelectrophilicityorder': ['aromatic_e+_order'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 256 |  |  |                      'hindrance': ['steric_hindrance'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 257 |  |  |                      'chainatomcount': ['chain_atom_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 258 |  |  |                      'pkacalculator': ['apka1', 'apka2', 'bpka1', 'bpka2', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 259 |  |  |                                        'atoms'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 260 |  |  |                      'heteroaromaticringcount': ['heteroaromatic_ring_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 261 |  |  |                      'sterichindrance': ['steric_hindrance'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 262 |  |  |                      'hbda': ['acceptor_count', 'donor_count', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 263 |  |  |                               'acceptor_site_count', 'donor_site_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 264 |  |  |                      'molpol': ['a_mol'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 265 |  |  |                      'atomicpolarizability': ['a_atom'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 266 |  |  |                      'msdon': ['ph=0.00', 'ph=1.00', 'ph=2.00', 'ph=3.00', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 267 |  |  |                                'ph=4.00', 'ph=5.00', 'ph=6.00', 'ph=7.00', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 268 |  |  |                                'ph=8.00', 'ph=9.00', 'ph=10.00', 'ph=11.00', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 269 |  |  |                                'ph=12.00', 'ph=13.00', 'ph=14.00'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 270 |  |  |                      'enumerationcount': ['markush_library_size'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 271 |  |  |                      'vdwsa': ['van_der_waals_surface_area_3d'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 272 |  |  |                      'orbitalelectronegativity': [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 273 |  |  |                          'sigma_orbital_electronegativity', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 274 |  |  |                          'pi_orbital_electronegativity'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 275 |  |  |                      'hmoelectrophiliclocalizationenergy': [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 276 |  |  |                          'hmo_localization_energy_l+'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 277 |  |  |                      'smallestringsize': ['smallest_ring_size'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 278 |  |  |                      'szegedindex': ['szeged_index'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 279 |  |  |                      'nucleophilicityorder': ['aromatic_nu-_order'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 280 |  |  |                      'canonicaltautomer': ['canonical_tautomer'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 281 |  |  |                      'stereoisomercount': ['stereoisomer_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 282 |  |  |                      'msa': ['van_der_waals_surface_area_3d'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 283 |  |  |                      'donsitecount': ['donor_site_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 284 |  |  |                      'randommarkushenumerations': [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 285 |  |  |                          'randommarkushenumerations'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 286 |  |  |                      'wienerindex': ['wiener_index'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 287 |  |  |                      'huckelorbitals': ['orbitals'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 288 |  |  |                      'doublebondstereoisomercount': [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 289 |  |  |                          'double_bond_stereoisomer_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 290 |  |  |                      'tautomers': ['tautomers'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 291 |  |  |                      'polarsurfacearea': ['polar_surface_area'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 292 |  |  |                      'chiralcentercount': ['chiral_center_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 293 |  |  |                      'electrophiliclocalizationenergy': [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 294 |  |  |                          'localization_energy_l+'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 295 |  |  |                      'aliphaticatom': ['aliphatic_atom'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 296 |  |  |                      'ringbondcount': ['ring_bond_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 297 |  |  |                      'wienerpolarity': ['wiener_polarity'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 298 |  |  |                      'msacc': ['ph=0.00', 'ph=1.00', 'ph=2.00', 'ph=3.00', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 299 |  |  |                                'ph=4.00', 'ph=5.00', 'ph=6.00', 'ph=7.00', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 300 |  |  |                                'ph=8.00', 'ph=9.00', 'ph=10.00', 'ph=11.00', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 301 |  |  |                                'ph=12.00', 'ph=13.00', 'ph=14.00'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 302 |  |  |                      'formalcharge': ['formal_charge'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 303 |  |  |                      'smallestringsystemsize': ['smallest_ring_system_size'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 304 |  |  |                      'majorms': ['major-ms'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 305 |  |  |                      'tholepolarizability': ['a_avg', 'a_xx', 'a_yy', 'a_zz'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 306 |  |  |                      'aromaticatom': ['aromatic_atom'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 307 |  |  |                      'oen': ['sigma_orbital_electronegativity', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 308 |  |  |                              'pi_orbital_electronegativity'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 309 |  |  |                      'chainbondcount': ['chain_bond_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 310 |  |  |                      'logd': ['ph=0.00', 'ph=1.00', 'ph=2.00', 'ph=3.00', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 311 |  |  |                               'ph=4.00', 'ph=5.00', 'ph=6.00', 'ph=7.00', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 312 |  |  |                               'ph=8.00', 'ph=9.00', 'ph=10.00', 'ph=11.00', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 313 |  |  |                               'ph=12.00', 'ph=13.00', 'ph=14.00'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 314 |  |  |                      'hmohuckelorbitals': ['hmo_orbitals'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 315 |  |  |                      'aromaticringcount': ['aromatic_ring_count'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 316 |  |  |                      'pichargedensity': ['pi_charge_density']} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 317 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
    # Descriptor names used when ``features`` is set to 'optimal' (or 'all').
    # Empty here; subclasses override this with their own list.
    _optimal_feats = []
            
                                                                                                            
                            
            
                                    
            
            
                | 319 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
    def __init__(self, features='optimal', verbose=True):
        """Initialize the featurizer.

        Args:
            features (str or list or tuple):
                Descriptor selection; assigned through the ``features``
                property.  Defaults to ``'optimal'``.
            verbose (bool):
                Forwarded to the parent class initializer.
        """
        # Give the backing attribute a value before the parent initializer
        # runs.
        self._features = None
        super(ChemAxonBaseFeaturizer, self).__init__(verbose=verbose)
        # Assign through the property rather than to ``_features`` directly.
        self.features = features
            
                                                                                                            
                            
            
                                    
            
            
                | 324 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 325 | 1 |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 326 |  |  |     def features(self): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 327 |  |  |         return self._features | 
            
                                                                                                            
                            
            
                                    
            
            
                | 328 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 329 | 1 |  |     @features.setter | 
            
                                                                                                            
                            
            
                                    
            
            
                | 330 |  |  |     def features(self, features): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 331 |  |  |         if features in ('optimal', 'all'): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 332 |  |  |             self._features = self._optimal_feats | 
            
                                                                                                            
                            
            
                                    
            
            
                | 333 |  |  |         elif isinstance(features, str): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 334 |  |  |             self.features = [features] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 335 |  |  |         elif isinstance(features, (list, tuple)): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 336 |  |  |             valid = np.array([feat in self._feat_columns.keys() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 337 |  |  |                               for feat in features]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 338 |  |  |             if not all(valid): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 339 |  |  |                 msg = 'Descriptor "{}" not available.'.format( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 340 |  |  |                     np.array(features)[~valid]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 341 |  |  |                 raise NotImplementedError(msg) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 342 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 343 |  |  |                 self._features = list(features) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 344 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 345 |  |  |             raise NotImplementedError('Feature set {} not available.'.format( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 346 |  |  |                 features)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 347 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 348 | 1 |  |     def _feature_index(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 349 |  |  |         return pd.Index(sum((self._feat_columns[feat] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 350 |  |  |                              for feat in self.features), []), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 351 |  |  |                         name='features') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 352 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 353 | 1 |  |     @staticmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 354 |  |  |     def validate_install(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 355 |  |  |         try: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 356 |  |  |             return 0 == subprocess.call(['cxcalc'], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 357 |  |  |                                         stderr=subprocess.DEVNULL, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 358 |  |  |                                         stdout=subprocess.DEVNULL) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 359 |  |  |         except FileNotFoundError: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 360 |  |  |             return False | 
            
                                                                                                            
                            
            
                                    
            
            
                | 361 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 362 | 1 |  |     def monitor_progress(self, filename): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 363 |  |  |         res = line_count(filename) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 364 |  |  |         return res - 1 if res else 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 365 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 366 | 1 |  |     def _cli_args(self, infile, outfile): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 367 |  |  |         return ['cxcalc', infile, '-o', outfile] + self.features | 
            
                                                                                                            
                            
            
                                    
            
            
                | 368 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 369 | 1 |  |     def _parse_outfile(self, outfile): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 370 |  |  |         res = pd.read_table(outfile, engine='python').drop('id', axis=1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 371 |  |  |         return res | 
            
                                                                                                            
                            
            
                                    
            
            
                | 372 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
    def _parse_errors(self, errs):
        """Log the captured stderr and report no skipped instances.

        Args:
            errs:
                Captured stderr content; only written to the debug log.

        Returns:
            list: always empty.
        """
        LOGGER.debug('stderr: %s', errs)
        return []  # instances are never skipped, so there is nothing to report
            
                                                                                                            
                            
            
                                    
            
            
                | 376 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 377 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 378 | 1 |  | class ChemAxonFeaturizer(ChemAxonBaseFeaturizer, BatchTransformer, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                            
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 379 |  |  |                          Transformer): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 380 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 381 | 1 |  |     _optimal_feats = ['acceptorcount', 'accsitecount', 'aliphaticatomcount', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 382 |  |  |                       'aliphaticbondcount', 'aliphaticringcount', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 383 |  |  |                       'aromaticatomcount', 'aromaticbondcount', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 384 |  |  |                       'aromaticringcount', 'asymmetricatomcount', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 385 |  |  |                       'averagemolecularpolarizability', 'axxpol', 'ayypol', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 386 |  |  |                       'azzpol', 'balabanindex', 'bondcount', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 387 |  |  |                       'carboaliphaticringcount', 'carboaromaticringcount', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 388 |  |  |                       'carboringcount', 'chainatomcount', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 389 |  |  |                       'chainbondcount', 'chiralcentercount', 'connectedgraph', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 390 |  |  |                       'cyclomaticnumber', 'dipole', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 391 |  |  |                       'donorcount', 'donorsitecount', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 392 |  |  |                       'doublebondstereoisomercount', 'dreidingenergy', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 393 |  |  |                       'formalcharge', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 394 |  |  |                       'fragmentcount', 'fsp3', 'fusedaliphaticringcount', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 395 |  |  |                       'fusedaromaticringcount', 'fusedringcount', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 396 |  |  |                       'hararyindex', 'heteroaliphaticringcount', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 397 |  |  |                       'heteroaromaticringcount', 'heteroringcount', 'hlb', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 398 |  |  |                       'hmopienergy', 'hyperwienerindex', 'largestringsize', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 399 |  |  |                       'largestringsystemsize', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 400 |  |  |                       'markushenumerationcount', 'maximalprojectionarea', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 401 |  |  |                       'maximalprojectionradius', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 402 |  |  |                       'maximalprojectionsize', 'minimalprojectionarea', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 403 |  |  |                       'minimalprojectionradius', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 404 |  |  |                       'minimalprojectionsize', 'mmff94energy', 'molpol', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 405 |  |  |                       'pienergy', 'plattindex', 'psa', 'randicindex', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 406 |  |  |                       'refractivity', 'resonantcount', 'ringatomcount', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 407 |  |  |                       'ringbondcount', 'ringcount', 'ringsystemcount', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 408 |  |  |                       'rotatablebondcount', 'smallestringsize', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 409 |  |  |                       'smallestringsystemsize', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 410 |  |  |                       'stereodoublebondcount', 'stereoisomercount', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 411 |  |  |                       'szegedindex', 'tetrahedralstereoisomercount', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 412 |  |  |                       'vdwsa', 'volume', 'wateraccessiblesurfacearea', | 
            
                                                                                                            
                            
            
                                    
            
            
                | 413 |  |  |                       'wienerindex', 'wienerpolarity'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 414 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                @property
                def name(self):
                    """ The short identifier of this featurizer.

                    Returns:
                        str: always ``'cx_mol'``.
                    """
                    return 'cx_mol'
            
                                                                                                            
                            
            
                                    
            
            
                | 418 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                @property
                def columns(self):
                    """ The column labels of the transformed output.

                    Returns:
                        whatever ``self._feature_index()`` returns, unchanged.
                    """
                    feature_index = self._feature_index()
                    return feature_index
            
                                                                                                            
                            
            
                                    
            
            
                | 422 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def _parse_outfile(self, outfile):
                    """ Parse the output file and coerce every cell to a float.

                    The parent class does the actual parsing; this override only
                    post-processes the result element-wise.

                    Args:
                        outfile: passed unchanged to the parent ``_parse_outfile``.

                    Returns:
                        the result of ``applymap`` on the parent's parsed object
                        (presumably a ``pd.DataFrame`` -- confirm against the base
                        class), with every cell converted to ``float``.

                    Raises:
                        ValueError: from ``float`` if a cell is text that neither
                            contains ``'FAILED'`` nor parses as a number.
                    """
                    parsed = super(ChemAxonFeaturizer, self)._parse_outfile(outfile)

                    def as_float(cell):
                        # A string cell containing 'FAILED' is mapped to NaN; every
                        # other cell goes through float().
                        failed = isinstance(cell, str) and 'FAILED' in cell
                        return np.nan if failed else float(cell)

                    return parsed.applymap(as_float)
            
                                                                                                            
                            
            
                                    
            
            
                | 433 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 434 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                class ChemAxonAtomFeaturizer(ChemAxonBaseFeaturizer, AtomTransformer,
                                             BatchTransformer):
                    """ Atom-level featurizer backed by ChemAxon.

                    Features cannot be computed one atom at a time
                    (``_transform_atom`` always raises); instead the output parsed by
                    the parent class is expanded here into a ``pd.Panel`` in which
                    every cell of ';'-separated values becomes a float vector padded
                    with NaN to ``self.max_atoms`` entries.

                    NOTE(review): ``pd.Panel`` was removed from pandas in 0.25, so this
                    class needs an older pandas. Replacing it would change the return
                    type and has to be coordinated with the callers.
                    """

                    _optimal_feats = ['acceptormultiplicity', 'aliphaticatom', 'aromaticatom',
                                      'aromaticelectrophilicityorder', 'asymmetricatom',
                                      'atomicpolarizability', 'chainatom', 'chargedensity',
                                      'chiralcenter', 'distancedegree', 'donormultiplicity',
                                      'eccentricity', 'electrondensity',
                                      'electrophiliclocalizationenergy', 'hindrance',
                                      'hmochargedensity', 'hmoelectrondensity',
                                      'hmoelectrophilicityorder',
                                      'hmoelectrophiliclocalizationenergy',
                                      'hmonucleophilicityorder',
                                      'hmonucleophiliclocalizationenergy', 'ioncharge',
                                      'largestatomringsize', 'nucleophilicityorder',
                                      'nucleophiliclocalizationenergy', 'oen',
                                      'pichargedensity', 'ringatom', 'ringcountofatom',
                                      'stericeffectindex', 'totalchargedensity']

                    # NOTE(review): presumably the features that are also reported for
                    # hydrogen atoms -- confirm against ChemAxonBaseFeaturizer.
                    _h_inc_feats = ['acc', 'atomicpolarizability', 'charge', 'distancedegree',
                                    'don', 'eccentricity', 'hindrance', 'largestatomringsize',
                                    'oen', 'ringcountofatom', 'smallestatomringsize',
                                    'stericeffectindex']

                    @property
                    def name(self):
                        """ The short identifier of this featurizer: ``'cx_atom'``. """
                        return 'cx_atom'

                    @property
                    def minor_axis(self):
                        """ The minor axis labels: the result of ``self._feature_index()``. """
                        return self._feature_index()

                    def _transform_atom(self, atom):
                        """ Unsupported: always raises.

                        Raises:
                            NotImplementedError: unconditionally.
                        """
                        raise NotImplementedError('Cannot calculate per atom with ChemAxon')

                    def _parse_outfile(self, outfile):
                        """ Parse the output file into a NaN-padded ``pd.Panel``.

                        Each cell of the parent's parsed result is converted to text,
                        split on ';', and every token is converted to a number:
                        ``'false'`` -> 0, ``'true'`` -> 1, anything ``float`` accepts ->
                        that float, and everything else (including the empty string)
                        -> NaN. The resulting vector is right-padded with NaN up to
                        ``self.max_atoms``.

                        Args:
                            outfile: passed unchanged to the parent ``_parse_outfile``.

                        Returns:
                            pd.Panel: built from the nested per-cell vectors, with axes 1
                                and 2 swapped (presumably items are molecules, the major
                                axis atoms and the minor axis features -- confirm).

                        Raises:
                            ValueError: if a cell holds more values than
                                ``self.max_atoms``. Previously this surfaced as an opaque
                                NumPy broadcasting ``ValueError`` from the slice
                                assignment.
                        """
                        res = super(ChemAxonAtomFeaturizer, self)._parse_outfile(outfile)

                        booleans = {'false': 0, 'true': 1}

                        def parse_string(token):
                            if token in booleans:
                                return booleans[token]
                            try:
                                return float(token)
                            except ValueError:
                                # Covers the empty string as well as any other
                                # non-numeric text.
                                return np.nan

                        def to_padded(cell):
                            values = np.array([parse_string(token)
                                               for token in str(cell).split(';')])
                            if len(values) > self.max_atoms:
                                # Fail with an actionable message instead of letting the
                                # slice assignment below raise a shape mismatch.
                                raise ValueError(
                                    'Cell holds {} values, which exceeds max_atoms '
                                    '({}).'.format(len(values), self.max_atoms))
                            padded = np.repeat(np.nan, self.max_atoms)
                            padded[:len(values)] = values
                            return padded

                        res = res.applymap(to_padded)
                        return pd.Panel(res.values.tolist()).swapaxes(1, 2)
            
                                                                                                            
                            
            
                                    
            
            
                | 493 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 494 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 495 | 1 |  | class ChemAxonNMRPredictor(ChemAxonBaseFeaturizer, BatchTransformer, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                            
                                                                                            
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 496 |  |  |                            AtomTransformer): | 
            
                                                        
            
                                    
            
            
                | 497 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 498 | 1 |  |     _feat_columns = {'cnmr': ['cnmr'], 'hnmr': ['hnmr']} | 
            
                                                        
            
                                    
            
            
                | 499 | 1 |  |     _optimal_feats = ['cnmr'] | 
            
                                                        
            
                                    
            
            
                | 500 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 501 | 1 |  |     @property | 
            
                                                        
            
                                    
            
            
                | 502 |  |  |     def name(self): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                            
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 503 |  |  |         return 'cx_nmr' | 
            
                                                        
            
                                    
            
            
                | 504 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 505 | 1 |  |     def _transform_atom(self, atom): | 
            
                                                        
            
                                    
            
            
                | 506 |  |  |         raise NotImplementedError('ChemAxon cannot predict for atoms.') | 
            
                                                        
            
                                    
            
            
                | 507 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 508 | 1 |  |     def monitor_progress(self, filename): | 
            
                                                        
            
                                    
            
            
                | 509 |  |  |         return sum(1 for l in open(filename, 'rb') | 
            
                                                        
            
                                    
            
            
                | 510 |  |  |                    if l == b'##PEAKASSIGNMENTS=(XYMA)\r\n') | 
            
                                                        
            
                                    
            
            
                | 511 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 512 | 1 |  |     @property | 
            
                                                        
            
                                    
            
            
                | 513 |  |  |     def minor_axis(self): | 
            
                                                        
            
                                    
            
            
                | 514 |  |  |         return pd.Index(self.features, name='shift') | 
            
                                                        
            
                                    
            
            
                | 515 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 516 | 1 |  |     @property | 
            
                                                        
            
                                    
            
            
                | 517 |  |  |     def features(self): | 
            
                                                        
            
                                    
            
            
                | 518 |  |  |         return self._features | 
            
                                                        
            
                                    
            
            
                | 519 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 520 | 1 |  |     @features.setter | 
            
                                                        
            
                                    
            
            
                | 521 |  |  |     def features(self, val): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 522 |  |  |         if val == 'c': | 
            
                                                        
            
                                    
            
            
                | 523 |  |  |             self._features = ['cnmr'] | 
            
                                                        
            
                                    
            
            
                | 524 |  |  |         elif val == 'h': | 
            
                                                        
            
                                    
            
            
                | 525 |  |  |             self._features = ['hnmr'] | 
            
                                                        
            
                                    
            
            
                | 526 |  |  |         else: | 
            
                                                        
            
                                    
            
            
                | 527 |  |  |             raise NotImplementedError('Feature {} not implemented'.format(val)) | 
            
                                                        
            
                                    
            
            
                | 528 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 529 | 1 |  |     def _parse_outfile(self, outfile): | 
            
                                                        
            
                                    
            
            
                | 530 |  |  |         n_mols = self.monitor_progress(outfile) | 
            
                                                        
            
                                    
            
            
                | 531 |  |  |         res = nanarray((n_mols, self.max_atoms, 1)) | 
            
                                                        
            
                                    
            
            
                | 532 |  |  |         regex = re.compile(b'\((-?\d+.\d+),\d+,[A-Z],<([0-9\,]+)>\)\r\n') | 
                            
                    |  |  |  | 
                                                                                        
                                                                                            
                                                                                            
                                                                                            
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 533 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 534 |  |  |         mol_idx = 0 | 
            
                                                        
            
                                    
            
            
                | 535 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 536 |  |  |         with open(outfile, 'rb') as f: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 537 |  |  |             # loop through the file - inner loop will also advance the pointer | 
            
                                                        
            
                                    
            
            
                | 538 |  |  |             for l in f: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 539 |  |  |                 if l == b'##PEAKASSIGNMENTS=(XYMA)\r\n': | 
            
                                                        
            
                                    
            
            
                | 540 |  |  |                     for row in f: | 
            
                                                        
            
                                    
            
            
                | 541 |  |  |                         if row == b'##END=\r\n': | 
            
                                                        
            
                                    
            
            
                | 542 |  |  |                             break | 
            
                                                        
            
                                    
            
            
                | 543 |  |  |                         else: | 
            
                                                        
            
                                    
            
            
                | 544 |  |  |                             LOGGER.debug('Row to parse: %s', row) | 
            
                                                        
            
                                    
            
            
                | 545 |  |  |                             shift, idxs = regex.match(row).groups() | 
            
                                                        
            
                                    
            
            
                | 546 |  |  |                             shift = float(shift) | 
            
                                                        
            
                                    
            
            
                | 547 |  |  |                             idxs = [int(idx) for idx in idxs.split(b',')] | 
            
                                                        
            
                                    
            
            
                | 548 |  |  |                             for atom_idx in idxs: | 
            
                                                        
            
                                    
            
            
                | 549 |  |  |                                 res[mol_idx, atom_idx] = shift | 
            
                                                        
            
                                    
            
            
                | 550 |  |  |                     mol_idx += 1 | 
            
                                                        
            
                                    
            
            
                | 551 |  |  |         res = pd.Panel(res) | 
            
                                                        
            
                                    
            
            
                | 552 |  |  |         return res | 
            
                                                        
            
                                    
            
            
                | 553 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 554 | 1 |  |     def transform(self, inp): | 
            
                                                        
            
                                    
            
            
                | 555 |  |  |         return super(ChemAxonNMRPredictor, self).transform(inp).T | 
            
                                                        
            
                                    
            
            
                | 556 |  |  |  | 
            
                        
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one such file in each sub-folder.