Completed
Push — master ( 5b00a3...5fddd3 )
by Rich
14:42
created

sum_van_der_waals_volume()   A

Complexity

Conditions 1

Size

Total Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 1 Features 0
Metric Value
c 1
b 1
f 0
dl 0
loc 13
rs 9.4285
cc 1
1
#! /usr/bin/env python
2
#
3
# Copyright (C) 2016 Rich Lewis <[email protected]>
4
# License: 3-clause BSD
5
6
"""
7
# skchem.features.descriptors.constitutional
8
9
56 Constitutional features for scikit-chem.
10
"""
11
12
from functools import partial
13
14
from rdkit.Chem import rdMolDescriptors, rdmolops, Descriptors
0 ignored issues
show
Configuration introduced by
The import rdkit.Chem could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
15
from .decorators import (
0 ignored issues
show
Configuration introduced by
Unable to import 'decorators' (invalid syntax (<string>, line 107))

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
16
    requires_h_depleted, requires_h_filled, requires_bo_amat)
17
18
19
def molecular_weight(mol):
    """ The molecular weight.

    Computed by RDKit's ``rdMolDescriptors.CalcExactMolWt``.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    return rdMolDescriptors.CalcExactMolWt(mol)
31
32
33
@requires_h_filled
def average_molecular_weight(mol):
    """ Average molecular weight of atoms.

    The molecular weight divided by the number of atoms in the molecule.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    # `mol_wt` is not a function in this module; the weight is computed by
    # `molecular_weight`.
    return molecular_weight(mol) / len(mol.atoms)
0 ignored issues
show
Comprehensibility Best Practice introduced by
Undefined variable 'mol_wt'
Loading history...
47
48
49
def sum_van_der_waals_volume(mol):
    """ The sum of the Van der Waals volumes of the atoms.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    return mol.atoms.van_der_waals_volume.sum()
62
63
64
def sum_electronegativity(mol):
    """ The sum of the Sanderson electronegativities of the atoms.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    return mol.atoms.sanderson_electronegativity.sum()
77
78
79
def sum_ionisation_energy(mol):
    """ The sum of the first ionization energies of the atoms.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    return mol.atoms.ionization_energy.sum()
92
93
94
def sum_polarizability(mol):
    """ The sum of the polarizabilities of the atoms.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    return mol.atoms.polarizability.sum()
107
108
109
def mean_van_der_waals_volume(mol):
    """ The mean of the Van der Waals volumes of the atoms.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    # Use the mean (not the sum) so this matches its name and the other
    # ``mean_*`` descriptors.
    return mol.atoms.van_der_waals_volume.mean()
122
123
124
def mean_electronegativity(mol):
    """ The mean of the Sanderson electronegativities of the atoms.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    return mol.atoms.sanderson_electronegativity.mean()
137
138
139
def mean_ionisation_energy(mol):
    """ The mean of the first ionization energies of the atoms.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    return mol.atoms.ionization_energy.mean()
152
153
154
def mean_polarizability(mol):
    """ The mean of the polarizabilities of the atoms.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    return mol.atoms.polarizability.mean()
167
168
169
@requires_h_depleted
def graph_density(mol):
    """ The graph density of the h-depleted graph.

    Calculated as ``2 * n_bonds / (n_atoms * (n_atoms - 1))``.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float:
            The density, or NaN when the molecule has fewer than two atoms
            (the denominator would be zero).
    """
    n_nodes = len(mol.atoms)
    if n_nodes < 2:
        return float('nan')
    # 2.0 forces true division on Python 2 as well as Python 3.
    return 2.0 * len(mol.bonds) / (n_nodes * (n_nodes - 1))
183
184
185
def n_atoms(mol):
    """ The number of atoms.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return len(mol.atoms)
198
199
200
@requires_h_filled
def n_hyd(mol):
    """ The number of hydrogen atoms.

    Counts the atoms whose atomic number is 1.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return (mol.atoms.atomic_number == 1).sum()
214
215
216
@requires_h_depleted
def n_atom(mol, s):
    """ The number of atoms of symbol *s*.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

        s (str):
            The element symbol to count, e.g. ``'C'`` or ``'Cl'``.

    Returns:
        int
    """
    return (mol.atoms.symbol == s).sum()
230
231
232
@requires_h_filled
def fract_atom(mol, s):
    """ The fraction of atoms of symbol *s*.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

        s (str):
            The element symbol to count, e.g. ``'C'`` or ``'H'``.

    Returns:
        float
    """
    # float() forces true division on Python 2 as well as Python 3.
    return float((mol.atoms.symbol == s).sum()) / len(mol.atoms)
246
247
248
def n_halo(mol):
    """ The number of halogens.

    Sums the counts of F, Cl, Br and I atoms.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    # TODO: memoize
    return sum(n_atom(mol, symbol) for symbol in ('F', 'Cl', 'Br', 'I'))
260
261
262
@requires_h_filled
def fract_halo(mol):
    """ The fraction of halogens.

    The halogen count divided by the number of atoms.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    # float() forces true division on Python 2 as well as Python 3.
    return float(n_halo(mol)) / len(mol.atoms)
275
276
277
def n_hetero(mol):
    """ The number of heteroatoms.

    Computed by RDKit's ``rdMolDescriptors.CalcNumHeteroatoms``.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return rdMolDescriptors.CalcNumHeteroatoms(mol)
290
291
292
def n_heavy(mol):
    """ The number of heavy atoms.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return mol.GetNumHeavyAtoms()
305
306
307
def n_terminal(mol):
    """ The number of terminal atoms.

    Counts the atoms flagged by ``mol.atoms.is_terminal``.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return mol.atoms.is_terminal.sum()
320
321
322
@requires_h_filled
def n_bonds(mol):
    """ The number of bonds.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return len(mol.bonds)
336
337
338
@requires_h_depleted
def n_bonds_non_h(mol):
    """ The number of bonds between atoms other than hydrogen.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return len(mol.bonds)
352
353
354
@requires_h_depleted
def n_bonds_multiple(mol):
    """ The number of multiple bonds.

    Counts the bonds whose order is greater than 1.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return (mol.bonds.order > 1).sum()
368
369
370
@requires_h_depleted
def sum_of_conventional_bond_orders(mol):
    """ The sum of conventional bond orders (h-depleted).

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return mol.bonds.order.sum()
384
385
386
def n_rotatable_bonds(mol):
    """ The number of rotatable bonds.

    Computed by RDKit's ``rdMolDescriptors.CalcNumRotatableBonds``.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return rdMolDescriptors.CalcNumRotatableBonds(mol)
399
400
401
@requires_h_depleted
def fract_rotatable_bonds(mol):
    """ The fraction of rotatable bonds.

    The rotatable bond count divided by the value of ``n_bonds``.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float:
            The fraction, or NaN when the molecule has no bonds.
    """
    # TODO: memoize
    total = n_bonds(mol)
    if not total:
        # No bonds at all: the fraction is undefined rather than an error.
        return float('nan')
    # float() forces true division on Python 2 as well as Python 3.
    return float(n_rotatable_bonds(mol)) / total
414
415
416
@requires_h_depleted
@requires_bo_amat
def n_bond_order(mol, i):
    """ The number of bonds of order *i*.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

        i (int):
            The order of bonds.

    Returns:
        float
    """
    # Half the number of matrix entries equal to *i*.
    # NOTE(review): presumably the bond-order matrix is symmetric, so each
    # bond appears twice - confirm against `requires_bo_amat`.
    return 0.5 * (mol._bo_amat == i).sum()
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _bo_amat was declared protected and should not be accessed from this context.

Prefixing a member variable _ is usually regarded as the equivalent of declaring it with protected visibility that exists in other languages. Consequentially, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
432
433
434
def fract_c_sp3(mol):
    """ The fraction of carbons that are sp3.

    Computed by RDKit's ``rdMolDescriptors.CalcFractionCSP3``.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    return rdMolDescriptors.CalcFractionCSP3(mol)
447
448
449
def fract_c_sp2(mol):
    """ The fraction of carbons that are sp2.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    carbs = mol.atoms.atomic_number == 6
    n_sp2 = (mol.atoms.hybridization_state == 'SP2')[carbs].sum()
    # float() forces true division on Python 2 as well as Python 3.
    return n_sp2 / float(carbs.sum())
463
464
465
def fract_c_sp(mol):
    """ The fraction of carbons that are sp.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    carbs = mol.atoms.atomic_number == 6
    n_sp = (mol.atoms.hybridization_state == 'SP')[carbs].sum()
    # float() forces true division on Python 2 as well as Python 3.
    return n_sp / float(carbs.sum())
479
480
481
def n_disconnected(mol):
    """ The number of disconnected fragments in the mol.

    Counts the fragments returned by RDKit's ``rdmolops.GetMolFrags``.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return len(rdmolops.GetMolFrags(mol))
494
495
496
def total_charge(mol):
    """ The total charge of the molecule.

    The sum of the per-atom charges.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    return mol.atoms.charge.sum()
509
510
511
def n_hba(mol):
    """ The number of h bond acceptors.

    Computed by RDKit's ``rdMolDescriptors.CalcNumHBA``.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return rdMolDescriptors.CalcNumHBA(mol)
523
524
525
def n_hbd(mol):
    """ The number of h bond donors.

    Computed by RDKit's ``rdMolDescriptors.CalcNumHBD``.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return rdMolDescriptors.CalcNumHBD(mol)
537
538
539
def n_radical_electrons(mol):
    """ The number of radical electrons.

    Computed by RDKit's ``Descriptors.NumRadicalElectrons``.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return Descriptors.NumRadicalElectrons(mol)
551
552
553
def n_valence_electrons(mol):
    """ The number of valence electrons.

    Computed by RDKit's ``Descriptors.NumValenceElectrons``.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return Descriptors.NumValenceElectrons(mol)
566
567
568
def heavy_atom_mol_wt(x):
    """ The molecular weight of only heavy atoms.

    Args:
        x (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    # The second positional argument restricts the weight to heavy atoms.
    return rdMolDescriptors.CalcExactMolWt(x, True)
581
582
583
def n_hbd_lipinski(x):
    """ The number of hydrogen bond donors according to Lipinski.

    Computed by RDKit's ``rdMolDescriptors.CalcNumLipinskiHBD``.

    Args:
        x (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return rdMolDescriptors.CalcNumLipinskiHBD(x)
588
589
590
def n_hba_lipinski(x):
    """ The number of hydrogen bond acceptors according to Lipinski.

    Computed by RDKit's ``rdMolDescriptors.CalcNumLipinskiHBA``.

    Args:
        x (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return rdMolDescriptors.CalcNumLipinskiHBA(x)
595
596
597
def n_paths(mol, l):
    """ The number of paths of length *l*.

    Counts the paths returned by RDKit's ``rdmolops.FindAllPathsOfLengthN``.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

        l (int):
            The path length to enumerate.

    Returns:
        int
    """
    return len(rdmolops.FindAllPathsOfLengthN(mol, l))
602
603
604
# Registry mapping descriptor names to the callables that compute them.
DESCRIPTORS = {
    'mol_wt': molecular_weight,
    'avg_mol_wt': average_molecular_weight,
    'sum_vdw_vol': sum_van_der_waals_volume,
    'sum_eneg': sum_electronegativity,
    'sum_pol': sum_polarizability,
    'sum_ion_energy': sum_ionisation_energy,
    'mean_vdw_vol': mean_van_der_waals_volume,
    'mean_eneg': mean_electronegativity,
    'mean_pol': mean_polarizability,
    'mean_ion_energy': mean_ionisation_energy,
    'graph_density': graph_density,
    'n_atoms': n_atoms,
    'n_term': n_terminal,
    'n_bonds': n_bonds,
    'n_bonds_non_h': n_bonds_non_h,
    'n_bonds_mult': n_bonds_multiple,
    'sum_bond_order': sum_of_conventional_bond_orders,
    'n_rot_bonds': n_rotatable_bonds,
    'fract_rot_bonds': fract_rotatable_bonds,
    'n_hyd': n_hyd,
    'n_halo': n_halo,
    'n_heavy': n_heavy,
    'n_hetero': n_hetero,
    'n_hba': n_hba,
    'fract_halo': fract_halo,
    'fract_csp3': fract_c_sp3,
    'fract_csp2': fract_c_sp2,
    'fract_csp': fract_c_sp,
    'n_disconn': n_disconnected,
    'total_charge': total_charge,
    'n_rad': n_radical_electrons,
    'n_val': n_valence_electrons,
    'heavy_mol_wt': heavy_atom_mol_wt,
    'n_hbd_lip': n_hbd_lipinski,
    # NOTE(review): key is spelled 'n_hba_lib' (cf. 'n_hbd_lip' above); kept
    # unchanged so existing consumers of this key keep working.
    'n_hba_lib': n_hba_lipinski,
}

SYMBOLS = ('C', 'N', 'O', 'P', 'S', 'F', 'Cl', 'Br', 'I', 'B')

# Per-element atom counts: 'n_C', 'n_N', ...
DESCRIPTORS.update({'n_{}'.format(symbol): partial(n_atom, s=symbol)
                    for symbol in SYMBOLS})

# Per-element atom fractions get their own 'fract_' prefix so they do not
# overwrite the 'n_<symbol>' counts registered just above.
DESCRIPTORS.update({'fract_{}'.format(symbol): partial(fract_atom, s=symbol)
                    for symbol in ('H', 'C', 'N', 'O')})

# Bond counts per bond order: 'n_bond_1', 'n_bond_1.5', 'n_bond_2', 'n_bond_3'.
DESCRIPTORS.update({'n_bond_{}'.format(i): partial(n_bond_order, i=i)
                    for i in (1, 1.5, 2, 3)})

# Path counts for lengths 1 to 6: 'n_paths_1' ... 'n_paths_6'.
DESCRIPTORS.update({'n_paths_{}'.format(i): partial(n_paths, l=i)
                    for i in range(1, 7)})
656