Completed
Push — master ( 360284...546a0f )
by Rich
05:22
created

sum_electronegativity()   A

Complexity

Conditions 1

Size

Total Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 1
CRAP Score 1.125

Importance

Changes 1
Bugs 0 Features 1
Metric Value
cc 1
c 1
b 0
f 1
dl 0
loc 13
ccs 1
cts 2
cp 0.5
crap 1.125
rs 9.4285
1
#! /usr/bin/env python
2
#
3
# Copyright (C) 2016 Rich Lewis <[email protected]>
4
# License: 3-clause BSD
5
6 1
"""
7
# skchem.features.descriptors.constitutional
8
9
Constitutional features for scikit-chem.
10
"""
11
12 1
from collections import OrderedDict
13 1
from functools import partial
14
15 1
from rdkit.Chem import rdMolDescriptors, rdmolops, Descriptors
0 ignored issues
show
Configuration introduced by
The import rdkit.Chem could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
16
17 1
from .caching import cache, requires_h_filled, requires_h_depleted
0 ignored issues
show
Unused Code introduced by
Unused cache imported from caching
Loading history...
18 1
from .fundamentals import bond_order_adjacency_matrix
0 ignored issues
show
Unused Code introduced by
Unused bond_order_adjacency_matrix imported from fundamentals
Loading history...
19
20
21 1
def molecular_weight(mol):
    """ Molecular weight of the molecule, as computed by RDKit.

    NOTE(review): this delegates to ``CalcExactMolWt``, which in RDKit is the
    exact (monoisotopic) mass rather than the average molecular weight --
    confirm this is the intended quantity.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    exact_weight = rdMolDescriptors.CalcExactMolWt(mol)
    return exact_weight
33
34
35 1
@requires_h_filled
def average_molecular_weight(mol):
    """ Molecular weight divided by the number of atoms.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    total_weight = molecular_weight(mol)
    return total_weight / len(mol.atoms)
49
50
51 1
def sum_van_der_waals_volume(mol):
    """ Total Van der Waals volume over the atoms of the molecule.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    atom_volumes = mol.atoms.van_der_waals_volume
    return atom_volumes.sum()
64
65
66 1
def sum_electronegativity(mol):
    """ Total Sanderson electronegativity over the atoms of the molecule.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    electronegativities = mol.atoms.sanderson_electronegativity
    return electronegativities.sum()
79
80
81 1
def sum_ionisation_energy(mol):
    """ Total of the first ionisation energies of the atoms.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    ionisation_energies = mol.atoms.ionisation_energy
    return ionisation_energies.sum()
94
95
96 1
def sum_polarisability(mol):
    """ Total polarisability over the atoms of the molecule.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    polarisabilities = mol.atoms.polarisability
    return polarisabilities.sum()
109
110
111 1
def mean_van_der_waals_volume(mol):
    """ The mean of the Van der Waals volume.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    # Must be the mean, not the sum: the sum is already provided by
    # sum_van_der_waals_volume, and the other mean_* descriptors use .mean().
    return mol.atoms.van_der_waals_volume.mean()
124
125
126 1
def mean_electronegativity(mol):
    """ Mean Sanderson electronegativity over the atoms of the molecule.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    electronegativities = mol.atoms.sanderson_electronegativity
    return electronegativities.mean()
139
140
141 1
def mean_ionisation_energy(mol):
    """ Mean of the first ionisation energies of the atoms.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    ionisation_energies = mol.atoms.ionisation_energy
    return ionisation_energies.mean()
154
155
156 1
def mean_polarisability(mol):
    """ Mean polarisability over the atoms of the molecule.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    polarisabilities = mol.atoms.polarisability
    return polarisabilities.mean()
169
170
171 1
@requires_h_depleted
def graph_density(mol):
    """ The graph density of the h-depleted graph.

    Computed as ``2 * n_bonds / (n_atoms * (n_atoms - 1))``, i.e. the number
    of bonds relative to the number of distinct atom pairs.

    With fewer than two atoms there are no atom pairs and the ratio is
    undefined; ``0.0`` is returned in that case instead of raising
    ``ZeroDivisionError``.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    n_vertices = len(mol.atoms)
    if n_vertices < 2:
        return 0.0
    # The float literal forces true division even on interpreters where
    # "/" between two ints floors (Python 2 without __future__.division).
    return 2.0 * len(mol.bonds) / (n_vertices * (n_vertices - 1))
185
186
187 1
def n_atoms(mol):
    """ Count the atoms of the molecule.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    atoms = mol.atoms
    return len(atoms)
200
201
202 1
@requires_h_filled
def n_hyd(mol):
    """ Count the hydrogen atoms (atomic number 1).

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    is_hydrogen = mol.atoms.atomic_number == 1
    return sum(is_hydrogen)
216
217
218 1
@requires_h_depleted
def n_atom(mol, symbol='C'):
    """ Count the atoms whose element symbol equals *symbol*.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

        symbol (str):
            The element symbol to count.

    Returns:
        int
    """
    has_symbol = mol.atoms.symbol == symbol
    return sum(has_symbol)
232
233
234 1
@requires_h_filled
def fract_atom(mol, symbol='C'):
    """ Fraction of the atoms whose element symbol equals *symbol*.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

        symbol (str):
            The element symbol to look for.

    Returns:
        float
    """
    has_symbol = mol.atoms.symbol == symbol
    matching = sum(has_symbol)
    return matching / len(mol.atoms)
248
249
250 1
def n_halo(mol):
    """ Count the halogen atoms (F, Cl, Br and I).

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    # TODO: memoize
    halogen_count = 0
    for halogen in ('F', 'Cl', 'Br', 'I'):
        halogen_count += n_atom(mol, halogen)
    return halogen_count
262
263
264 1
@requires_h_filled
def fract_halo(mol):
    """ Halogen count (see ``n_halo``) divided by the number of atoms.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    halogen_count = n_halo(mol)
    return halogen_count / len(mol.atoms)
277
278
279 1
def n_hetero(mol):
    """ Count the heteroatoms, as reported by RDKit.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    heteroatom_count = rdMolDescriptors.CalcNumHeteroatoms(mol)
    return heteroatom_count
292
293
294 1
def n_heavy(mol):
    """ Count the heavy atoms, as reported by the molecule itself.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    heavy_count = mol.GetNumHeavyAtoms()
    return heavy_count
307
308
309 1
def n_terminal(mol):
    """ The number of terminal atoms.

    Counts the atoms flagged by ``mol.atoms.is_terminal``.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    return mol.atoms.is_terminal.sum()
322
323
324 1
@requires_h_filled
def n_bonds(mol):
    """ Count the bonds of the molecule.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    bonds = mol.bonds
    return len(bonds)
338
339
340 1
@requires_h_depleted
def n_bonds_non_h(mol):
    """ Count the bonds between atoms other than hydrogen.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    heavy_bonds = mol.bonds
    return len(heavy_bonds)
354
355
356 1
@requires_h_depleted
def n_bonds_multiple(mol):
    """ Count the multiple bonds, i.e. bonds with an order above 1.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    is_multiple = mol.bonds.order > 1
    return is_multiple.sum()
370
371
372 1
@requires_h_depleted
def sum_of_conventional_bond_orders(mol):
    """ Total of the conventional bond orders (h-depleted).

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    bond_orders = mol.bonds.order
    return bond_orders.sum()
386
387
388 1
def n_rotatable_bonds(mol):
    """ Count the rotatable bonds, as reported by RDKit.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    rotatable_count = rdMolDescriptors.CalcNumRotatableBonds(mol)
    return rotatable_count
401
402
403 1
@requires_h_depleted
def fract_rotatable_bonds(mol):
    """ The fraction of rotatable bonds.

    Computed as ``n_rotatable_bonds(mol) / n_bonds(mol)``. A molecule without
    any bonds has no rotatable bonds either, so ``0.0`` is returned for it
    instead of raising ``ZeroDivisionError``.

    NOTE(review): ``n_bonds`` is declared with ``requires_h_filled`` while
    this function is declared with ``requires_h_depleted`` -- confirm which
    bond count the denominator is meant to use.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    # TODO: memoize
    total_bonds = n_bonds(mol)
    if not total_bonds:
        return 0.0
    # float() forces true division even on interpreters where "/" between
    # two ints floors (Python 2 without __future__.division).
    return float(n_rotatable_bonds(mol)) / total_bonds
416
417
418 1
@requires_h_depleted
def n_bond_order(mol, order=1):
    """ Count the bonds whose order equals *order*.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

        order (int):
            The order of bonds.

    Returns:
        int
    """
    has_order = mol.bonds.order == order
    return has_order.sum()
433
434
435 1
def fract_c_hybrid(mol, h_state='SP3'):
    """ Fraction of the carbon atoms that are in hybridization state *h_state*.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

        h_state (str):
            The hybridization state for which to measure the fraction.

    Returns:
        float
    """
    is_carbon = mol.atoms.atomic_number == 6
    in_state = mol.atoms.hybridization_state == h_state
    # Restrict the state mask to carbons before counting.
    carbons_in_state = in_state[is_carbon].sum()
    return carbons_in_state / is_carbon.sum()
451
452
453 1
def n_disconnected(mol):
    """ Count the disconnected fragments in the mol.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    fragments = rdmolops.GetMolFrags(mol)
    return len(fragments)
466
467
468 1
def total_charge(mol):
    """ Total charge of the molecule: the sum of the atoms' formal charges.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    formal_charges = mol.atoms.formal_charge
    return formal_charges.sum()
481
482
483 1
def n_hba(mol):
    """ Count the hydrogen bond acceptors, as reported by RDKit.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    acceptor_count = rdMolDescriptors.CalcNumHBA(mol)
    return acceptor_count
495
496
497 1
def n_hbd(mol):
    """ The number of h bond donors.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int
    """
    # Named n_hbd (donors): defining it as a second ``n_hba`` shadowed the
    # acceptor count defined earlier in the module.
    return rdMolDescriptors.CalcNumHBD(mol)
509
510
511 1
def n_radical_electrons(mol):
    """ Count the radical electrons, as reported by RDKit.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    radical_count = Descriptors.NumRadicalElectrons(mol)
    return radical_count
523
524
525 1
def n_valence_electrons(mol):
    """ Count the valence electrons, as reported by RDKit.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    valence_count = Descriptors.NumValenceElectrons(mol)
    return valence_count
538
539
540 1
def heavy_atom_mol_wt(x):
    """ The molecular weight of only heavy atoms.

    Args:
        x (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        float
    """
    # The second positional argument is RDKit's ``onlyHeavy`` flag.
    return rdMolDescriptors.CalcExactMolWt(x, True)
553
554
555 1
def n_hbd_lipinski(x):
    """ Count the hydrogen bond donors according to Lipinski.

    Args:
        x (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        The count as returned by RDKit's ``CalcNumLipinskiHBD``.
    """
    donor_count = rdMolDescriptors.CalcNumLipinskiHBD(x)
    return donor_count
560
561
562 1
def n_hba_lipinski(x):
    """ Count the hydrogen bond acceptors according to Lipinski.

    Args:
        x (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        The count as returned by RDKit's ``CalcNumLipinskiHBA``.
    """
    acceptor_count = rdMolDescriptors.CalcNumLipinskiHBA(x)
    return acceptor_count
567
568
569 1
@requires_h_depleted
def n_paths(mol, length=1):
    """ Count the paths of length *length* found by RDKit.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

        length (int):
            The path length passed to ``FindAllPathsOfLengthN``.

    Returns:
        int
    """
    paths = rdmolops.FindAllPathsOfLengthN(mol, length)
    return len(paths)
575
576
577 1
# Registry mapping each descriptor name to the function that computes it.
# An OrderedDict is used so the insertion order of the names is preserved.
DESCRIPTORS = OrderedDict((
    ('mol_wt', molecular_weight),
    ('avg_mol_wt', average_molecular_weight),
    ('sum_vdw_vol', sum_van_der_waals_volume),
    ('sum_eneg', sum_electronegativity),
    ('sum_pol', sum_polarisability),
    ('sum_ion_energy', sum_ionisation_energy),
    ('mean_vdw_vol', mean_van_der_waals_volume),
    ('mean_eneg', mean_electronegativity),
    ('mean_pol', mean_polarisability),
    ('mean_ion_energy', mean_ionisation_energy),
    ('graph_density', graph_density),
    ('n_atoms', n_atoms),
    ('n_term', n_terminal),
    ('n_bonds', n_bonds),
    ('n_bonds_non_h', n_bonds_non_h),
    ('n_bonds_mult', n_bonds_multiple),
    ('sum_bond_order', sum_of_conventional_bond_orders),
    ('n_rot_bonds', n_rotatable_bonds),
    ('fract_rot_bonds', fract_rotatable_bonds),
    ('n_hyd', n_hyd),
    ('n_halo', n_halo),
    ('n_heavy', n_heavy),
    ('n_hetero', n_hetero),
    ('n_hba', n_hba),
    # A second, redundant ('n_atoms', n_atoms) pair used to sit here; it
    # re-assigned the identical value and did not change the mapping.
    # NOTE(review): no h bond donor count is registered -- this slot was
    # possibly meant for it; confirm.
    ('fract_halo', fract_halo),
    ('n_disconn', n_disconnected),
    ('total_charge', total_charge),
    ('n_rad', n_radical_electrons),
    ('n_val', n_valence_electrons),
    ('heavy_mol_wt', heavy_atom_mol_wt),
    ('n_hbd_lip', n_hbd_lipinski),
    # NOTE(review): 'n_hba_lib' looks like a typo for 'n_hba_lip' (compare
    # 'n_hbd_lip'); the key is kept unchanged because callers may rely on it.
    ('n_hba_lib', n_hba_lipinski),
))
612
613 1
# Element symbols that get a per-element atom count descriptor ('n_<symbol>').
SYMBOLS = ('C', 'N', 'O', 'P', 'S', 'F', 'Cl', 'Br', 'I', 'B')
# Hybridization states that get a carbon fraction descriptor.
HYBRIDS = ('SP3', 'SP2', 'SP')

# 'fract_c_<state>': fraction of carbons in each hybridization state.
DESCRIPTORS.update(
    ('fract_c_{}'.format(hybrid), partial(fract_c_hybrid, h_state=hybrid))
    for hybrid in HYBRIDS)

# 'n_<symbol>': number of atoms of each element.
DESCRIPTORS.update(
    ('n_{}'.format(symbol), partial(n_atom, symbol=symbol))
    for symbol in SYMBOLS)

# 'fract_<symbol>': fraction of atoms of each element. These need their own
# prefix: registering them as 'n_<symbol>' overwrote the n_C, n_N and n_O
# count descriptors added just above with fractions.
DESCRIPTORS.update(
    ('fract_{}'.format(symbol), partial(fract_atom, symbol=symbol))
    for symbol in ('H', 'C', 'N', 'O'))

# 'n_bond_<order>': number of bonds of each order (1.5 is listed alongside
# the integer orders).
DESCRIPTORS.update(
    ('n_bond_{}'.format(order), partial(n_bond_order, order=order))
    for order in (1, 1.5, 2, 3))

# 'n_paths_<length>': number of paths for lengths 1 to 6.
DESCRIPTORS.update(
    ('n_paths_{}'.format(length), partial(n_paths, length=length))
    for length in range(1, 7))
632