Passed
Push — master ( d6a4c8...fc60f9 )
by Daniel
01:48
created

amd.io._Reader.read_one()   A

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 2
dl 0
loc 3
rs 10
c 0
b 0
f 0
cc 1
nop 1
1
"""Contains I/O tools, including a .CIF reader and CSD reader
2
(``csd-python-api`` only) to extract periodic set representations
3
of crystals which can be passed to :func:`.calculate.AMD` and :func:`.calculate.PDD`.
4
5
These intermediate :class:`.periodicset.PeriodicSet` representations can be written
6
to a .hdf5 file with :class:`SetWriter`, which can be read back with :class:`SetReader`.
7
This is much faster than rereading a .CIF and recomputing invariants.
8
"""
9
10
import os
11
import functools
12
import warnings
13
from typing import Callable, Iterable, Sequence, Tuple
14
15
import numpy as np
16
import ase.io.cif
17
import ase.spacegroup.spacegroup
18
19
from .periodicset import PeriodicSet
20
from .utils import cellpar_to_cell
21
22
try:
23
    import ccdc.io
24
    import ccdc.search
25
    _CSD_PYTHON_API_ENABLED = True
26
except (ImportError, RuntimeError) as _:
27
    _CSD_PYTHON_API_ENABLED = False
28
29
def _custom_warning(message, category, filename, lineno, *args, **kwargs):
    """Format a warning as a single ``CategoryName: message`` line.

    This function is installed as ``warnings.formatwarning`` just below.
    ``filename``, ``lineno`` and any further arguments are accepted only
    because the ``warnings`` module passes them to its formatter; they are
    intentionally left out of the text so warnings raised while reading
    crystals stay short.

    Returns:
        The string ``'<category name>: <message>\\n'``.
    """
    return f'{category.__name__}: {message}\n'


# Module-level side effect: every warning formatted through the warnings
# module in this process now uses the short one-line format above.
warnings.formatwarning = _custom_warning
33
34
class ParseError(ValueError):
    """Raised when an item cannot be parsed into a periodic set."""
37
38
39
class _Reader:
    """Base reader class with the options and machinery shared by all readers.

    A subclass builds an iterable of raw items (e.g. ase ``CIFBlock`` or ccdc
    ``Entry`` objects), picks a converter turning one item into a
    :class:`.periodicset.PeriodicSet`, and stores the result of :meth:`_map`
    in ``self._generator``. Outline::

        class XReader(_Reader):
            def __init__(self, ..., **kwargs):
                super().__init__(**kwargs)
                # setup and checks
                # make 'iterable' which yields objects to be converted
                # note the argument order: converter first, iterable second
                self._generator = self._map(x_to_periodicset, iterable)
    """

    # move these?
    # Tolerance (in fractional coordinates) below which two sites are
    # treated as the same site.
    _EQUIV_SITE_TOL = 1e-3
    # Accepted values of the ``disorder`` parameter.
    _DISORDER_OPTIONS = {'skip', 'ordered_sites', 'all_sites'}
    # Tag names which ``extract_data`` keys are not allowed to use.
    _RESERVED_TAGS = {
        'motif',
        'cell',
        'name',
        'asymmetric_unit',
        'wyckoff_multiplicities',
        'types',
        'filename',
    }
    # CIF tags holding fractional coordinates (x, y, z).
    _ATOM_SITE_FRACT_TAGS = [
        '_atom_site_fract_x',
        '_atom_site_fract_y',
        '_atom_site_fract_z',
    ]
    # CIF tags holding Cartesian coordinates (x, y, z).
    _ATOM_SITE_CARTN_TAGS = [
        '_atom_site_cartn_x',
        '_atom_site_cartn_y',
        '_atom_site_cartn_z',
    ]
    # CIF tags which may hold the symmetry operations; the first one
    # present in a block is used.
    _SYMOP_TAGS = [
        '_space_group_symop_operation_xyz',
        '_space_group_symop.operation_xyz',
        '_symmetry_equiv_pos_as_xyz',
    ]

    def __init__(
            self,
            remove_hydrogens=False,
            disorder='skip',
            heaviest_component=False,
            show_warnings=True,
            extract_data=None,
            include_if=None):
        """Validate and store the options shared by all readers.

        Args:
            remove_hydrogens: Stored for subclasses/converters to use.
            disorder: One of ``_DISORDER_OPTIONS``.
            heaviest_component: Stored for subclasses/converters to use.
            show_warnings: If False, :meth:`_map` silences warnings.
            extract_data: Optional dict mapping tag names to callables; each
                callable is applied to the raw item in :meth:`_map` and the
                result stored in the periodic set's tags.
            include_if: Optional iterable of callables; an item is skipped
                in :meth:`_map` unless every callable returns a truthy value.

        Raises:
            ValueError: If ``disorder`` is not a recognised option, or
                ``extract_data``/``include_if`` fail validation.
        """

        if disorder not in _Reader._DISORDER_OPTIONS:
            raise ValueError(
                f'disorder parameter {disorder} must be one of '
                f'{_Reader._DISORDER_OPTIONS}')

        extract_data, include_if = _validate_kwargs(extract_data, include_if)
        self.remove_hydrogens = remove_hydrogens
        self.disorder = disorder
        self.heaviest_component = heaviest_component
        self.show_warnings = show_warnings
        self.extract_data = extract_data
        self.include_if = include_if
        # Set by subclasses which read several files; copied into each
        # periodic set's tags by _map when truthy.
        self.current_filename = None
        self._generator = []

    def __iter__(self):
        yield from self._generator

    def read_one(self):
        """Read the next (or first) item.

        Raises ``StopIteration`` if there is nothing (left) to read.
        """
        return next(iter(self._generator))

    def _map(self, func: Callable, iterable: Iterable) -> Iterable['PeriodicSet']:
        """Lazily convert each item of ``iterable`` with ``func``.

        For every item: skip it unless all ``include_if`` checks pass, call
        ``func(item)``, turn a :class:`ParseError` into a warning (and skip
        the item), re-emit any warnings raised during parsing prefixed with
        the structure's name, attach the filename and ``extract_data`` tags,
        and yield the resulting periodic set.
        """

        if not self.show_warnings:
            # NOTE(review): this edits the process-wide warning filters and
            # is never undone, so it also silences warnings unrelated to
            # this reader once iteration has started.
            warnings.simplefilter('ignore')

        for item in iterable:

            # None means "parsed fine". A dedicated sentinel is needed:
            # testing the message's truthiness would treat a ParseError
            # with an empty message as success and reuse a stale result.
            failure = None

            # Record (rather than show) warnings raised while parsing so
            # they can be re-emitted below with the structure's name.
            with warnings.catch_warnings(record=True) as warning_msgs:

                if not all(check(item) for check in self.include_if):
                    continue

                try:
                    periodic_set = func(item)
                except ParseError as err:
                    failure = str(err)

            if failure is not None:
                warnings.warn(failure)
                continue

            for warning in warning_msgs:
                msg = f'{periodic_set.name}: {warning.message}'
                warnings.warn(msg, category=warning.category)

            if self.current_filename:
                periodic_set.tags['filename'] = self.current_filename

            for key, extractor_func in self.extract_data.items():
                periodic_set.tags[key] = extractor_func(item)

            yield periodic_set
143
144
145
class CifReader(_Reader):
    """Read all structures in a .CIF with ``ase`` or ``ccdc``
    (``csd-python-api`` only), yielding  :class:`.periodicset.PeriodicSet`
    objects which can be passed to :func:`.calculate.AMD` or
    :func:`.calculate.PDD`.

    Examples:

        ::

            # Put all crystals in a .CIF in a list
            structures = list(amd.CifReader('mycif.cif'))

            # Reads just one if the .CIF has just one crystal
            periodic_set = amd.CifReader('mycif.cif').read_one()

            # If a folder has several .CIFs each with one crystal, use
            structures = list(amd.CifReader('path/to/folder', folder=True))

            # Make list of AMDs (with k=100) of crystals in a .CIF
            amds = [amd.AMD(periodic_set, 100) for periodic_set in amd.CifReader('mycif.cif')]
    """

    def __init__(
            self,
            path,
            reader='ase',
            folder=False,
            remove_hydrogens=False,
            disorder='skip',
            heaviest_component=False,
            show_warnings=True,
            extract_data=None,
            include_if=None
    ):
        """Set up a lazy reader over one file, or every matching file in a folder.

        Args:
            path: Path to a file, or to a directory when ``folder`` is True.
            reader: ``'ase'`` or ``'ccdc'``; selects the file parser and the
                converter used for each parsed item.
            folder: If True, read every file in ``path`` whose extension is
                one the chosen parser handles.
            remove_hydrogens, disorder, heaviest_component, show_warnings,
            extract_data, include_if: See :class:`_Reader`.

        Raises:
            ValueError: If ``reader`` is not ``'ase'`` or ``'ccdc'``.
            NotImplementedError: If ``heaviest_component`` is requested with
                the ase reader.
            ImportError: If the ccdc reader is requested but
                ``csd-python-api`` could not be imported.
        """

        super().__init__(
            remove_hydrogens=remove_hydrogens,
            disorder=disorder,
            heaviest_component=heaviest_component,
            show_warnings=show_warnings,
            extract_data=extract_data,
            include_if=include_if
        )

        if reader not in ('ase', 'ccdc'):
            raise ValueError(f'Invalid reader {reader}; must be ase or ccdc.')

        if reader == 'ase' and heaviest_component:
            raise NotImplementedError(
                'Parameter heaviest_component not implemented for ase, only ccdc.')

        if reader == 'ase':
            extensions = {'cif'}
            file_parser = ase.io.cif.parse_cif
            converter = functools.partial(cifblock_to_periodicset,
                                          remove_hydrogens=remove_hydrogens,
                                          disorder=disorder)

        else:
            # reader was validated above, so this branch is 'ccdc'. Using
            # else (not elif) guarantees the three names below are always bound.
            if not _CSD_PYTHON_API_ENABLED:
                raise ImportError(
                    "Failed to import csd-python-api; check it is installed and licensed.")
            extensions = ccdc.io.EntryReader.known_suffixes
            file_parser = ccdc.io.EntryReader
            converter = functools.partial(entry_to_periodicset,
                                          remove_hydrogens=remove_hydrogens,
                                          disorder=disorder,
                                          heaviest_component=heaviest_component)

        if folder:
            generator = self._folder_generator(path, file_parser, extensions)
        else:
            generator = file_parser(path)

        self._generator = self._map(converter, generator)

    def _folder_generator(self, path, file_parser, extensions):
        """Yield the parsed items of every file in ``path`` with a known extension.

        ``self.current_filename`` is updated before each file is parsed so
        that :meth:`_map` can tag the resulting periodic sets with it.
        Files are visited in the order returned by ``os.listdir``.
        """
        for file in os.listdir(path):
            # extension without the leading dot
            suff = os.path.splitext(file)[1][1:]
            if suff.lower() in extensions:
                self.current_filename = file
                yield from file_parser(os.path.join(path, file))
226
227
228
class CSDReader(_Reader):
    """Read Entries from the CSD, yielding :class:`.periodicset.PeriodicSet` objects.

    The CSDReader returns :class:`.periodicset.PeriodicSet` objects which can be passed
    to :func:`.calculate.AMD` or :func:`.calculate.PDD`.

    Examples:

        Get crystals with refcodes in a list::

            refcodes = ['DEBXIT01', 'DEBXIT05', 'HXACAN01']
            structures = list(amd.CSDReader(refcodes))

        Read refcode families (any whose refcode starts with strings in the list)::

            refcodes = ['ACSALA', 'HXACAN']
            structures = list(amd.CSDReader(refcodes, families=True))

        Create a generic reader, read crystals by name with :meth:`CSDReader.entry()`::

            reader = amd.CSDReader()
            debxit01 = reader.entry('DEBXIT01')

            # looping over this generic reader will yield all CSD entries
            for periodic_set in reader:
                ...

        Make list of AMD (with k=100) for crystals in these families::

            refcodes = ['ACSALA', 'HXACAN']
            amds = []
            for periodic_set in amd.CSDReader(refcodes, families=True):
                amds.append(amd.AMD(periodic_set, 100))
    """

    def __init__(
            self,
            refcodes=None,
            families=False,
            remove_hydrogens=False,
            disorder='skip',
            heaviest_component=False,
            show_warnings=True,
            extract_data=None,
            include_if=None,
    ):
        """Set up a lazy reader over CSD entries.

        Args:
            refcodes: A refcode, an iterable of refcodes, or ``None`` (or the
                string ``'CSD'``, case-insensitive) to iterate over the
                whole database.
            families: If True, each refcode is searched for with a ccdc
                ``TextNumericSearch`` and every hit is read. Ignored when
                ``refcodes`` is ``None``.
            remove_hydrogens, disorder, heaviest_component, show_warnings,
            extract_data, include_if: See :class:`_Reader`.

        Raises:
            ImportError: If ``csd-python-api`` could not be imported.
        """

        super().__init__(
            remove_hydrogens=remove_hydrogens,
            disorder=disorder,
            heaviest_component=heaviest_component,
            show_warnings=show_warnings,
            extract_data=extract_data,
            include_if=include_if
        )

        if not _CSD_PYTHON_API_ENABLED:
            raise ImportError(
                'Failed to import csd-python-api; check it is installed and licensed.')

        if isinstance(refcodes, str) and refcodes.lower() == 'csd':
            refcodes = None

        if refcodes is None:
            families = False
        else:
            refcodes = [refcodes] if isinstance(refcodes, str) else list(refcodes)

        # families parameter reads all crystals with ids starting with passed refcodes
        if families:
            all_refcodes = []
            for refcode in refcodes:
                query = ccdc.search.TextNumericSearch()
                query.add_identifier(refcode)
                all_refcodes.extend(hit.identifier for hit in query.search())

            # filter to unique refcodes, keeping first-seen order
            refcodes = list(dict.fromkeys(all_refcodes))

        self._entry_reader = ccdc.io.EntryReader('CSD')

        converter = functools.partial(entry_to_periodicset,
                                      remove_hydrogens=remove_hydrogens,
                                      disorder=disorder,
                                      heaviest_component=heaviest_component)

        generator = self._ccdc_generator(refcodes)

        self._generator = self._map(converter, generator)

    def entry(self, refcode: str, **kwargs) -> PeriodicSet:
        """Read a PeriodicSet given any CSD refcode.

        ``kwargs`` are forwarded to :func:`entry_to_periodicset`; the options
        given to the reader's constructor are not applied here.
        """

        entry = self._entry_reader.entry(refcode)
        periodic_set = entry_to_periodicset(entry, **kwargs)
        return periodic_set

    def _ccdc_generator(self, refcodes):
        """Generates ccdc Entries from CSD refcodes.

        With ``refcodes`` set to ``None`` every entry of the entry reader is
        yielded; otherwise each refcode is looked up in turn, and a refcode
        whose lookup raises ``RuntimeError`` is reported with a warning and
        skipped.
        """

        if refcodes is None:
            yield from self._entry_reader
            return

        for refcode in refcodes:
            # Keep the try body to the lookup alone so that only a failed
            # lookup is reported as "not found".
            try:
                entry = self._entry_reader.entry(refcode)
            except RuntimeError:    # if self.show_warnings?
                warnings.warn(f'Identifier {refcode} not found in database')
                continue
            yield entry
341
342
343
def entry_to_periodicset(
        entry,
        remove_hydrogens=False,
        disorder='skip',
        heaviest_component=False
) -> PeriodicSet:
    """ccdc.entry.Entry --> PeriodicSet.

    Args:
        entry: The ccdc entry to convert.
        remove_hydrogens: If True, atoms whose symbol is ``'H'`` or ``'D'``
            are removed.
        disorder: ``'skip'`` rejects entries with disorder,
            ``'ordered_sites'`` removes the disordered atoms, and
            ``'all_sites'`` keeps every site (overlapping sites included).
        heaviest_component: If True and the molecule has more than one
            component, only the heaviest component is kept.

    Returns:
        A :class:`.periodicset.PeriodicSet` tagged with ``name``,
        ``asymmetric_unit``, ``wyckoff_multiplicities`` and ``types``.

    Raises:
        ParseError: If the entry has no 3D structure, has disorder (with
            ``disorder='skip'``), has atoms without sites, or has no valid
            sites left after filtering.
    """

    crystal = entry.crystal

    if not entry.has_3d_structure:
        raise ParseError(f'{entry.identifier}: Has no 3D structure')

    molecule = crystal.disordered_molecule

    if disorder == 'skip':
        if (crystal.has_disorder or entry.has_disorder
                or any(atom_has_disorder(a.label, a.occupancy) for a in molecule.atoms)):
            raise ParseError(f'{entry.identifier}: Has disorder')

    elif disorder == 'ordered_sites':
        # Collect the atoms first so the molecule is not being read lazily
        # while it is being modified.
        disordered_atoms = [a for a in molecule.atoms
                            if atom_has_disorder(a.label, a.occupancy)]
        molecule.remove_atoms(disordered_atoms)

    if remove_hydrogens:
        # Compare against a tuple: `symbol in 'HD'` is a substring test and
        # would also match an empty symbol.
        hydrogens = [a for a in molecule.atoms if a.atomic_symbol in ('H', 'D')]
        molecule.remove_atoms(hydrogens)

    if heaviest_component and len(molecule.components) > 1:
        molecule = _heaviest_component(molecule)

    if (not molecule.all_atoms_have_sites
            or any(a.fractional_coordinates is None for a in molecule.atoms)):
        raise ParseError(f'{entry.identifier}: Has atoms without sites')

    crystal.molecule = molecule
    asym_atoms = crystal.asymmetric_unit_molecule.atoms
    asym_unit = np.array([tuple(a.fractional_coordinates) for a in asym_atoms])
    # wrap fractional coordinates into the unit cell
    asym_unit = np.mod(asym_unit, 1)
    asym_symbols = [a.atomic_symbol for a in asym_atoms]
    cell = cellpar_to_cell(*crystal.cell_lengths, *crystal.cell_angles)

    sitesym = crystal.symmetry_operators
    if not sitesym:
        # no operators given: fall back to the identity only
        sitesym = ['x,y,z', ]

    if disorder != 'all_sites':
        keep_sites = _unique_sites(asym_unit)
        if not np.all(keep_sites):
            warnings.warn(
                f'{entry.identifier}: May have overlapping sites; duplicates will be removed')
        asym_unit = asym_unit[keep_sites]
        asym_symbols = [sym for sym, keep in zip(asym_symbols, keep_sites) if keep]

    if asym_unit.shape[0] == 0:
        raise ParseError(f'{entry.identifier}: Has no valid sites')

    frac_motif, asym_inds, multiplicities, inverses = expand_asym_unit(asym_unit, sitesym)
    # inverses maps every motif point back to its asymmetric unit site
    full_types = [asym_symbols[i] for i in inverses]
    # fractional -> Cartesian coordinates
    motif = frac_motif @ cell

    tags = {
        'name': entry.identifier,
        'asymmetric_unit': asym_inds,
        'wyckoff_multiplicities': multiplicities,
        'types': full_types,
    }

    return PeriodicSet(motif, cell, **tags)
410
411
412
def cifblock_to_periodicset(
        block,
        remove_hydrogens=False,
        disorder='skip'
) -> PeriodicSet:
    """ase.io.cif.CIFBlock --> PeriodicSet.

    Args:
        block: The ase CIF block to convert.
        remove_hydrogens: If True, sites whose symbol is ``'H'`` or ``'D'``
            are removed.
        disorder: ``'skip'`` rejects blocks with disorder,
            ``'ordered_sites'`` removes the disordered sites, and
            ``'all_sites'`` keeps every site (overlapping sites included).

    Returns:
        A :class:`.periodicset.PeriodicSet` tagged with ``name``,
        ``asymmetric_unit``, ``wyckoff_multiplicities`` and ``types``.

    Raises:
        ParseError: If the block has no sites, has disorder (with
            ``disorder='skip'``), or has no valid sites left after filtering.
    """

    cell = block.get_cell().array

    # Asymmetric unit fractional coords. Each tag yields one column (all x,
    # all y, all z), so the stacked columns are transposed to one row per site.
    frac_columns = [block.get(name) for name in _Reader._ATOM_SITE_FRACT_TAGS]
    if any(column is None for column in frac_columns):
        # fall back to Cartesian coordinates and convert them to fractional
        cartn_columns = [block.get(name) for name in _Reader._ATOM_SITE_CARTN_TAGS]
        if any(column is None for column in cartn_columns):
            raise ParseError(f'{block.name}: Has no sites')
        # Transpose BEFORE multiplying: the inverse of `motif = frac @ cell`
        # needs an (n_sites, 3) array on the left.
        asym_unit = np.array(cartn_columns).T @ np.linalg.inv(cell)
    else:
        asym_unit = np.array(frac_columns).T
    # wrap fractional coordinates into the unit cell
    asym_unit = np.mod(asym_unit, 1)

    try:
        asym_symbols = block.get_symbols()
    except ase.io.cif.NoStructureData:
        asym_symbols = ['Unknown'] * len(asym_unit)

    # default to the identity if no symmetry operations are listed
    sitesym = ['x,y,z', ]
    for tag in _Reader._SYMOP_TAGS:
        if tag in block:
            sitesym = block[tag]
            break
    if isinstance(sitesym, str):
        sitesym = [sitesym]

    remove_sites = set()

    occupancies = block.get('_atom_site_occupancy')
    labels = block.get('_atom_site_label')
    if occupancies is not None and disorder in ('skip', 'ordered_sites'):
        if labels is None:
            # No labels in the block: judge disorder on occupancy alone.
            labels = [''] * len(occupancies)
        if disorder == 'skip':
            if any(atom_has_disorder(lab, occ) for lab, occ in zip(labels, occupancies)):
                raise ParseError(f'{block.name}: Has disorder')
        else:
            remove_sites.update(
                i for i, (lab, occ) in enumerate(zip(labels, occupancies))
                if atom_has_disorder(lab, occ))

    if remove_hydrogens:
        # Compare against a tuple: `sym in 'HD'` is a substring test and
        # would also match an empty symbol.
        remove_sites.update(
            i for i, sym in enumerate(asym_symbols) if sym in ('H', 'D'))

    asym_unit = np.delete(asym_unit, sorted(remove_sites), axis=0)
    asym_symbols = [s for i, s in enumerate(asym_symbols) if i not in remove_sites]

    if disorder != 'all_sites':
        keep_sites = _unique_sites(asym_unit)
        if not np.all(keep_sites):
            warnings.warn(f'{block.name}: May have overlapping sites; duplicates will be removed')
        asym_unit = asym_unit[keep_sites]
        asym_symbols = [sym for sym, keep in zip(asym_symbols, keep_sites) if keep]

    if asym_unit.shape[0] == 0:
        raise ParseError(f'{block.name}: Has no valid sites')

    frac_motif, asym_inds, multiplicities, inverses = expand_asym_unit(asym_unit, sitesym)
    # inverses maps every motif point back to its asymmetric unit site
    full_types = [asym_symbols[i] for i in inverses]
    # fractional -> Cartesian coordinates
    motif = frac_motif @ cell

    tags = {
        'name': block.name,
        'asymmetric_unit': asym_inds,
        'wyckoff_multiplicities': multiplicities,
        'types': full_types,
    }

    return PeriodicSet(motif, cell, **tags)
484
485
486
def expand_asym_unit(
        asym_unit: np.ndarray,
        sitesym: Sequence[str]
) -> Tuple[np.ndarray, ...]:
    """
    Asymmetric unit's fractional coords + sitesyms (as strings)
    -->
    frac motif, asym unit inds, multiplicities, inverses

    Every symmetry operation is applied to every asymmetric unit site, and
    an image is kept only if it does not coincide (within
    ``_Reader._EQUIV_SITE_TOL``, allowing for wrap-around at the cell
    boundary) with a site already generated.

    Returns:
        frac_motif: Array of all distinct sites generated.
        asym_inds: For each asymmetric unit site which contributed at least
            one motif point, the index in ``frac_motif`` of its first point.
        multiplicities: Number of motif points contributed by each such site.
        inverses: List (not an array) giving, for each motif point, the
            index of the asymmetric unit site it was generated from.
    """

    rotations, translations = ase.spacegroup.spacegroup.parse_sitesym(sitesym)
    tol = _Reader._EQUIV_SITE_TOL
    all_sites = []
    asym_inds = [0]
    multiplicities = []
    inverses = []

    for inv, site in enumerate(asym_unit):
        multiplicity = 0

        for rot, trans in zip(rotations, translations):
            site_ = np.mod(np.dot(rot, site) + trans, 1)

            # Indices of already-generated sites which site_ overlaps with
            # (nothing to compare against for the very first site).
            clashes = ()
            if all_sites:
                diffs1 = np.abs(site_ - all_sites)
                # distance through the cell boundary, e.g. 0.9999 vs 0.0
                diffs2 = np.abs(diffs1 - 1)
                mask = np.all((diffs1 <= tol) | (diffs2 <= tol), axis=-1)
                clashes = np.flatnonzero(mask)

            if len(clashes) > 0:
                # An image landing on another image of the same site is
                # expected; landing on a different asymmetric unit site is
                # worth a warning. Either way the image is not added.
                for ind in clashes:
                    if inverses[ind] != inv:
                        warnings.warn(f'Equivalent sites at positions {inverses[ind]}, {inv}')
                continue

            all_sites.append(site_)
            inverses.append(inv)
            multiplicity += 1

        if multiplicity > 0:
            multiplicities.append(multiplicity)
            asym_inds.append(len(all_sites))

    frac_motif = np.array(all_sites)
    # drop the final entry, which is the total number of motif points
    asym_inds = np.array(asym_inds[:-1])
    multiplicities = np.array(multiplicities)
    return frac_motif, asym_inds, multiplicities, inverses
540
541
542
def atom_has_disorder(label, occupancy):
    """Return True if a site's label or occupancy marks it as disordered.

    A site counts as disordered when its label ends with ``'?'`` or its
    occupancy is a real number below 1. An occupancy which is not a real
    number (e.g. ``None`` or an unparsed string such as ``'?'``) is not
    treated as evidence of disorder, rather than raising on the comparison.

    Args:
        label: The site's label (a string).
        occupancy: The site's occupancy; any type is accepted.

    Returns:
        bool
    """
    if label.endswith('?'):
        return True
    # np.isscalar is True for strings too, so test for numbers explicitly.
    if isinstance(occupancy, (int, float, np.integer, np.floating)):
        return bool(occupancy < 1)
    return False
544
545
546
def _unique_sites(asym_unit, tol=None):
    """Return a boolean mask selecting one site from each group of overlapping sites.

    Two sites overlap when, in every coordinate, they differ by at most
    ``tol`` either directly or through the cell boundary (a difference
    within ``tol`` of 1). A site is dropped if it overlaps with any site
    appearing earlier in ``asym_unit``.

    Args:
        asym_unit: Array with one row of fractional coordinates per site.
        tol: Overlap tolerance; defaults to ``_Reader._EQUIV_SITE_TOL``.

    Returns:
        Boolean array with one entry per site, True for sites to keep.
    """
    if tol is None:
        tol = _Reader._EQUIV_SITE_TOL
    # pairwise coordinate differences, shape (n, n, dims)
    site_diffs1 = np.abs(asym_unit[:, None] - asym_unit)
    site_diffs2 = np.abs(site_diffs1 - 1)
    close = np.all((site_diffs1 <= tol) | (site_diffs2 <= tol), axis=-1)
    # Keep only pairs (i, j) with i < j, so that a site is flagged by
    # earlier sites only and never by itself.
    overlapping = np.triu(close, 1)
    return ~overlapping.any(axis=0)
554
555
556
def _heaviest_component(molecule):
    """Heaviest component (removes all but the heaviest component of the asym unit).

    Intended for removing solvents. Probably doesn't play well with disorder.

    A component's weight is the sum of its atoms' atomic weights, each
    scaled by the atom's occupancy when the occupancy is a number. Atoms
    whose atomic weight is not a number are ignored. On a tie the first
    heaviest component is returned.
    """
    # Read the components once so the weights and the final lookup are
    # guaranteed to refer to the same sequence.
    components = molecule.components

    component_weights = []
    for component in components:
        weight = 0
        for atom in component.atoms:
            if not isinstance(atom.atomic_weight, (float, int)):
                continue
            if isinstance(atom.occupancy, (float, int)):
                weight += atom.occupancy * atom.atomic_weight
            else:
                weight += atom.atomic_weight
        component_weights.append(weight)

    heaviest_ind = int(np.argmax(np.array(component_weights)))
    return components[heaviest_ind]
572
573
574
def _validate_kwargs(extract_data, include_if):
    """Validate and normalise the ``extract_data`` and ``include_if`` options.

    Args:
        extract_data: ``None`` or a dict mapping tag names to callables.
            Keys must not be one of the reserved tag names.
        include_if: ``None`` or an iterable of callables.

    Returns:
        ``(extract_data, include_if)`` where ``None`` has been replaced by
        an empty dict / empty tuple, and ``include_if`` is always a tuple so
        it can be iterated once per item even if a one-shot iterable (such
        as a generator) was passed.

    Raises:
        ValueError: If ``extract_data`` is not a dict of callables or uses a
            reserved key, or if ``include_if`` is not an iterable of callables.
    """

    reserved_tags = {'motif', 'cell', 'name',
                     'asymmetric_unit', 'wyckoff_multiplicities',
                     'types', 'filename'}

    if extract_data is None:
        extract_data = {}
    else:
        if not isinstance(extract_data, dict):
            raise ValueError('extract_data must be a dict of callables')
        for key, extractor in extract_data.items():
            if not callable(extractor):
                raise ValueError('extract_data must be a dict of callables')
            if key in reserved_tags:
                raise ValueError(f'extract_data includes reserved key {key}')

    if include_if is None:
        include_if = ()
    else:
        # Materialise first: validating a generator in place would exhaust
        # it and silently disable every check afterwards.
        try:
            include_if = tuple(include_if)
        except TypeError:
            raise ValueError('include_if must be a list of callables') from None
        if not all(callable(func) for func in include_if):
            raise ValueError('include_if must be a list of callables')

    return extract_data, include_if
597