Passed
Push — master ( 4570af...abe023 )
by Daniel
03:02
created

amd.io._expand_asym_unit()   B

Complexity

Conditions 8

Size

Total Lines 53
Code Lines 36

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 36
dl 0
loc 53
rs 7.1493
c 0
b 0
f 0
cc 8
nop 2

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
"""Tools for reading crystals from files, or from the CSD with ``csd-python-api``. 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
2
The readers return :class:`.periodicset.PeriodicSet` objects representing the 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
3
crystal which can be passed to :func:`.calculate.AMD` and :func:`.calculate.PDD` 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
4
to get their invariants.
5
"""
6
7
import os
8
import functools
9
import warnings
10
from typing import Callable, Iterable, Sequence, Tuple
11
12
import numpy as np
13
import ase.io.cif
14
import ase.data
15
from ase.spacegroup.spacegroup import parse_sitesym
16
17
from . import utils
18
from .periodicset import PeriodicSet
19
20
try:
21
    import ccdc.io
22
    import ccdc.search
23
    _CSD_PYTHON_API_ENABLED = True
24
except (ImportError, RuntimeError) as _:
25
    _CSD_PYTHON_API_ENABLED = False
26
27
def _custom_warning(message, category, filename, lineno, *args, **kwargs):
0 ignored issues
show
Unused Code introduced by
The argument filename seems to be unused.
Loading history...
Unused Code introduced by
The argument lineno seems to be unused.
Loading history...
Unused Code introduced by
The argument args seems to be unused.
Loading history...
Unused Code introduced by
The argument kwargs seems to be unused.
Loading history...
28
    return f'{category.__name__}: {message}\n'
29
30
warnings.formatwarning = _custom_warning
31
32
_EQUIV_SITE_TOL = 1e-3
33
_ATOM_SITE_FRACT_TAGS = [
34
    '_atom_site_fract_x',
35
    '_atom_site_fract_y',
36
    '_atom_site_fract_z',]
37
_ATOM_SITE_CARTN_TAGS = [
38
    '_atom_site_cartn_x',
39
    '_atom_site_cartn_y',
40
    '_atom_site_cartn_z',]
41
_SYMOP_TAGS = [
42
    '_space_group_symop_operation_xyz',
43
    '_space_group_symop.operation_xyz',
44
    '_symmetry_equiv_pos_as_xyz',]
45
46
47
class _ParseError(ValueError):
48
    """Raised when an item cannot be parsed into a periodic set."""
49
    pass
0 ignored issues
show
Unused Code introduced by
Unnecessary pass statement
Loading history...
50
51
52
class _Reader:
53
    """Base Reader class. Contains parsers for converting ase CifBlock
54
    and ccdc Entry objects to PeriodicSets.
55
    Intended to be inherited and then a generator set to self._generator.
56
    First make a new method for _Reader converting object to PeriodicSet
57
    (e.g. named _X_to_PSet). Then make this class outline:
58
    class XReader(_Reader):
59
        def __init__(self, ..., **kwargs):
60
        super().__init__(**kwargs)
61
        # setup and checks
62
        # make 'iterable' which yields objects to be converted (e.g. CIFBlock, Entry)
63
        # set self._generator like this
64
        self._generator = self._map(iterable, self._X_to_PSet)
65
    """
66
67
    _DISORDER_OPTIONS = {'skip', 'ordered_sites', 'all_sites'}
68
69
    def __init__(
70
            self,
71
            remove_hydrogens=False,
72
            disorder='skip',
73
            heaviest_component=False,
74
            show_warnings=True,
75
    ):
76
77
        if disorder not in _Reader._DISORDER_OPTIONS:
78
            raise ValueError(f'disorder parameter {disorder} must be one of {_Reader._DISORDER_OPTIONS}')
79
80
        self.remove_hydrogens = remove_hydrogens
81
        self.disorder = disorder
82
        self.heaviest_component = heaviest_component
83
        self.show_warnings = show_warnings
84
        self.current_filename = None
85
        self._generator = []
86
87
    def __iter__(self):
88
        yield from self._generator
89
90
    def read_one(self):
91
        """Read the next (or first) item."""
92
        return next(iter(self._generator))
93
94
    def _map(self, func: Callable, iterable: Iterable) -> Iterable[PeriodicSet]:
95
        """Iterates over iterable, passing items through parser and yielding the result.
96
        Applies warning and include_if filters, catches bad structures and warns.
97
        """
98
99
        if not self.show_warnings:
100
            warnings.simplefilter('ignore')
101
102
        for item in iterable:
103
104
            with warnings.catch_warnings(record=True) as warning_msgs:
105
106
                parse_failed = False
107
                try:
108
                    periodic_set = func(item)
109
                except _ParseError as err:
110
                    parse_failed = str(err)
111
112
            if parse_failed:
113
                warnings.warn(parse_failed)
114
                continue
115
116
            for warning in warning_msgs:
117
                msg = f'{periodic_set.name}: {warning.message}'
118
                warnings.warn(msg, category=warning.category)
119
120
            if self.current_filename:
121
                periodic_set.tags['filename'] = self.current_filename
122
123
            yield periodic_set
124
125
126
class CifReader(_Reader):
127
    """Read all structures in a .cif file or all files in a folder 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
128
    with ase or csd-python-api (if installed), yielding 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
129
    :class:`.periodicset.PeriodicSet` s.
130
131
    Parameters
132
    ----------
133
    path : str
134
        Path to a .cif file or directory. (Other files are accepted when using
135
        ``reader='ccdc'``, if csd-python-api is installed.)
136
    reader : str, optional
137
        The backend package used for parsing. Default is :code:`ase`,
138
        to use csd-python-api change to :code:`ccdc`. The ccdc reader should 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
139
        be able to read any format accepted by :class:`ccdc.io.EntryReader`,
140
        though only cifs have been tested.
141
    remove_hydrogens : bool, optional
142
        Remove Hydrogens from the crystal.
143
    disorder : str, optional
144
        Controls how disordered structures are handled. Default is ``skip`` which skips any crystal 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
145
        with disorder, since disorder conflicts with the periodic set model. To read disordered 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
146
        structures anyway, choose either :code:`ordered_sites` to remove sites with disorder or 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
147
        :code:`all_sites` include all sites regardless.
148
    heaviest_component : bool, optional
149
        csd-python-api only. Removes all but the heaviest molecule in the asymmeric unit,
150
        intended for removing solvents. 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
151
    show_warnings : bool, optional
152
        Controls whether warnings that arise during reading are printed.
153
154
    Yields
155
    ------
156
    :class:`.periodicset.PeriodicSet`
157
        Represents the crystal as a periodic set, consisting of a finite set of points (motif)
158
        and lattice (unit cell). Contains other useful data, e.g. the crystal's name and
159
        information about the asymmetric unit for calculation.
160
161
    Examples
162
    --------
163
    
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
164
        ::
165
166
            # Put all crystals in a .CIF in a list
167
            structures = list(amd.CifReader('mycif.cif'))
168
169
            # Can also accept path to a directory, reading all files inside
170
            structures = list(amd.CifReader('path/to/folder'))
171
172
            # Reads just one if the .CIF has just one crystal
173
            periodic_set = amd.CifReader('mycif.cif').read_one()
174
175
            # List of AMDs (k=100) of crystals in a .CIF
176
            amds = [amd.AMD(periodic_set, 100) for periodic_set in amd.CifReader('mycif.cif')]
177
    """
178
179
    def __init__(
0 ignored issues
show
best-practice introduced by
Too many arguments (7/5)
Loading history...
180
            self,
181
            path,
182
            reader='ase',
183
            remove_hydrogens=False,
184
            disorder='skip',
185
            heaviest_component=False,
186
            show_warnings=True,
187
    ):
188
189
        super().__init__(
190
            remove_hydrogens=remove_hydrogens,
191
            disorder=disorder,
192
            heaviest_component=heaviest_component,
193
            show_warnings=show_warnings,
194
        )
195
196
        if reader not in ('ase', 'ccdc'):
197
            raise ValueError(f'Invalid reader {reader}; must be ase or ccdc.')
198
199
        if reader == 'ase' and heaviest_component:
200
            raise NotImplementedError('Parameter heaviest_component not implimented for ase, only ccdc.')
201
202
        if reader == 'ase':
203
            extensions = {'cif'}
204
            file_parser = ase.io.cif.parse_cif
205
            converter = functools.partial(cifblock_to_periodicset,
206
                                          remove_hydrogens=remove_hydrogens,
207
                                          disorder=disorder)
208
209
        elif reader == 'ccdc':
210
            if not _CSD_PYTHON_API_ENABLED:
211
                raise ImportError("Failed to import csd-python-api; check it is installed and licensed.")
212
            extensions = ccdc.io.EntryReader.known_suffixes
213
            file_parser = ccdc.io.EntryReader
214
            converter = functools.partial(entry_to_periodicset,
215
                                          remove_hydrogens=remove_hydrogens,
216
                                          disorder=disorder,
217
                                          heaviest_component=heaviest_component)
218
219
        if os.path.isfile(path):
220
            generator = file_parser(path)
0 ignored issues
show
introduced by
The variable file_parser does not seem to be defined for all execution paths.
Loading history...
221
        elif os.path.isdir(path):
222
            generator = self._generate_from_dir(path, file_parser, extensions)
0 ignored issues
show
introduced by
The variable extensions does not seem to be defined for all execution paths.
Loading history...
223
        else:
224
            raise FileNotFoundError(f'No such file or directory: {path}')
225
226
        self._generator = self._map(converter, generator)
0 ignored issues
show
introduced by
The variable converter does not seem to be defined for all execution paths.
Loading history...
227
228
    def _generate_from_dir(self, path, file_parser, extensions):
229
        for file in os.listdir(path):
230
            suff = os.path.splitext(file)[1][1:]
231
            if suff.lower() in extensions:
232
                self.current_filename = file
233
                yield from file_parser(os.path.join(path, file))
234
235
236
class CSDReader(_Reader):
237
    """Read structures from the CSD with csd-python-api, yielding 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
238
    :class:`.periodicset.PeriodicSet` s.
239
240
    Parameters
241
    ----------
242
    refcodes : List[str], optional
243
        List of CSD refcodes to read. If None or 'CSD', iterates over the whole CSD.
244
    families : bool, optional
245
        Read all entries whose refcode starts with the given strings, or 'families' 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
246
        (e.g. giving 'DEBXIT' reads all entries starting with DEBXIT).
247
    remove_hydrogens : bool, optional
248
        Remove hydrogens from the crystal.
249
    disorder : str, optional
250
        Controls how disordered structures are handled. Default is ``skip`` which skips any crystal 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
251
        with disorder, since disorder conflicts with the periodic set model. To read disordered 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
252
        structures anyway, choose either :code:`ordered_sites` to remove sites with disorder or 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
253
        :code:`all_sites` include all sites regardless.
254
    heaviest_component : bool, optional
255
        csd-python-api only. Removes all but the heaviest molecule in the asymmeric unit,
256
        intended for removing solvents. 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
257
    show_warnings : bool, optional
258
        Controls whether warnings that arise during reading are printed.
259
260
    Yields
261
    ------
262
    :class:`.periodicset.PeriodicSet`
263
        Represents the crystal as a periodic set, consisting of a finite set of points (motif)
264
        and lattice (unit cell). Contains other useful data, e.g. the crystal's name and
265
        information about the asymmetric unit for calculation.
266
267
    Examples
268
    --------
269
    
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
270
        ::
271
272
            # Put these entries in a list
273
            refcodes = ['DEBXIT01', 'DEBXIT05', 'HXACAN01']
274
            structures = list(amd.CSDReader(refcodes))
275
276
            # Read refcode families (any whose refcode starts with strings in the list)
277
            refcode_families = ['ACSALA', 'HXACAN']
278
            structures = list(amd.CSDReader(refcode_families, families=True))
279
280
            # Get AMDs (k=100) for crystals in these families
281
            refcodes = ['ACSALA', 'HXACAN']
282
            amds = []
283
            for periodic_set in amd.CSDReader(refcodes, families=True):
284
                amds.append(amd.AMD(periodic_set, 100))
285
286
            # Giving the reader nothing reads from the whole CSD.
287
            reader = amd.CSDReader()
288
            
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
289
            # looping over this generic reader will yield all CSD entries
290
            for periodic_set in reader:
291
                ...
292
            
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
293
            # or, read structures by refcode on demand
294
            debxit01 = reader.entry('DEBXIT01')
295
    """
296
297
    def __init__(
0 ignored issues
show
best-practice introduced by
Too many arguments (7/5)
Loading history...
298
            self,
299
            refcodes=None,
300
            families=False,
301
            remove_hydrogens=False,
302
            disorder='skip',
303
            heaviest_component=False,
304
            show_warnings=True,
305
    ):
306
307
        super().__init__(
308
            remove_hydrogens=remove_hydrogens,
309
            disorder=disorder,
310
            heaviest_component=heaviest_component,
311
            show_warnings=show_warnings,
312
        )
313
314
        if not _CSD_PYTHON_API_ENABLED:
315
            raise ImportError('Failed to import csd-python-api; check it is installed and licensed.')
316
317
        if isinstance(refcodes, str) and refcodes.lower() == 'csd':
318
            refcodes = None
319
320
        if refcodes is None:
321
            families = False
322
        else:
323
            refcodes = [refcodes] if isinstance(refcodes, str) else list(refcodes)
324
325
        # families parameter reads all crystals with ids starting with passed refcodes
326
        if families:
327
            all_refcodes = []
328
            for refcode in refcodes:
329
                query = ccdc.search.TextNumericSearch()
330
                query.add_identifier(refcode)
331
                hits = [hit.identifier for hit in query.search()]
332
                all_refcodes.extend(hits)
333
334
            # filter to unique refcodes
335
            seen = set()
336
            seen_add = seen.add
337
            refcodes = [
338
                refcode for refcode in all_refcodes
339
                if not (refcode in seen or seen_add(refcode))]
340
341
        self._entry_reader = ccdc.io.EntryReader('CSD')
342
343
        converter = functools.partial(entry_to_periodicset,
344
                                      remove_hydrogens=remove_hydrogens,
345
                                      disorder=disorder,
346
                                      heaviest_component=heaviest_component)
347
348
        generator = self._ccdc_generator(refcodes)
349
        self._generator = self._map(converter, generator)
350
351
    def entry(self, refcode: str, **kwargs) -> PeriodicSet:
352
        """Read a crystal given a CSD refcode, returning a :class:`.periodicset.PeriodicSet`."""
353
354
        entry = self._entry_reader.entry(refcode)
355
        periodic_set = entry_to_periodicset(entry, **kwargs)
356
        return periodic_set
357
358
    def _ccdc_generator(self, refcodes):
359
        """Generates ccdc Entries from CSD refcodes."""
360
361
        if refcodes is None:
362
            for entry in self._entry_reader:
363
                yield entry
364
        else:
365
            for refcode in refcodes:
366
                try:
367
                    entry = self._entry_reader.entry(refcode)
368
                    yield entry
369
                except RuntimeError:    # if self.show_warnings?
370
                    warnings.warn(f'Identifier {refcode} not found in database')
371
372
373
def entry_to_periodicset(
374
        entry,
375
        remove_hydrogens=False,
376
        disorder='skip',
377
        heaviest_component=False
378
) -> PeriodicSet:
379
    """:class:`ccdc.entry.Entry` --> :class:`amd.periodicset.PeriodicSet`.
380
    Entry is the type returned by :class:`ccdc.io.EntryReader`."""
381
382
    crystal = entry.crystal
383
384
    if not entry.has_3d_structure:
385
        raise _ParseError(f'{crystal.identifier}: Has no 3D structure')
386
387
    molecule = crystal.disordered_molecule
388
389
    if disorder == 'skip':
390
        if crystal.has_disorder or entry.has_disorder or \
391
            any(_atom_has_disorder(a.label, a.occupancy) for a in molecule.atoms):
392
            raise _ParseError(f'{crystal.identifier}: Has disorder')
393
394
    elif disorder == 'ordered_sites':
395
        molecule.remove_atoms(a for a in molecule.atoms
396
                              if _atom_has_disorder(a.label, a.occupancy))
397
398
    if remove_hydrogens:
399
        molecule.remove_atoms(a for a in molecule.atoms if a.atomic_symbol in 'HD')
400
401
    if heaviest_component and len(molecule.components) > 1:
402
        molecule = _heaviest_component(molecule)
403
404
    if not molecule.all_atoms_have_sites or \
405
        any(a.fractional_coordinates is None for a in molecule.atoms):
406
        raise _ParseError(f'{crystal.identifier}: Has atoms without sites')
407
408
    crystal.molecule = molecule
409
    asym_atoms = crystal.asymmetric_unit_molecule.atoms
410
    asym_unit = np.array([tuple(a.fractional_coordinates) for a in asym_atoms])
411
    asym_unit = np.mod(asym_unit, 1)
412
    asym_types = [a.atomic_number for a in asym_atoms]
413
    cell = utils.cellpar_to_cell(*crystal.cell_lengths, *crystal.cell_angles)
414
415
    sitesym = crystal.symmetry_operators
416
    if not sitesym:
417
        sitesym = ['x,y,z', ]
418
419
    if disorder != 'all_sites':
420
        keep_sites = _unique_sites(asym_unit)
421
        if not np.all(keep_sites):
422
            warnings.warn(f'{crystal.identifier}: May have overlapping sites; duplicates will be removed')
423
        asym_unit = asym_unit[keep_sites]
424
        asym_types = [sym for sym, keep in zip(asym_types, keep_sites) if keep]
425
426
    if asym_unit.shape[0] == 0:
427
        raise _ParseError(f'{crystal.identifier}: Has no valid sites')
428
429
    frac_motif, asym_inds, multiplicities, inverses = _expand_asym_unit(asym_unit, sitesym)
430
    full_types = np.array([asym_types[i] for i in inverses])
431
    motif = frac_motif @ cell
432
433
    tags = {
434
        'name': crystal.identifier,
435
        'asymmetric_unit': asym_inds,
436
        'wyckoff_multiplicities': multiplicities,
437
        'types': full_types
438
    }
439
440
    return PeriodicSet(motif, cell, **tags)
441
442
443
def cifblock_to_periodicset(
444
        block,
445
        remove_hydrogens=False,
446
        disorder='skip'
447
) -> PeriodicSet:
448
    """:class:`ase.io.cif.CIFBlock` --> :class:`amd.periodicset.PeriodicSet`. 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
449
    CIFBlock is the type returned by :class:`ase.io.cif.parse_cif`.
450
    """
451
452
    cell = block.get_cell().array
453
454
    # asymmetric unit fractional coords
455
    asym_unit = [block.get(name) for name in _ATOM_SITE_FRACT_TAGS]
456
    if None in asym_unit:
457
        asym_motif = [block.get(name) for name in _ATOM_SITE_CARTN_TAGS]
458
        if None in asym_motif:
459
            raise _ParseError(f'{block.name}: Has no sites')
460
        asym_unit = np.array(asym_motif) @ np.linalg.inv(cell)
461
    asym_unit = np.mod(np.array(asym_unit).T, 1)
462
463
    try:
464
        asym_types = [ase.data.atomic_numbers[s] for s in block.get_symbols()]
465
    except ase.io.cif.NoStructureData as _:
466
        asym_types = [0 for _ in range(len(asym_unit))]
467
468
    sitesym = ['x,y,z', ]
469
    for tag in _SYMOP_TAGS:
470
        if tag in block:
471
            sitesym = block[tag]
472
            break
473
    if isinstance(sitesym, str):
474
        sitesym = [sitesym]
475
476
    remove_sites = []
477
478
    occupancies = block.get('_atom_site_occupancy')
479
    labels = block.get('_atom_site_label')
480
    if occupancies is not None:
481
        if disorder == 'skip':
482
            if any(_atom_has_disorder(lab, occ) for lab, occ in zip(labels, occupancies)):
483
                raise _ParseError(f'{block.name}: Has disorder')
484
        elif disorder == 'ordered_sites':
485
            remove_sites.extend(
486
                (i for i, (lab, occ) in enumerate(zip(labels, occupancies))
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable i does not seem to be defined.
Loading history...
487
                 if _atom_has_disorder(lab, occ)))
488
489
    if remove_hydrogens:
490
        remove_sites.extend((i for i, num in enumerate(asym_types) if num == 1))
491
492
    asym_unit = np.delete(asym_unit, remove_sites, axis=0)
493
    asym_types = [s for i, s in enumerate(asym_types) if i not in remove_sites]
494
495
    if disorder != 'all_sites':
496
        keep_sites = _unique_sites(asym_unit)
497
        if not np.all(keep_sites):
498
            warnings.warn(f'{block.name}: May have overlapping sites; duplicates will be removed')
499
        asym_unit = asym_unit[keep_sites]
500
        asym_types = [sym for sym, keep in zip(asym_types, keep_sites) if keep]
501
502
    if asym_unit.shape[0] == 0:
503
        raise _ParseError(f'{block.name}: Has no valid sites')
504
505
    frac_motif, asym_inds, multiplicities, inverses = _expand_asym_unit(asym_unit, sitesym)
506
    full_types = np.array([asym_types[i] for i in inverses])
507
    motif = frac_motif @ cell
508
509
    tags = {
510
        'name': block.name,
511
        'asymmetric_unit': asym_inds,
512
        'wyckoff_multiplicities': multiplicities,
513
        'types': full_types
514
    }
515
516
    return PeriodicSet(motif, cell, **tags)
517
518
519
def _expand_asym_unit(
520
        asym_unit: np.ndarray,
521
        sitesym: Sequence[str]
522
) -> Tuple[np.ndarray, ...]:
523
    """
524
    Asymmetric unit's fractional coords + site symmetries (as strings)
525
    -->
526
    fractional motif, asymmetric unit indices, multiplicities and inverses.
527
    """
528
529
    rotations, translations = parse_sitesym(sitesym)
530
    all_sites = []
531
    asym_inds = [0]
532
    multiplicities = []
533
    inverses = []
534
535
    for inv, site in enumerate(asym_unit):
536
        multiplicity = 0
537
538
        for rot, trans in zip(rotations, translations):
539
            site_ = np.mod(np.dot(rot, site) + trans, 1)
540
541
            if not all_sites:
542
                all_sites.append(site_)
543
                inverses.append(inv)
544
                multiplicity += 1
545
                continue
546
547
            # check if site_ overlaps with existing sites
548
            diffs1 = np.abs(site_ - all_sites)
549
            diffs2 = np.abs(diffs1 - 1)
550
            mask = np.all((diffs1 <= _EQUIV_SITE_TOL) | (diffs2 <= _EQUIV_SITE_TOL), axis=-1)
551
552
            if np.any(mask):
553
                where_equal = np.argwhere(mask).flatten()
554
                for ind in where_equal:
555
                    if inverses[ind] == inv:
556
                        pass
557
                    else:
558
                        warnings.warn(f'Equivalent sites at positions {inverses[ind]}, {inv}')
559
            else:
560
                all_sites.append(site_)
561
                inverses.append(inv)
562
                multiplicity += 1
563
564
        if multiplicity > 0:
565
            multiplicities.append(multiplicity)
566
            asym_inds.append(len(all_sites))
567
568
    frac_motif = np.array(all_sites)
569
    asym_inds = np.array(asym_inds[:-1])
570
    multiplicities = np.array(multiplicities)
571
    return frac_motif, asym_inds, multiplicities, inverses
572
573
574
def _atom_has_disorder(label, occupancy):
575
    """Return True if atom has disorder and False otherwise."""
576
    return label.endswith('?') or (np.isscalar(occupancy) and occupancy < 1)
577
578
579
def _unique_sites(asym_unit):
580
    site_diffs1 = np.abs(asym_unit[:, None] - asym_unit)
581
    site_diffs2 = np.abs(site_diffs1 - 1)
582
    overlapping = np.triu(np.all(
583
        (site_diffs1 <= _EQUIV_SITE_TOL) | (site_diffs2 <= _EQUIV_SITE_TOL),
584
        axis=-1), 1)
585
    return ~overlapping.any(axis=0)
586
587
588
def _heaviest_component(molecule):
589
    """Heaviest component (removes all but the heaviest component of the asym unit).
590
    Intended for removing solvents. Probably doesn't play well with disorder"""
591
    component_weights = []
592
    for component in molecule.components:
593
        weight = 0
594
        for a in component.atoms:
595
            if isinstance(a.atomic_weight, (float, int)):
596
                if isinstance(a.occupancy, (float, int)):
597
                    weight += a.occupancy * a.atomic_weight
598
                else:
599
                    weight += a.atomic_weight
600
        component_weights.append(weight)
601
    largest_component_ind = np.argmax(np.array(component_weights))
602
    molecule = molecule.components[largest_component_ind]
603
    return molecule
604