Test Failed
Push — master ( ba7657...6f99a2 )
by Daniel
07:11
created

amd.io.frac_molecular_centres()   A

Complexity

Conditions 2

Size

Total Lines 13
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 10
dl 0
loc 13
rs 9.9
c 0
b 0
f 0
cc 2
nop 1
1
"""Tools for reading crystals from files, or from the CSD with ``csd-python-api``. 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
2
The readers return :class:`.periodicset.PeriodicSet` objects representing the 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
3
crystal which can be passed to :func:`.calculate.AMD` and :func:`.calculate.PDD` 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
4
to get their invariants.
5
"""
6
7
import os
8
import functools
9
import warnings
10
from typing import Callable, Iterable, Sequence, Tuple
11
12
import numpy as np
13
import numba
14
import ase.io.cif
15
import ase.data
16
from ase.spacegroup.spacegroup import parse_sitesym
17
18
from .utils import cellpar_to_cell
19
from .periodicset import PeriodicSet
20
21
try:
22
    import ccdc.io
23
    import ccdc.search
24
    _CSD_PYTHON_API_ENABLED = True
25
except (ImportError, RuntimeError) as _:
26
    _CSD_PYTHON_API_ENABLED = False
27
28
def _custom_warning(message, category, filename, lineno, *args, **kwargs):
0 ignored issues
show
Unused Code introduced by
The argument filename seems to be unused.
Loading history...
Unused Code introduced by
The argument lineno seems to be unused.
Loading history...
Unused Code introduced by
The argument args seems to be unused.
Loading history...
Unused Code introduced by
The argument kwargs seems to be unused.
Loading history...
29
    return f'{category.__name__}: {message}\n'
30
31
warnings.formatwarning = _custom_warning
32
33
_EQUIV_SITE_TOL = 1e-3
34
_DISORDER_OPTIONS = {'skip', 'ordered_sites', 'all_sites'}
35
_ATOM_SITE_FRACT_TAGS = [
36
    '_atom_site_fract_x',
37
    '_atom_site_fract_y',
38
    '_atom_site_fract_z',
39
]
40
_ATOM_SITE_CARTN_TAGS = [
41
    '_atom_site_cartn_x',
42
    '_atom_site_cartn_y',
43
    '_atom_site_cartn_z',
44
]
45
_SYMOP_TAGS = [
46
    '_space_group_symop_operation_xyz',
47
    '_space_group_symop.operation_xyz',
48
    '_symmetry_equiv_pos_as_xyz',
49
]
50
51
52
class _Reader:
53
54
    def __init__(
0 ignored issues
show
best-practice introduced by
Too many arguments (6/5)
Loading history...
55
            self,
56
            remove_hydrogens=False,
57
            disorder='skip',
58
            heaviest_component=False,
59
            molecular_centres=False,
60
            show_warnings=True
61
    ):
62
63
        if disorder not in _DISORDER_OPTIONS:
64
            raise ValueError(f'disorder parameter {disorder} must be one of {_DISORDER_OPTIONS}')
65
66
        self.remove_hydrogens = remove_hydrogens
67
        self.disorder = disorder
68
        self.heaviest_component = heaviest_component
69
        self.molecular_centres = molecular_centres
70
        self.show_warnings = show_warnings
71
        self._generator = []
72
73
    def __iter__(self):
74
        yield from self._generator
75
76
    def read_one(self):
77
        """Read the first, and usually the only, item."""
78
        try:
79
            return next(iter(self._generator))
80
        except StopIteration:
81
            return None
82
83
84
class CifReader(_Reader):
85
    """Read all structures in a .cif file or all files in a folder 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
86
    with ase or csd-python-api (if installed), yielding 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
87
    :class:`.periodicset.PeriodicSet` s.
88
89
    Parameters
90
    ----------
91
    path : str
92
        Path to a .cif file or directory. (Other files are accepted when using
93
        ``reader='ccdc'``, if csd-python-api is installed.)
94
    reader : str, optional
95
        The backend package used for parsing. Default is :code:`ase`,
96
        to use csd-python-api change to :code:`ccdc`. The ccdc reader should 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
97
        be able to read any format accepted by :class:`ccdc.io.EntryReader`,
98
        though only cifs have been tested.
99
    remove_hydrogens : bool, optional
100
        Remove Hydrogens from the crystal.
101
    disorder : str, optional
102
        Controls how disordered structures are handled. Default is ``skip`` which skips any crystal 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
103
        with disorder, since disorder conflicts with the periodic set model. To read disordered 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
104
        structures anyway, choose either :code:`ordered_sites` to remove sites with disorder or 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
105
        :code:`all_sites` include all sites regardless.
106
    heaviest_component : bool, optional
107
        csd-python-api only. Removes all but the heaviest molecule in the asymmeric unit,
108
        intended for removing solvents.
109
    molecular_centres : bool, default False
110
        csd-python-api only. Extract the centres of molecules in the unit cell and store 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
111
        in attribute molecular_centres.
112
    show_warnings : bool, optional
113
        Controls whether warnings that arise during reading are printed.
114
115
    Yields
116
    ------
117
    :class:`.periodicset.PeriodicSet`
118
        Represents the crystal as a periodic set, consisting of a finite set of points (motif)
119
        and lattice (unit cell). Contains other useful data, e.g. the crystal's name and
120
        information about the asymmetric unit for calculation.
121
122
    Examples
123
    --------
124
    
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
125
        ::
126
127
            # Put all crystals in a .CIF in a list
128
            structures = list(amd.CifReader('mycif.cif'))
129
130
            # Can also accept path to a directory, reading all files inside
131
            structures = list(amd.CifReader('path/to/folder'))
132
133
            # Reads just one if the .CIF has just one crystal
134
            periodic_set = amd.CifReader('mycif.cif').read_one()
135
136
            # List of AMDs (k=100) of crystals in a .CIF
137
            amds = [amd.AMD(periodic_set, 100) for periodic_set in amd.CifReader('mycif.cif')]
138
    """
139
140
    def __init__(
0 ignored issues
show
best-practice introduced by
Too many arguments (8/5)
Loading history...
141
            self,
142
            path,
143
            reader='ase',
144
            remove_hydrogens=False,
145
            disorder='skip',
146
            heaviest_component=False,
147
            molecular_centres=False,
148
            show_warnings=True,
149
    ):
150
151
        super().__init__(
152
            remove_hydrogens=remove_hydrogens,
153
            disorder=disorder,
154
            heaviest_component=heaviest_component,
155
            molecular_centres=molecular_centres,
156
            show_warnings=show_warnings
157
        )
158
159
        if reader in ('ase', 'pycodcif'):
160
161
            if heaviest_component:
162
                raise NotImplementedError('Parameter heaviest_component only implimented for reader="ccdc".')
163
            if molecular_centres:
164
                raise NotImplementedError('Parameter molecular_centres only implimented for reader="ccdc".')
165
166
            extensions = {'cif'}
167
            file_parser = functools.partial(ase.io.cif.parse_cif, reader=reader)
168
            converter = functools.partial(
169
                cifblock_to_periodicset,
170
                remove_hydrogens=self.remove_hydrogens,
171
                disorder=self.disorder
172
            )
173
174
        elif reader == 'ccdc':
175
176
            if not _CSD_PYTHON_API_ENABLED:
177
                raise ImportError("Failed to import csd-python-api; check it is installed and licensed.")
178
179
            extensions = ccdc.io.EntryReader.known_suffixes
180
            file_parser = ccdc.io.EntryReader
181
            converter = functools.partial(
182
                entry_to_periodicset,
183
                remove_hydrogens=self.remove_hydrogens,
184
                disorder=self.disorder,
185
                molecular_centres=self.molecular_centres,
186
                heaviest_component=self.heaviest_component
187
            )
188
189
        else:
190
            raise ValueError(f'Invalid reader {reader}.')
191
192
        if os.path.isfile(path):
193
            generator = file_parser(path)
194
        elif os.path.isdir(path):
195
            generator = self._generate_from_dir(path, file_parser, extensions)
196
        else:
197
            raise FileNotFoundError(f'No such file or directory: {path}')
198
199
        self._generator = _map(converter, generator, self.show_warnings)
200
201
    def _generate_from_dir(self, path, file_parser, extensions):
0 ignored issues
show
Coding Style introduced by
This method could be written as a function/class method.

If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example

class Foo:
    def some_method(self, x, y):
        return x + y;

could be written as

class Foo:
    @classmethod
    def some_method(cls, x, y):
        return x + y;
Loading history...
202
        for file in os.listdir(path):
203
            suff = os.path.splitext(file)[1][1:]
204
            if suff.lower() in extensions:
205
                yield from file_parser(os.path.join(path, file))
206
207
208
class CSDReader(_Reader):
209
    """Read structures from the CSD with csd-python-api, yielding 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
210
    :class:`.periodicset.PeriodicSet` s.
211
212
    Parameters
213
    ----------
214
    refcodes : List[str], optional
215
        List of CSD refcodes to read. If None or 'CSD', iterates over the whole CSD.
216
    families : bool, optional
217
        Read all entries whose refcode starts with the given strings, or 'families' 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
218
        (e.g. giving 'DEBXIT' reads all entries starting with DEBXIT).
219
    remove_hydrogens : bool, optional
220
        Remove hydrogens from the crystal.
221
    disorder : str, optional
222
        Controls how disordered structures are handled. Default is ``skip`` which skips any crystal 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
223
        with disorder, since disorder conflicts with the periodic set model. To read disordered 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
224
        structures anyway, choose either :code:`ordered_sites` to remove sites with disorder or 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
225
        :code:`all_sites` include all sites regardless.
226
    heaviest_component : bool, optional
227
        Removes all but the heaviest molecule in the asymmeric unit, intended for removing solvents.
228
    molecular_centres : bool, default False
229
        Extract the centres of molecules in the unit cell and store in attribute molecular_centres.
230
    show_warnings : bool, optional
231
        Controls whether warnings that arise during reading are printed.
232
233
    Yields
234
    ------
235
    :class:`.periodicset.PeriodicSet`
236
        Represents the crystal as a periodic set, consisting of a finite set of points (motif)
237
        and lattice (unit cell). Contains other useful data, e.g. the crystal's name and
238
        information about the asymmetric unit for calculation.
239
240
    Examples
241
    --------
242
    
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
243
        ::
244
245
            # Put these entries in a list
246
            refcodes = ['DEBXIT01', 'DEBXIT05', 'HXACAN01']
247
            structures = list(amd.CSDReader(refcodes))
248
249
            # Read refcode families (any whose refcode starts with strings in the list)
250
            refcode_families = ['ACSALA', 'HXACAN']
251
            structures = list(amd.CSDReader(refcode_families, families=True))
252
253
            # Get AMDs (k=100) for crystals in these families
254
            refcodes = ['ACSALA', 'HXACAN']
255
            amds = []
256
            for periodic_set in amd.CSDReader(refcodes, families=True):
257
                amds.append(amd.AMD(periodic_set, 100))
258
259
            # Giving the reader nothing reads from the whole CSD.
260
            reader = amd.CSDReader()
261
            
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
262
            # looping over this generic reader will yield all CSD entries
263
            for periodic_set in reader:
264
                ...
265
            
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
266
            # or, read structures by refcode on demand
267
            debxit01 = reader.entry('DEBXIT01')
268
    """
269
270
    def __init__(
0 ignored issues
show
best-practice introduced by
Too many arguments (8/5)
Loading history...
271
            self,
272
            refcodes=None,
273
            families=False,
274
            remove_hydrogens=False,
275
            disorder='skip',
276
            heaviest_component=False,
277
            molecular_centres=False,
278
            show_warnings=True,
279
    ):
280
281
        super().__init__(
282
            remove_hydrogens=remove_hydrogens,
283
            disorder=disorder,
284
            heaviest_component=heaviest_component,
285
            molecular_centres=molecular_centres,
286
            show_warnings=show_warnings,
287
        )
288
289
        if not _CSD_PYTHON_API_ENABLED:
290
            raise ImportError('Failed to import csd-python-api; check it is installed and licensed.')
291
292
        if isinstance(refcodes, str) and refcodes.lower() == 'csd':
293
            refcodes = None
294
295
        if refcodes is None:
296
            families = False
297
        else:
298
            refcodes = [refcodes] if isinstance(refcodes, str) else list(refcodes)
299
300
        if families:
301
            refcodes = _refcodes_from_families(refcodes)
302
303
        self._entry_reader = ccdc.io.EntryReader('CSD')
304
        converter = functools.partial(entry_to_periodicset,
305
                                      remove_hydrogens=self.remove_hydrogens,
306
                                      disorder=self.disorder,
307
                                      molecular_centres=self.molecular_centres,
308
                                      heaviest_component=self.heaviest_component)
309
310
        generator = self._ccdc_generator(refcodes)
311
        self._generator = _map(converter, generator, self.show_warnings)
312
313
    def entry(self, refcode: str, **kwargs) -> PeriodicSet:
314
        """Read a crystal given a CSD refcode, returning a :class:`.periodicset.PeriodicSet`.
315
        If given kwargs, overrides the kwargs given to the Reader."""
316
317
        try:
318
            entry = self._entry_reader.entry(refcode)
319
        except RuntimeError:
320
            warnings.warn(f'{refcode} not found in database')
321
322
        kwargs_ = {
323
            'remove_hydrogens': self.remove_hydrogens,
324
            'disorder': self.disorder,
325
            'heaviest_component': self.heaviest_component
326
        }
327
        kwargs_.update(kwargs)
328
        converter = functools.partial(entry_to_periodicset, **kwargs_)
329
330
        if 'show_warnings' in kwargs:
0 ignored issues
show
Unused Code introduced by
Consider using dict.get for getting values from a dict if a key is present or a default if not
Loading history...
331
            show_warnings = kwargs['show_warnings']
332
        else:
333
            show_warnings = self.show_warnings
334
335
        try:
336
            periodic_set = next(iter(_map(converter, [entry], show_warnings)))
337
        except StopIteration:
338
            periodic_set = None
339
340
        return periodic_set
341
342
    def family(self, refcode_family: str, **kwargs):
0 ignored issues
show
introduced by
Missing function or method docstring
Loading history...
343
344
        kwargs_ = {
345
            'remove_hydrogens': self.remove_hydrogens,
346
            'disorder': self.disorder,
347
            'heaviest_component': self.heaviest_component,
348
            'molecular_centres': self.molecular_centres
349
        }
350
351
        kwargs_.update(kwargs)
352
        converter = functools.partial(entry_to_periodicset, **kwargs_)
353
        refcodes = _refcodes_from_families([refcode_family])
354
        generator = self._ccdc_generator(refcodes)
355
356
        if 'show_warnings' in kwargs:
0 ignored issues
show
Unused Code introduced by
Consider using dict.get for getting values from a dict if a key is present or a default if not
Loading history...
357
            show_warnings = kwargs['show_warnings']
358
        else:
359
            show_warnings = self.show_warnings
360
361
        yield from _map(converter, generator, show_warnings)
362
363
    def _ccdc_generator(self, refcodes):
364
        """Generates ccdc Entries from CSD refcodes."""
365
366
        if refcodes is None:
367
            for entry in self._entry_reader:
368
                yield entry
369
        else:
370
            for refcode in refcodes:
371
                try:
372
                    entry = self._entry_reader.entry(refcode)
373
                    yield entry
374
                except RuntimeError:
375
                    warnings.warn(f'{refcode} not found in database')
376
377
378
class _ParseError(ValueError):
379
    """Raised when an item cannot be parsed into a periodic set."""
380
    pass
0 ignored issues
show
Unused Code introduced by
Unnecessary pass statement
Loading history...
381
382
383
def entry_to_periodicset(
384
        entry,
385
        remove_hydrogens=False,
386
        disorder='skip',
387
        heaviest_component=False,
388
        molecular_centres=False
389
) -> PeriodicSet:
390
    """:class:`ccdc.entry.Entry` --> :class:`amd.periodicset.PeriodicSet`.
391
    Entry is the type returned by :class:`ccdc.io.EntryReader`.
392
393
    Parameters
394
    ----------
395
    entry : :class:`ccdc.entry.Entry`
396
        A ccdc Entry object representing a database entry.
397
    remove_hydrogens : bool, optional
398
        Remove Hydrogens from the crystal.
399
    disorder : str, optional
400
        Controls how disordered structures are handled. Default is ``skip`` which skips any crystal 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
401
        with disorder, since disorder conflicts with the periodic set model. To read disordered 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
402
        structures anyway, choose either :code:`ordered_sites` to remove sites with disorder or 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
403
        :code:`all_sites` include all sites regardless.
404
    heaviest_component : bool, optional
405
        Removes all but the heaviest molecule in the asymmeric unit,
406
        intended for removing solvents.
407
    molecular_centres : bool, default False
408
        Extract the centres of molecules in the unit cell and store in attribute molecular_centres.
409
410
    Returns
411
    -------
412
    :class:`.periodicset.PeriodicSet`
413
        Represents the crystal as a periodic set, consisting of a finite set of points (motif)
414
        and lattice (unit cell). Contains other useful data, e.g. the crystal's name and
415
        information about the asymmetric unit for calculation.
416
417
    Raises
418
    ------
419
    _ParseError :
420
        Raised if the structure can/should not be parsed for the following reasons:
421
        1. entry.has_3d_structure is False,
422
        2. disorder == 'skip' and any of:
423
            (a) any disorder flag is True,
424
            (b) any atom has fractional occupancy,
425
            (c) any atom's label ends with '?',
426
        3. entry.crystal.molecule.all_atoms_have_sites is False,
427
        4. a.fractional_coordinates is None for any a in entry.crystal.disordered_molecule,
428
        5. motif is empty after removing H, disordered sites or solvents.
429
    """
430
431
    crystal = entry.crystal
432
433
    if not entry.has_3d_structure:
434
        raise _ParseError(f'{crystal.identifier} has no 3D structure')
435
436
    molecule = crystal.disordered_molecule
437
438
    if disorder == 'skip':
439
        if crystal.has_disorder or entry.has_disorder or \
440
            any(_atom_has_disorder(a.label, a.occupancy) for a in molecule.atoms):
441
            raise _ParseError(f'{crystal.identifier} has disorder')
442
443
    elif disorder == 'ordered_sites':
444
        molecule.remove_atoms(a for a in molecule.atoms
445
                              if _atom_has_disorder(a.label, a.occupancy))
446
447
    if remove_hydrogens:
448
        molecule.remove_atoms(a for a in molecule.atoms if a.atomic_symbol in 'HD')
449
450
    if heaviest_component and len(molecule.components) > 1:
451
        molecule = _heaviest_component(molecule)
452
453
    # nonsensical results are likely if not all atoms have sites, but attempt to read anyway
454
    if any(a.fractional_coordinates is None for a in molecule.atoms):
455
        warnings.warn(f'has atoms without sites')
0 ignored issues
show
introduced by
Using an f-string that does not have any interpolated variables
Loading history...
456
        molecule.remove_atoms(a for a in molecule.atoms if a.fractional_coordinates is None)
457
        # raise _ParseError(f'{crystal.identifier} has atoms without sites')
458
459
    if not molecule.all_atoms_have_sites:
460
        raise _ParseError(f'{crystal.identifier} has atoms without sites')
461
462
    crystal.molecule = molecule
463
    asym_atoms = crystal.asymmetric_unit_molecule.atoms
464
    asym_unit = np.array([tuple(a.fractional_coordinates) for a in asym_atoms])
465
    asym_unit = np.mod(asym_unit, 1)
466
    asym_types = [a.atomic_number for a in asym_atoms]
467
    cell = cellpar_to_cell(*crystal.cell_lengths, *crystal.cell_angles)
468
469
    sitesym = crystal.symmetry_operators
470
    if not sitesym:
471
        sitesym = ['x,y,z']
472
473
    if disorder != 'all_sites':
474
        keep_sites = _unique_sites(asym_unit)
475
        if not np.all(keep_sites):
476
            warnings.warn(f'may have overlapping sites; duplicates will be removed')
0 ignored issues
show
introduced by
Using an f-string that does not have any interpolated variables
Loading history...
477
        asym_unit = asym_unit[keep_sites]
478
        asym_types = [sym for sym, keep in zip(asym_types, keep_sites) if keep]
479
480
    if asym_unit.shape[0] == 0:
481
        raise _ParseError(f'{crystal.identifier} has no valid sites')
482
483
    frac_motif, asym_inds, multiplicities, inverses = _expand_asym_unit(asym_unit, sitesym)
484
    full_types = np.array([asym_types[i] for i in inverses])
485
    motif = frac_motif @ cell
486
487
    kwargs = {
488
        'name': crystal.identifier,
489
        'asymmetric_unit': asym_inds,
490
        'wyckoff_multiplicities': multiplicities,
491
        'types': full_types
492
    }
493
494
    periodic_set = PeriodicSet(motif, cell, **kwargs)
495
496
    if molecular_centres:
497
        frac_centres = frac_molecular_centres(entry.crystal)
498
        periodic_set.molecular_centres = frac_centres @ cell
499
500
    return periodic_set
501
502
503
def cifblock_to_periodicset(
504
        block,
505
        remove_hydrogens=False,
506
        disorder='skip'
507
) -> PeriodicSet:
508
    """:class:`ase.io.cif.CIFBlock` --> :class:`amd.periodicset.PeriodicSet`. 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
509
    CIFBlock is the type returned by :class:`ase.io.cif.parse_cif`.
510
511
    Parameters
512
    ----------
513
    block : :class:`ase.io.cif.CIFBlock`
514
        An ase CIFBlock object representing a crystal.
515
    remove_hydrogens : bool, optional
516
        Remove Hydrogens from the crystal.
517
    disorder : str, optional
518
        Controls how disordered structures are handled. Default is ``skip`` which skips any crystal 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
519
        with disorder, since disorder conflicts with the periodic set model. To read disordered 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
520
        structures anyway, choose either :code:`ordered_sites` to remove sites with disorder or 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
521
        :code:`all_sites` include all sites regardless.
522
523
    Returns
524
    -------
525
    :class:`.periodicset.PeriodicSet`
526
        Represents the crystal as a periodic set, consisting of a finite set of points (motif)
527
        and lattice (unit cell). Contains other useful data, e.g. the crystal's name and
528
        information about the asymmetric unit for calculation.
529
530
    Raises
531
    ------
532
    _ParseError
533
        Raised if the structure can/should not be parsed for the following reasons:
534
        1. no sites found or motif is empty after removing H or disordered sites,
535
        2. a site has missing coordinates,
536
        3. disorder == 'skip' and any of:
537
            (a) any atom has fractional occupancy,
538
            (b) any atom's label ends with '?'.
539
    """
540
541
    cell = block.get_cell().array
542
543
    # asymmetric unit fractional coords
544
    asym_unit = [block.get(name) for name in _ATOM_SITE_FRACT_TAGS]
545
    if None in asym_unit:
546
        asym_motif = [block.get(name) for name in _ATOM_SITE_CARTN_TAGS]
547
        if None in asym_motif:
548
            raise _ParseError(f'{block.name} has no sites')
549
        asym_unit = np.array(asym_motif) @ np.linalg.inv(cell)
550
551
    if any(None in coords for coords in asym_unit):
552
        raise _ParseError(f'{block.name} has atoms without sites')
553
554
    asym_unit = np.mod(np.array(asym_unit).T, 1)
555
556
    try:
557
        asym_types = [ase.data.atomic_numbers[s] for s in block.get_symbols()]
558
    except ase.io.cif.NoStructureData as _:
559
        asym_types = [0 for _ in range(len(asym_unit))]
560
561
    sitesym = ['x,y,z', ]
562
    for tag in _SYMOP_TAGS:
563
        if tag in block:
564
            sitesym = block[tag]
565
            break
566
    if isinstance(sitesym, str):
567
        sitesym = [sitesym]
568
569
    remove_sites = []
570
571
    occupancies = block.get('_atom_site_occupancy')
572
    labels = block.get('_atom_site_label')
573
    if occupancies is not None:
574
        if disorder == 'skip':
575
            if any(_atom_has_disorder(lab, occ) for lab, occ in zip(labels, occupancies)):
576
                raise _ParseError(f'{block.name} has disorder')
577
        elif disorder == 'ordered_sites':
578
            remove_sites.extend(
579
                (i for i, (lab, occ) in enumerate(zip(labels, occupancies))
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable i does not seem to be defined.
Loading history...
580
                 if _atom_has_disorder(lab, occ)))
581
582
    if remove_hydrogens:
583
        remove_sites.extend((i for i, num in enumerate(asym_types) if num == 1))
584
585
    asym_unit = np.delete(asym_unit, remove_sites, axis=0)
586
    asym_types = [s for i, s in enumerate(asym_types) if i not in remove_sites]
587
588
    if disorder != 'all_sites':
589
        keep_sites = _unique_sites(asym_unit)
590
        if not np.all(keep_sites):
591
            warnings.warn(f'may have overlapping sites; duplicates will be removed')
0 ignored issues
show
introduced by
Using an f-string that does not have any interpolated variables
Loading history...
592
        asym_unit = asym_unit[keep_sites]
593
        asym_types = [sym for sym, keep in zip(asym_types, keep_sites) if keep]
594
595
    if asym_unit.shape[0] == 0:
596
        raise _ParseError(f'{block.name} has no valid sites')
597
598
    frac_motif, asym_inds, multiplicities, inverses = _expand_asym_unit(asym_unit, sitesym)
599
    full_types = np.array([asym_types[i] for i in inverses])
600
    motif = frac_motif @ cell
601
602
    kwargs = {
603
        'name': block.name,
604
        'asymmetric_unit': asym_inds,
605
        'wyckoff_multiplicities': multiplicities,
606
        'types': full_types
607
    }
608
609
    return PeriodicSet(motif, cell, **kwargs)
610
611
612
def frac_molecular_centres(crystal):
613
    """Returns any geometric centres of molecules in the unit cell.
614
    Expects a ccdc Crystal object and returns fractional coordiantes."""
615
616
    frac_centres = []
617
    for comp in crystal.packing(inclusion='CentroidIncluded').components:
618
        coords = [a.fractional_coordinates for a in comp.atoms]
619
        x, y, z = zip(*coords)
620
        m = len(coords)
621
        frac_centres.append((sum(x) / m, sum(y) / m, sum(z) / m))
622
    frac_centres = np.mod(np.array(frac_centres), 1)
623
    keep_inds = _unique_sites(frac_centres)
624
    return frac_centres[keep_inds]
625
626
627
def _expand_asym_unit(
628
        asym_unit: np.ndarray,
629
        sitesym: Sequence[str]
630
) -> Tuple[np.ndarray, ...]:
631
    """
632
    Asymmetric unit's fractional coords + site symmetries (as strings)
633
    -->
634
    fractional motif, asymmetric unit indices, multiplicities and inverses.
635
    """
636
637
    rotations, translations = parse_sitesym(sitesym)
638
    all_sites = []
639
    asym_inds = [0]
640
    multiplicities = []
641
    inverses = []
642
643
    for inv, site in enumerate(asym_unit):
644
        multiplicity = 0
645
646
        for rot, trans in zip(rotations, translations):
647
            site_ = np.mod(np.dot(rot, site) + trans, 1)
648
649
            if not all_sites:
650
                all_sites.append(site_)
651
                inverses.append(inv)
652
                multiplicity += 1
653
                continue
654
655
            # check if site_ overlaps with existing sites
656
            diffs1 = np.abs(site_ - all_sites)
657
            diffs2 = np.abs(diffs1 - 1)
658
            mask = np.all((diffs1 <= _EQUIV_SITE_TOL) | (diffs2 <= _EQUIV_SITE_TOL), axis=-1)
659
660
            if np.any(mask):
661
                where_equal = np.argwhere(mask).flatten()
662
                for ind in where_equal:
663
                    if inverses[ind] == inv:
664
                        pass
665
                    else:
666
                        warnings.warn(f'has equivalent sites at positions {inverses[ind]}, {inv}')
667
            else:
668
                all_sites.append(site_)
669
                inverses.append(inv)
670
                multiplicity += 1
671
672
        if multiplicity > 0:
673
            multiplicities.append(multiplicity)
674
            asym_inds.append(len(all_sites))
675
676
    frac_motif = np.array(all_sites)
677
    asym_inds = np.array(asym_inds[:-1])
678
    multiplicities = np.array(multiplicities)
679
    return frac_motif, asym_inds, multiplicities, inverses
680
681
682
def _atom_has_disorder(label, occupancy):
683
    """Return True if label ends with ? or occupancy < 1."""
684
    return label.endswith('?') or (np.isscalar(occupancy) and occupancy < 1)
685
686
687
@numba.njit()
688
def _unique_sites(asym_unit):
689
    site_diffs1 = np.abs(np.expand_dims(asym_unit, 1) - asym_unit)
690
    site_diffs2 = np.abs(site_diffs1 - 1)
691
    sites_neq_mask = np.logical_and((site_diffs1 > _EQUIV_SITE_TOL), 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
692
                                    (site_diffs2 > _EQUIV_SITE_TOL))
693
    overlapping = np.triu(sites_neq_mask.sum(axis=-1) == 0, 1)
694
    return overlapping.sum(axis=0) == 0
695
696
697
def _heaviest_component(molecule):
698
    """Heaviest component (removes all but the heaviest component of the asym unit).
699
    Intended for removing solvents. Probably doesn't play well with disorder"""
700
    component_weights = []
701
    for component in molecule.components:
702
        weight = 0
703
        for a in component.atoms:
704
            if isinstance(a.atomic_weight, (float, int)):
705
                if isinstance(a.occupancy, (float, int)):
706
                    weight += a.occupancy * a.atomic_weight
707
                else:
708
                    weight += a.atomic_weight
709
        component_weights.append(weight)
710
    largest_component_ind = np.argmax(np.array(component_weights))
711
    molecule = molecule.components[largest_component_ind]
712
    return molecule
713
714
715
def _refcodes_from_families(refcode_families):
716
    """List of strings --> all CSD refcodes starting with one of the strings.
717
    Intended to be passed a list of families and return all refcodes in them."""
718
    all_refcodes = []
719
    for refcode in refcode_families:
720
        query = ccdc.search.TextNumericSearch()
721
        query.add_identifier(refcode)
722
        hits = [hit.identifier for hit in query.search()]
723
        all_refcodes.extend(hits)
724
725
    # filter to unique refcodes
726
    seen = set()
727
    seen_add = seen.add
728
    refcodes = [
729
        refcode for refcode in all_refcodes
730
        if not (refcode in seen or seen_add(refcode))]
731
732
    return refcodes
733
734
735
def _map(func: Callable, iterable: Iterable, show_warnings: bool):
736
    """Iterates over iterable, passing items through func and yielding the result. 
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
737
    Catches _ParseError and warnings, optionally printing them.
738
    """
739
740
    if not show_warnings:
741
        warnings.simplefilter('ignore')
742
743
    for item in iterable:
744
745
        with warnings.catch_warnings(record=True) as warning_msgs:
746
747
            parse_failed = False
748
            try:
749
                periodic_set = func(item)
750
            except _ParseError as err:
751
                parse_failed = str(err)
752
753
        if parse_failed:
754
            warnings.warn(parse_failed)
755
            continue
756
757
        for warning in warning_msgs:
758
            msg = f'{periodic_set.name} {warning.message}'
759
            warnings.warn(msg, category=warning.category)
760
761
        yield periodic_set
762