Test Failed
Push — master ( 9779af...19222a )
by Daniel
02:55
created

amd.io   F

Complexity

Total Complexity 176

Size/Duplication

Total Lines 1451
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 176
eloc 697
dl 0
loc 1451
rs 1.903
c 0
b 0
f 0

16 Functions

Rating   Name   Duplication   Size   Complexity  
A _custom_warning() 0 2 1
B periodicset_from_pymatgen_structure() 0 58 7
A _atom_has_disorder() 0 3 1
F periodicset_from_ase_cifblock() 0 148 30
A _refcodes_from_families() 0 26 3
A _heaviest_component() 0 16 5
B _cif_val_to_float() 0 15 6
A periodicset_from_ccdc_entry() 0 64 4
A _gemmi_loop_to_dict() 0 7 2
C _expand_asym_unit() 0 60 9
F periodicset_from_gemmi_block() 0 132 18
D periodicset_from_pymatgen_cifblock() 0 112 13
F _parse_sitesym_pymatgen() 0 77 19
F periodicset_from_ccdc_crystal() 0 126 15
A _frac_molecular_centres_ccdc() 0 12 2
A _unique_sites() 0 11 1

9 Methods

Rating   Name   Duplication   Size   Complexity  
B _Reader.__next__() 0 28 7
A _Reader.__iter__() 0 4 3
A _Reader.__init__() 0 21 2
A _Reader.read() 0 9 2
A CifReader._generate_from_dir() 0 6 3
B CSDReader.__init__() 0 50 7
A CifReader._pymatgen_cifblock_generator() 0 5 1
A CSDReader._ccdc_generator() 0 14 5
C CifReader.__init__() 0 96 10

How to fix   Complexity   

Complexity

Complex classes like amd.io often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
"""Tools for reading crystals from files, or from the CSD with
0 ignored issues
show
coding-style introduced by
Too many lines in module (1450/1000)
Loading history...
2
``csd-python-api``. The readers return
3
:class:`amd.PeriodicSet <.periodicset.PeriodicSet>` objects representing the
4
crystal which can be passed to :func:`amd.AMD() <.calculate.AMD>` and
5
:func:`amd.PDD() <.calculate.PDD>` to get their invariants.
6
"""
7
8
import os
9
import re
10
import functools
11
import warnings
12
from typing import Tuple
13
14
import numpy as np
15
import numba
16
17
import ase.io.cif
18
import ase.data
19
import ase.spacegroup
20
from ase.spacegroup.spacegroup import parse_sitesym as ase_parse_sitesym
21
22
from .utils import cellpar_to_cell
23
from .periodicset import PeriodicSet
24
25
26
def _custom_warning(message, category, filename, lineno, *args, **kwargs):
0 ignored issues
show
Unused Code introduced by
The argument filename seems to be unused.
Loading history...
Unused Code introduced by
The argument lineno seems to be unused.
Loading history...
Unused Code introduced by
The argument args seems to be unused.
Loading history...
Unused Code introduced by
The argument kwargs seems to be unused.
Loading history...
27
    return f'{category.__name__}: {message}\n'
28
29
warnings.formatwarning = _custom_warning
30
31
_EQUIV_SITE_TOL = 1e-3
32
_DISORDER_OPTIONS = {'skip', 'ordered_sites', 'all_sites'}
33
34
CIF_TAGS = {
35
    'cellpar': [
36
        '_cell_length_a',
37
        '_cell_length_b',
38
        '_cell_length_c',
39
        '_cell_angle_alpha',
40
        '_cell_angle_beta',
41
        '_cell_angle_gamma',],
42
43
    'atom_site_fract': [
44
        '_atom_site_fract_x',
45
        '_atom_site_fract_y',
46
        '_atom_site_fract_z',],
47
48
    'atom_site_cartn': [
49
        '_atom_site_cartn_x',
50
        '_atom_site_cartn_y',
51
        '_atom_site_cartn_z',],
52
53
    'atom_symbol': [
54
        '_atom_site_type_symbol',
55
        '_atom_site_label',],
56
57
    'symop': [
58
        '_symmetry_equiv_pos_as_xyz',
59
        '_space_group_symop_operation_xyz',
60
        '_space_group_symop.operation_xyz',
61
        '_symmetry_equiv_pos_as_xyz_',
62
        '_space_group_symop_operation_xyz_',],
63
64
    'spacegroup_name': [
65
        '_space_group_name_Hall',
66
        '_space_group_name_Hall_',
67
        '_symmetry_space_group_name_hall',
68
        '_symmetry_space_group_name_hall_',
69
        '_space_group_name_H-M_alt',
70
        '_space_group_name_H-M_alt_',
71
        '_symmetry_space_group_name_H-M',
72
        '_symmetry_space_group_name_H-M_',
73
        '_symmetry_space_group_name_H_M',
74
        '_symmetry_space_group_name_H_M_',
75
        '_symmetry_space_group_name_h-m',
76
        '_symmetry_space_group_name_h-m_',],
77
78
    'spacegroup_number': [
79
        '_space_group_IT_number',
80
        '_symmetry_Int_Tables_number',
81
        '_space_group_IT_number_',
82
        '_symmetry_Int_Tables_number_',],
83
}
84
85
86
class _Reader:
87
88
    def __init__(
0 ignored issues
show
best-practice introduced by
Too many arguments (6/5)
Loading history...
89
            self,
90
            remove_hydrogens=False,
91
            disorder='skip',
92
            heaviest_component=False,
93
            molecular_centres=False,
94
            show_warnings=True
95
    ):
96
97
        if disorder not in _DISORDER_OPTIONS:
98
            msg = 'disorder parameter must be one of ' \
99
                  f'{_DISORDER_OPTIONS} (passed {disorder})'
100
            raise ValueError(msg)
101
102
        self.remove_hydrogens = remove_hydrogens
103
        self.disorder = disorder
104
        self.heaviest_component = heaviest_component
105
        self.molecular_centres = molecular_centres
106
        self.show_warnings = show_warnings
107
        self._backend_generator = None
108
        self._converter = None
109
110
    def __iter__(self):
111
        if self._backend_generator is None or self._converter is None:
112
            raise RuntimeError(f'{self.__class__.__name__} not initialized.')
113
        return self
114
115
    def __next__(self):
116
        """Iterates over self._backend_generator, passing items through self._converter.
117
        Catches ParseError + warnings raised in self._converter, optionally printing them.
118
        """
119
120
        if not self.show_warnings:
121
            warnings.simplefilter('ignore')
122
123
        while True:
124
125
            item = next(self._backend_generator) # will raise StopIteration when done
126
127
            with warnings.catch_warnings(record=True) as warning_msgs:
128
                msg = None
129
                try:
130
                    periodic_set = self._converter(item)
0 ignored issues
show
Bug introduced by
self._converter does not seem to be callable.
Loading history...
131
                except ParseError as err:
132
                    msg = str(err)
133
134
            if msg:
135
                warnings.warn(msg)
136
                continue
137
138
            for warning in warning_msgs:
139
                msg = f'{periodic_set.name} {warning.message}'
140
                warnings.warn(msg, category=warning.category)
141
142
            return periodic_set
143
144
    def read(self):
145
        """Reads the crystal(s), returns one
146
        :class:`amd.PeriodicSet <.periodicset.PeriodicSet>` if there is only
147
        one, otherwise returns a list. (Note the return type is not
148
        consistent!)"""
149
        l = list(self)
150
        if len(l) == 1:
151
            return l[0]
152
        return l
153
154
155
class CifReader(_Reader):
156
    """Read all structures in a .cif file or all files in a folder
157
    with ase or csd-python-api (if installed), yielding
158
    :class:`amd.PeriodicSet <.periodicset.PeriodicSet>` s.
159
160
    Parameters
161
    ----------
162
    path : str
163
        Path to a .cif file or directory. (Other files are accepted when using
164
        ``reader='ccdc'``, if csd-python-api is installed.)
165
    reader : str, optional
166
        The backend package used for parsing. Default is :code:`ase`,
167
        to use csd-python-api change to :code:`ccdc`. The ccdc reader should
168
        be able to read any format accepted by :class:`ccdc.io.EntryReader`,
169
        though only cifs have been tested.
170
    remove_hydrogens : bool, optional
171
        Remove Hydrogens from the crystal.
172
    disorder : str, optional
173
        Controls how disordered structures are handled. Default is ``skip``
174
        which skips any crystal with disorder, since disorder conflicts with
175
        the periodic set model. To read disordered structures anyway, choose
176
        either :code:`ordered_sites` to remove atoms with disorder or
177
        :code:`all_sites` include all atoms regardless of disorder.
178
    heaviest_component : bool, optional
179
        csd-python-api only. Removes all but the heaviest molecule in the
180
        asymmeric unit, intended for removing solvents.
181
    molecular_centres : bool, default False
182
        csd-python-api only. Extract the centres of molecules in the unit cell
183
        and store in the attribute molecular_centres.
184
    show_warnings : bool, optional
185
        Controls whether warnings that arise during reading are printed.
186
187
    Yields
188
    ------
189
    :class:`amd.PeriodicSet <.periodicset.PeriodicSet>`
190
        Represents the crystal as a periodic set, consisting of a finite set
191
        of points (motif) and lattice (unit cell). Contains other data, e.g.
192
        the crystal's name and information about the asymmetric unit.
193
194
    Examples
195
    --------
196
197
        ::
198
199
            # Put all crystals in a .CIF in a list
200
            structures = list(amd.CifReader('mycif.cif'))
201
202
            # Can also accept path to a directory, reading all files inside
203
            structures = list(amd.CifReader('path/to/folder'))
204
205
            # Reads just one if the .CIF has just one crystal
206
            periodic_set = amd.CifReader('mycif.cif').read()
207
208
            # List of AMDs (k=100) of crystals in a .CIF
209
            amds = [amd.AMD(periodic_set, 100) for periodic_set in amd.CifReader('mycif.cif')]
210
    """
211
212
    def __init__(
0 ignored issues
show
best-practice introduced by
Too many arguments (8/5)
Loading history...
213
            self,
214
            path,
215
            reader='ase',
216
            remove_hydrogens=False,
217
            disorder='skip',
218
            heaviest_component=False,
219
            molecular_centres=False,
220
            show_warnings=True,
221
    ):
222
223
        super().__init__(
224
            remove_hydrogens=remove_hydrogens,
225
            disorder=disorder,
226
            heaviest_component=heaviest_component,
227
            molecular_centres=molecular_centres,
228
            show_warnings=show_warnings
229
        )
230
231
        # file_parser: Callable(path) -> Iterable[External object]
232
        # self._converter: Callable(External object) -> PeriodicSet
233
        # set options in self._converter with functools.partial
234
235
        if reader != 'ccdc':
236
237
            if heaviest_component:
238
                msg = 'Parameter heaviest_component ' \
239
                      'only implemented for reader="ccdc".'
240
                raise NotImplementedError(msg)
241
242
            if molecular_centres:
243
                msg = 'Parameter molecular_centres ' \
244
                      'only implemented for reader="ccdc".'
245
                raise NotImplementedError(msg)
246
247
        if reader in ('ase', 'pycodcif'):
248
249
            extensions = {'cif'}
250
            file_parser = functools.partial(ase.io.cif.parse_cif, reader=reader)
251
            self._converter = functools.partial(
252
                periodicset_from_ase_cifblock,
253
                remove_hydrogens=self.remove_hydrogens,
254
                disorder=self.disorder
255
            )
256
257
        elif reader == 'pymatgen':
258
259
            extensions = {'cif'}
260
            file_parser = CifReader._pymatgen_cifblock_generator
261
            self._converter = functools.partial(
262
                periodicset_from_pymatgen_cifblock,
263
                remove_hydrogens=self.remove_hydrogens,
264
                disorder=self.disorder
265
            )
266
267
        # elif reader == 'gemmi':
268
269
        #     import gemmi
270
        #     # from gemmi.cif import read_file
271
272
        #     extensions = {'cif'}
273
        #     file_parser = gemmi.cif.read_file
274
        #     self._converter = functools.partial(
275
        #         periodicset_from_gemmi_block,
276
        #         remove_hydrogens=self.remove_hydrogens,
277
        #         disorder=self.disorder
278
        #     )
279
280
        elif reader == 'ccdc':
281
282
            try:
283
                import ccdc.io
0 ignored issues
show
introduced by
Import outside toplevel (ccdc.io)
Loading history...
284
            except (ImportError, RuntimeError) as e:
285
                msg = 'Failed to import csd-python-api, please' \
286
                      'check it is installed and licensed.'
287
                raise ImportError(msg) from e
288
289
            extensions = ccdc.io.EntryReader.known_suffixes
290
            file_parser = ccdc.io.EntryReader
291
            self._converter = functools.partial(
292
                periodicset_from_ccdc_entry,
293
                remove_hydrogens=self.remove_hydrogens,
294
                disorder=self.disorder,
295
                molecular_centres=self.molecular_centres,
296
                heaviest_component=self.heaviest_component
297
            )
298
299
        else:
300
            raise ValueError(f'Unknown reader {reader}.')
301
302
        if os.path.isfile(path):
303
            self._backend_generator = iter(file_parser(path))
304
        elif os.path.isdir(path):
305
            self._backend_generator = iter(CifReader._generate_from_dir(path, file_parser, extensions))
306
        else:
307
            raise FileNotFoundError(f'No such file or directory: {path}')
308
309
    @staticmethod
310
    def _generate_from_dir(path, file_parser, extensions):
311
        for file in os.listdir(path):
312
            suff = os.path.splitext(file)[1][1:]
313
            if suff.lower() in extensions:
314
                yield from file_parser(os.path.join(path, file))
315
316
    @staticmethod
317
    def _pymatgen_cifblock_generator(path):
318
        """Path to .cif --> generator of pymatgen CifBlocks."""
319
        from pymatgen.io.cif import CifFile
0 ignored issues
show
introduced by
Unable to import 'pymatgen.io.cif'
Loading history...
introduced by
Import outside toplevel (pymatgen.io.cif.CifFile)
Loading history...
320
        yield from CifFile.from_file(path).data.values()
321
322
323
class CSDReader(_Reader):
324
    """Read structures from the CSD with csd-python-api, yielding
325
    :class:`amd.PeriodicSet <.periodicset.PeriodicSet>` s.
326
327
    Parameters
328
    ----------
329
    refcodes : str or List[str], optional
330
        Single or list of CSD refcodes to read. If None or 'CSD', iterates
331
        over the whole CSD.
332
    families : bool, optional
333
        Read all entries whose refcode starts with the given strings, or
334
        'families' (e.g. giving 'DEBXIT' reads all entries starting with
335
        DEBXIT).
336
    remove_hydrogens : bool, optional
337
        Remove hydrogens from the crystal.
338
    disorder : str, optional
339
        Controls how disordered structures are handled. Default is ``skip``
340
        which skips any crystal with disorder, since disorder conflicts with
341
        the periodic set model. To read disordered structures anyway, choose
342
        either :code:`ordered_sites` to remove atoms with disorder or
343
        :code:`all_sites` include all atoms regardless of disorder.
344
    heaviest_component : bool, optional
345
        Removes all but the heaviest molecule in the asymmeric unit, intended
346
        for removing solvents.
347
    molecular_centres : bool, default False
348
        Extract the centres of molecules in the unit cell and store in
349
        attribute molecular_centres.
350
    show_warnings : bool, optional
351
        Controls whether warnings that arise during reading are printed.
352
353
    Yields
354
    ------
355
    :class:`amd.PeriodicSet <.periodicset.PeriodicSet>`
356
        Represents the crystal as a periodic set, consisting of a finite set
357
        of points (motif) and lattice (unit cell). Contains other useful data,
358
        e.g. the crystal's name and information about the asymmetric unit for
359
        calculation.
360
361
    Examples
362
    --------
363
364
        ::
365
366
            # Put these entries in a list
367
            refcodes = ['DEBXIT01', 'DEBXIT05', 'HXACAN01']
368
            structures = list(amd.CSDReader(refcodes))
369
370
            # Read refcode families (any whose refcode starts with strings in the list)
371
            refcode_families = ['ACSALA', 'HXACAN']
372
            structures = list(amd.CSDReader(refcode_families, families=True))
373
374
            # Get AMDs (k=100) for crystals in these families
375
            refcodes = ['ACSALA', 'HXACAN']
376
            amds = []
377
            for periodic_set in amd.CSDReader(refcodes, families=True):
378
                amds.append(amd.AMD(periodic_set, 100))
379
380
            # Giving the reader nothing reads from the whole CSD.
381
            for periodic_set in amd.CSDReader():
382
                ...
383
    """
384
385
    def __init__(
0 ignored issues
show
best-practice introduced by
Too many arguments (8/5)
Loading history...
386
            self,
387
            refcodes=None,
388
            families=False,
389
            remove_hydrogens=False,
390
            disorder='skip',
391
            heaviest_component=False,
392
            molecular_centres=False,
393
            show_warnings=True,
394
    ):
395
396
        super().__init__(
397
            remove_hydrogens=remove_hydrogens,
398
            disorder=disorder,
399
            heaviest_component=heaviest_component,
400
            molecular_centres=molecular_centres,
401
            show_warnings=show_warnings,
402
        )
403
404
        try:
405
            import ccdc.io
0 ignored issues
show
introduced by
Import outside toplevel (ccdc.io)
Loading history...
406
        except (ImportError, RuntimeError) as _:
407
            msg = 'Failed to import csd-python-api, please' \
408
                  'check it is installed and licensed.'
409
            raise ImportError(msg)
410
411
        if isinstance(refcodes, str) and refcodes.lower() == 'csd':
412
            refcodes = None
413
414
        if refcodes is None:
415
            families = False
416
        else:
417
            if isinstance(refcodes, str):
418
                refcodes = [refcodes]
419
            else:
420
                refcodes = list(refcodes)
421
422
        if families:
423
            refcodes = _refcodes_from_families(refcodes)
424
425
        # self._entry_reader = ccdc.io.EntryReader('CSD')
426
        entry_reader = ccdc.io.EntryReader('CSD')
427
        self._converter = functools.partial(
428
            periodicset_from_ccdc_entry,
429
            remove_hydrogens=self.remove_hydrogens,
430
            disorder=self.disorder,
431
            molecular_centres=self.molecular_centres,
432
            heaviest_component=self.heaviest_component
433
        )
434
        self._backend_generator = iter(self._ccdc_generator(refcodes, entry_reader))
435
436
    @staticmethod
437
    def _ccdc_generator(refcodes, entry_reader):
438
        """Generates ccdc Entries from CSD refcodes."""
439
440
        if refcodes is None:
441
            for entry in entry_reader:
442
                yield entry
443
        else:
444
            for refcode in refcodes:
445
                try:
446
                    entry = entry_reader.entry(refcode)
447
                    yield entry
448
                except RuntimeError:
449
                    warnings.warn(f'{refcode} not found in database')
450
451
452
class ParseError(ValueError):
453
    """Raised when an item cannot be parsed into a periodic set."""
454
    pass
0 ignored issues
show
Unused Code introduced by
Unnecessary pass statement
Loading history...
455
456
457
def periodicset_from_ase_cifblock(
458
        block,
459
        remove_hydrogens=False,
460
        disorder='skip'
461
) -> PeriodicSet:
462
    """:class:`ase.io.cif.CIFBlock` --> :class:`amd.PeriodicSet <.periodicset.PeriodicSet>`
463
    CIFBlock is the type returned by :class:`ase.io.cif.parse_cif`.
464
465
    Parameters
466
    ----------
467
    block : :class:`ase.io.cif.CIFBlock`
468
        An ase CIFBlock object representing a crystal.
469
    remove_hydrogens : bool, optional
470
        Remove Hydrogens from the crystal.
471
    disorder : str, optional
472
        Controls how disordered structures are handled. Default is ``skip``
473
        which raises a ParseError if the input has any disorder, since
474
        disorder conflicts with the periodic set model. To read disordered
475
        structures anyway, choose either :code:`ordered_sites` to remove sites
476
        with disorder or :code:`all_sites` include all sites regardless.
477
478
    Returns
479
    -------
480
    :class:`amd.PeriodicSet <.periodicset.PeriodicSet>`
481
        Represents the crystal as a periodic set, consisting of a finite set
482
        of points (motif) and lattice (unit cell). Contains other useful data,
483
        e.g. the crystal's name and information about the asymmetric unit for
484
        calculation.
485
486
    Raises
487
    ------
488
    ParseError :
489
        Raised if the structure fails to be parsed for any of the following:
490
        1. Required data is missing (e.g. cell parameters),
491
        2. disorder == 'skip' and any of:
492
            (a) any atom has fractional occupancy,
493
            (b) any atom's label ends with '?',
494
        3. The motif is empty after removing H or disordered sites.
495
    """
496
497
    # get cell
498
    try:
499
        cellpar = [block[tag] for tag in CIF_TAGS['cellpar']]
500
    except KeyError:
501
        raise ParseError(f'{block.name} has missing cell data')
502
    cell = cellpar_to_cell(*cellpar)
503
504
    # asymmetric unit fractional coords
505
    asym_unit = [block.get(name) for name in CIF_TAGS['atom_site_fract']]
506
    if None in asym_unit: # missing fractional coords, try cartesian
507
        asym_motif = [block.get(name) for name in CIF_TAGS['atom_site_cartn']]
508
        if None in asym_motif:
509
            raise ParseError(f'{block.name} has missing coordinate data')
510
        asym_unit = np.array(asym_motif) @ np.linalg.inv(cell)
511
512
    elif any(None in coords for coords in asym_unit):
513
        raise ParseError(f'{block.name} has missing coordinate data')
514
515
    # if there's a ?, . or other string, remove site
516
    if any(isinstance(c, str) for coords in asym_unit for c in coords):
517
        warnings.warn('atoms without sites or unparsable data will be removed')
518
        asym_unit_ = [[], [], []]
519
        for xyz in zip(*asym_unit):
520
            if not any(isinstance(coord, str) for coord in xyz):
521
                for i in range(3):
522
                    asym_unit_[i].append(xyz[i])
523
        asym_unit = asym_unit_
524
525
    asym_unit = np.mod(np.array(asym_unit).T, 1)
526
    # asym_unit = _snap_small_prec_coords(asym_unit)
527
528
    # atomic types
529
    for tag in CIF_TAGS['atom_symbol']:
530
        if tag in block:
531
            asym_types = []
532
            for label in block[tag]:
533
                if label in ('.', '?'):
534
                    warnings.warn('missing atomic type data')
535
                    asym_types.append(0)
536
                # remove additional labels
537
                sym = re.search(r'([A-Z][a-z]?)', label).group(0)
538
                if sym == 'D':
539
                    sym = 'H'
540
                asym_types.append(ase.data.atomic_numbers[sym])
541
            break
542
    else:
543
        warnings.warn('missing atomic types')
544
        asym_types = [0 for _ in range(len(asym_unit))]
545
546
    # symmetry operations
547
    sitesym = []
548
    for tag in CIF_TAGS['symop']: # try symop tags first
549
        if tag in block:
550
            sitesym = block[tag]
551
            break
552
    if isinstance(sitesym, str):
553
        sitesym = [sitesym]
554
    if not sitesym: # if no symops, get ops from spacegroup
555
        try:
556
            spg = block.get_spacegroup(True)
557
            rot, trans = spg.rotations, spg.translations
558
        except:
0 ignored issues
show
Coding Style Best Practice introduced by
General except handlers without types should be used sparingly.

Typically, you would use general except handlers when you intend to specifically handle all types of errors, f.e. when logging. Otherwise, such general error handlers can mask errors in your application that you want to know of.

Loading history...
559
            rot, trans = ase_parse_sitesym(['x,y,z'])
560
    else:
561
        rot, trans = ase_parse_sitesym(sitesym)
562
563
    # (if needed) remove Hydrogens, fract occupancy atoms
564
    remove_sites = [] # indices of asymmetric unit sites to remove
565
    occupancies = block.get('_atom_site_occupancy')
566
    labels = block.get('_atom_site_label')
567
    if occupancies is not None:
568
        if disorder == 'skip':
569
            if any(_atom_has_disorder(lab, occ) for lab, occ in zip(labels, occupancies)):
570
                raise ParseError(f'{block.name} has disorder')
571
        elif disorder == 'ordered_sites':
572
            for i, (lab, occ) in enumerate(zip(labels, occupancies)):
573
                if _atom_has_disorder(lab, occ):
574
                    remove_sites.append(i)
575
576
    if remove_hydrogens:
577
        remove_sites.extend((i for i, num in enumerate(asym_types) if num == 1))
0 ignored issues
show
introduced by
The variable i does not seem to be defined for all execution paths.
Loading history...
578
579
    asym_unit = np.delete(asym_unit, remove_sites, axis=0)
580
    asym_types = [s for i, s in enumerate(asym_types) if i not in remove_sites]
581
582
    # if disorder == 'all_sites', don't remove overlapping sites
583
    if disorder != 'all_sites':
584
        keep_sites = _unique_sites(asym_unit)
585
        if not np.all(keep_sites):
586
            warnings.warn('may have overlapping sites; duplicates will be removed')
587
        asym_unit = asym_unit[keep_sites]
588
        asym_types = [sym for sym, keep in zip(asym_types, keep_sites) if keep]
589
590
    if asym_unit.shape[0] == 0:
591
        raise ParseError(f'{block.name} has no valid sites')
592
593
    frac_motif, asym_inds, wyc_muls, inverses = _expand_asym_unit(asym_unit, rot, trans)
594
    full_types = np.array([asym_types[i] for i in inverses])
595
    motif = frac_motif @ cell
596
597
    kwargs = {
598
        'name': block.name,
599
        'asymmetric_unit': asym_inds,
600
        'wyckoff_multiplicities': wyc_muls,
601
        'types': full_types
602
    }
603
604
    return PeriodicSet(motif, cell, **kwargs)
605
606
607
# # TODO: function needs to be finished.
0 ignored issues
show
Coding Style introduced by
TODO and FIXME comments should generally be avoided.
Loading history...
608
# def periodicset_from_ase_atoms(
609
#         atoms,
610
#         remove_hydrogens=False,
611
#         disorder='skip'
612
# ) -> PeriodicSet:
613
#     """:class:`ase.atoms.Atoms` --> :class:`amd.PeriodicSet <.periodicset.PeriodicSet>`.
614
615
#     Parameters
616
#     ----------
617
#     atoms : :class:`ase.atoms.Atoms`
618
#         An ase Atoms object representing a crystal.
619
#     remove_hydrogens : bool, optional
620
#         Remove Hydrogens from the crystal.
621
#     disorder : str, optional
622
#         Controls how disordered structures are handled. Default is ``skip``
623
#         which raises a ParseError if the input has any disorder, since
624
#         disorder conflicts with the periodic set model. To read disordered
625
#         structures anyway, choose either :code:`ordered_sites` to remove sites
626
#         with disorder or :code:`all_sites` include all sites regardless.
627
628
#     Returns
629
#     -------
630
#     :class:`amd.PeriodicSet <.periodicset.PeriodicSet>`
631
#         Represents the crystal as a periodic set, consisting of a finite set
632
#         of points (motif) and lattice (unit cell). Contains other useful data,
633
#         e.g. the crystal's name and information about the asymmetric unit for
634
#         calculation.
635
636
#     Raises
637
#     ------
638
#     ParseError      ## rewrite!!
639
#         Raised if the structure can/should not be parsed for the following reasons:
640
#         1. no sites found or motif is empty after removing H or disordered sites,
641
#         2. a site has missing coordinates,
642
#         3. disorder == 'skip' and any of:
643
#             (a) any atom has fractional occupancy,
644
#             (b) any atom's label ends with '?'.
645
#     """
646
647
#     if remove_hydrogens:
648
#         for i in np.where(atoms.get_atomic_numbers() == 1)[0][::-1]:
649
#             atoms.pop(i)
650
651
#     if disorder == 'skip':
652
#         occ_info = atoms.info['occupancy']
653
#         if occ_info is not None:
654
#             for d in occ_info.values():
655
#                 for occ in d.values():
656
#                     if float(occ) < 1:
657
#                         raise ParseError('Ase atoms object has disorder')
658
659
#     elif disorder == 'ordered_sites':
660
#         occ_info = atoms.info['occupancy']
661
#         if occ_info is not None:
662
#             remove_inds = []
663
#             for i, d in occ_info.items():
664
#                 for occ in d.values():
665
#                     if float(occ) < 1:
666
#                         remove_inds.append(int(i))
667
668
#     # .pop removes just one atom.
669
670
#     # if disorder == 'skip':
671
#     #     if not structure.is_ordered:
672
#     #         raise ParseError(f'{name} has disorder')
673
#     # elif disorder == 'ordered_sites':
674
#     #     remove_inds = []
675
#     #     for i, comp in enumerate(structure.species_and_occu):
676
#     #         if comp.num_atoms < 1:
677
#     #             remove_inds.append(i)
678
#     #     structure.remove_sites(remove_inds)
679
680
#     cell = atoms.get_cell().array
681
#     sg = atoms.info['spacegroup']
682
#     asym_unit = ase.spacegroup.get_basis(atoms)
683
#     frac_motif, asym_inds, wyc_muls, _ = _expand_asym_unit(asym_unit, sg.rotations, sg.translations)
684
#     motif = frac_motif @ cell
685
    
0 ignored issues
show
Coding Style introduced by
Trailing whitespace
Loading history...
686
#     kwargs = {
687
#         'name': None,
688
#         'asymmetric_unit': asym_inds,
689
#         'wyckoff_multiplicities': wyc_muls,
690
#         'types': atoms.get_atomic_numbers()
691
#     }
692
693
#     return PeriodicSet(motif, cell, **kwargs)
694
695
696
def periodicset_from_pymatgen_cifblock(
697
        block,
698
        remove_hydrogens=False,
699
        disorder='skip',
700
) -> PeriodicSet:
701
    """:class:`pymatgen.io.cif.CifBlock` --> :class:`amd.PeriodicSet <.periodicset.PeriodicSet>`.
702
703
    Parameters
704
    ----------
705
    block : :class:`pymatgen.core.cif.CifBlock`
706
        A pymatgen CifBlock object representing a crystal.
707
    remove_hydrogens : bool, optional
708
        Remove Hydrogens from the crystal.
709
    disorder : str, optional
710
        Controls how disordered structures are handled. Default is ``skip``
711
        which raises a ParseError if the input has any disorder, since
712
        disorder conflicts with the periodic set model. To read disordered
713
        structures anyway, choose either :code:`ordered_sites` to remove sites
714
        with disorder or :code:`all_sites` include all sites regardless.
715
716
    Returns
717
    -------
718
    :class:`amd.PeriodicSet <.periodicset.PeriodicSet>`
719
        Represents the crystal as a periodic set, consisting of a finite set
720
        of points (motif) and lattice (unit cell). Contains other useful data,
721
        e.g. the crystal's name and information about the asymmetric unit for
722
        calculation.
723
724
    Raises
725
    ------
726
    ParseError
727
        Raised if the structure can/should not be parsed for the following reasons:
728
        1. no sites found or motif is empty after removing H or disordered sites,
729
        2. a site has missing coordinates,
730
        3. disorder == 'skip' and any of:
731
            (a) any atom has fractional occupancy,
732
            (b) any atom's label ends with '?'.
733
    """
734
735
    from pymatgen.core.periodic_table import _pt_data
0 ignored issues
show
introduced by
Unable to import 'pymatgen.core.periodic_table'
Loading history...
introduced by
Import outside toplevel (pymatgen.core.periodic_table._pt_data)
Loading history...
736
737
    odict = block.data
738
739
    try:
740
        cellpar = [_cif_val_to_float(odict[tag]) for tag in CIF_TAGS['cellpar']]
741
        cell = cellpar_to_cell(*cellpar)
742
    except KeyError:
743
        raise ParseError(f'{block.header} has missing cell information')
744
745
    # check for missing data ?
746
    asym_unit = [[_cif_val_to_float(s) for s in odict.get(name)]
747
                 for name in CIF_TAGS['atom_site_fract']]
748
    if None in asym_unit:
749
        asym_motif = [[_cif_val_to_float(s) for s in odict.get(name)]
750
                      for name in CIF_TAGS['atom_site_cartn']]
751
        if None in asym_motif:
752
            raise ParseError(f'{block.header} has no sites')
753
        asym_unit = np.array(asym_motif) @ np.linalg.inv(cell)
754
755
    asym_unit = np.mod(np.array(asym_unit).T, 1)
756
    # asym_unit = _snap_small_prec_coords(asym_unit)
757
758
    # check _atom_site_label as well?
759
    symbols = odict.get('_atom_site_type_symbol')
760
    if symbols is None:
761
        warnings.warn('missing atomic types')
762
        asym_types = [0 for _ in range(len(asym_unit))]
763
    else:
764
        asym_types = [_pt_data[s]['Atomic no'] for s in symbols]
765
766
    remove_sites = []
767
768
    occupancies = odict.get('_atom_site_occupancy')
769
    labels = odict.get('_atom_site_label')
770
    if occupancies is not None:
771
        if disorder == 'skip':
772
            if any(_atom_has_disorder(lab, occ) for lab, occ in zip(labels, occupancies)):
773
                raise ParseError(f'{block.header} has disorder')
774
        elif disorder == 'ordered_sites':
775
            remove_sites.extend(
776
                (i for i, (lab, occ) in enumerate(zip(labels, occupancies))
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable i does not seem to be defined.
Loading history...
777
                 if _atom_has_disorder(lab, occ)))
778
779
    if remove_hydrogens:
780
        remove_sites.extend((i for i, num in enumerate(asym_types) if num == 1))
781
782
    asym_unit = np.delete(asym_unit, remove_sites, axis=0)
783
    asym_types = [s for i, s in enumerate(asym_types) if i not in remove_sites]
784
785
    if disorder != 'all_sites':
786
        keep_sites = _unique_sites(asym_unit)
787
        if not np.all(keep_sites):
788
            warnings.warn('may have overlapping sites; duplicates will be removed')
789
        asym_unit = asym_unit[keep_sites]
790
        asym_types = [sym for sym, keep in zip(asym_types, keep_sites) if keep]
791
792
    if asym_unit.shape[0] == 0:
793
        raise ParseError(f'{block.header} has no valid sites')
794
795
    rot, trans = _parse_sitesym_pymatgen(odict)
796
    frac_motif, asym_inds, wyc_muls, inverses = _expand_asym_unit(asym_unit, rot, trans)
797
    full_types = np.array([asym_types[i] for i in inverses])
798
    motif = frac_motif @ cell
799
800
    kwargs = {
801
        'name': block.header,
802
        'asymmetric_unit': asym_inds,
803
        'wyckoff_multiplicities': wyc_muls,
804
        'types': full_types
805
    }
806
807
    return PeriodicSet(motif, cell, **kwargs)
808
809
810
# TODO: add Raises to docstr
0 ignored issues
show
Coding Style introduced by
TODO and FIXME comments should generally be avoided.
Loading history...
811
def periodicset_from_pymatgen_structure(
812
        structure,
813
        remove_hydrogens=False,
814
        disorder='skip'
815
) -> PeriodicSet:
816
    """:class:`pymatgen.core.structure.Structure` --> :class:`amd.PeriodicSet <.periodicset.PeriodicSet>`.
817
    Does not set the name of the periodic set, as there seems to be no such attribute in
818
    the pymatgen Structure object.
819
820
    Parameters
821
    ----------
822
    structure : :class:`pymatgen.core.structure.Structure`
823
        A pymatgen Structure object representing a crystal.
824
    remove_hydrogens : bool, optional
825
        Remove Hydrogens from the crystal.
826
    disorder : str, optional
827
        Controls how disordered structures are handled. Default is ``skip``
828
        which raises a ParseError if the input has any disorder, since
829
        disorder conflicts with the periodic set model. To read disordered
830
        structures anyway, choose either :code:`ordered_sites` to remove sites
831
        with disorder or :code:`all_sites` include all sites regardless.
832
833
    Returns
834
    -------
835
    :class:`amd.PeriodicSet <.periodicset.PeriodicSet>`
836
        Represents the crystal as a periodic set, consisting of a finite set
837
        of points (motif) and lattice (unit cell). Contains other useful data,
838
        e.g. the crystal's name and information about the asymmetric unit for
839
        calculation.
840
    """
841
842
    from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
0 ignored issues
show
introduced by
Unable to import 'pymatgen.symmetry.analyzer'
Loading history...
introduced by
Import outside toplevel (pymatgen.symmetry.analyzer.SpacegroupAnalyzer)
Loading history...
843
844
    if remove_hydrogens:
845
        structure.remove_species(['H', 'D'])
846
847
    if disorder == 'skip':
848
        if not structure.is_ordered:
849
            raise ParseError('Pymatgen structure object has disorder')
850
    elif disorder == 'ordered_sites':
851
        remove_inds = []
852
        for i, comp in enumerate(structure.species_and_occu):
853
            if comp.num_atoms < 1:
854
                remove_inds.append(i)
855
        structure.remove_sites(remove_inds)
856
857
    motif = structure.cart_coords
858
    cell = structure.lattice.matrix
859
    sym_structure = SpacegroupAnalyzer(structure).get_symmetrized_structure()
860
    equiv_inds = sym_structure.equivalent_indices
861
862
    kwargs = {
863
        'asymmetric_unit': np.array([l[0] for l in equiv_inds]),
864
        'wyckoff_multiplicities': np.array([len(l) for l in equiv_inds]),
865
        'types': np.array(sym_structure.atomic_numbers)
866
    }
867
868
    return PeriodicSet(motif, cell, **kwargs)
869
870
871
def periodicset_from_ccdc_entry(
872
        entry,
873
        remove_hydrogens=False,
874
        disorder='skip',
875
        heaviest_component=False,
876
        molecular_centres=False
877
) -> PeriodicSet:
878
    """:class:`ccdc.entry.Entry` --> :class:`amd.PeriodicSet <.periodicset.PeriodicSet>`.
879
    Entry is the type returned by :class:`ccdc.io.EntryReader`.
880
881
    Parameters
882
    ----------
883
    entry : :class:`ccdc.entry.Entry`
884
        A ccdc Entry object representing a database entry.
885
    remove_hydrogens : bool, optional
886
        Remove Hydrogens from the crystal.
887
    disorder : str, optional
888
        Controls how disordered structures are handled. Default is ``skip``
889
        which raises a ParseError if the input has any disorder, since
890
        disorder conflicts with the periodic set model. To read disordered
891
        structures anyway, choose either :code:`ordered_sites` to remove sites
892
        with disorder or :code:`all_sites` include all sites regardless.
893
    heaviest_component : bool, optional
894
        Removes all but the heaviest molecule in the asymmeric unit, intended
895
        for removing solvents.
896
    molecular_centres : bool, default False
897
        Extract the centres of molecules in the unit cell and store in
898
        the attribute molecular_centres of the returned PeriodicSet.
899
900
    Returns
901
    -------
902
    :class:`amd.PeriodicSet <.periodicset.PeriodicSet>`
903
        Represents the crystal as a periodic set, consisting of a finite set
904
        of points (motif) and lattice (unit cell). Contains other useful data,
905
        e.g. the crystal's name and information about the asymmetric unit for
906
        calculation.
907
908
    Raises
909
    ------
910
    ParseError :
911
        Raised if the structure fails parsing for any of the following:
912
        1. entry.has_3d_structure is False,
913
        2. disorder == 'skip' and any of:
914
            (a) any disorder flag is True,
915
            (b) any atom has fractional occupancy,
916
            (c) any atom's label ends with '?',
917
        3. entry.crystal.molecule.all_atoms_have_sites is False,
918
        4. a.fractional_coordinates is None for any a in entry.crystal.disordered_molecule,
919
        5. The motif is empty after removing H, disordered sites or solvents.
920
    """
921
922
    # Entry specific flags
923
    if not entry.has_3d_structure:
924
        raise ParseError(f'{entry.identifier} has no 3D structure')
925
926
    if disorder == 'skip' and entry.has_disorder:
927
        raise ParseError(f'{entry.identifier} has disorder')
928
929
    return periodicset_from_ccdc_crystal(
930
        entry.crystal,
931
        remove_hydrogens=remove_hydrogens,
932
        disorder=disorder,
933
        heaviest_component=heaviest_component,
934
        molecular_centres=molecular_centres
935
    )
936
937
938
def periodicset_from_ccdc_crystal(
939
        crystal,
940
        remove_hydrogens=False,
941
        disorder='skip',
942
        heaviest_component=False,
943
        molecular_centres=False
944
) -> PeriodicSet:
945
    """:class:`ccdc.crystal.Crystal` --> :class:`amd.PeriodicSet <.periodicset.PeriodicSet>`.
946
    Crystal is the type returned by :class:`ccdc.io.CrystalReader`.
947
948
    Parameters
949
    ----------
950
    crystal : :class:`ccdc.crystal.Crystal`
951
        A ccdc Crystal object representing a crystal structure.
952
    remove_hydrogens : bool, optional
953
        Remove Hydrogens from the crystal.
954
    disorder : str, optional
955
        Controls how disordered structures are handled. Default is ``skip``
956
        which raises a ParseError if the input has any disorder, since
957
        disorder conflicts with the periodic set model. To read disordered
958
        structures anyway, choose either :code:`ordered_sites` to remove sites
959
        with disorder or :code:`all_sites` include all sites regardless.
960
    heaviest_component : bool, optional
961
        Removes all but the heaviest molecule in the asymmeric unit,
962
        intended for removing solvents.
963
    molecular_centres : bool, default False
964
        Extract the centres of molecules in the unit cell and store in
965
        the attribute molecular_centres of the returned PeriodicSet.
966
967
    Returns
968
    -------
969
    :class:`amd.PeriodicSet <.periodicset.PeriodicSet>`
970
        Represents the crystal as a periodic set, consisting of a finite set
971
        of points (motif) and lattice (unit cell). Contains other useful data,
972
        e.g. the crystal's name and information about the asymmetric unit for
973
        calculation.
974
975
    Raises
976
    ------
977
    ParseError :
978
        Raised if the structure fails parsing for any of the following:
979
        1. disorder == 'skip' and any of:
980
            (a) any disorder flag is True,
981
            (b) any atom has fractional occupancy,
982
            (c) any atom's label ends with '?',
983
        2. crystal.molecule.all_atoms_have_sites is False,
984
        3. a.fractional_coordinates is None for any a in crystal.disordered_molecule,
985
        4. The motif is empty after removing H, disordered sites or solvents.
986
    """
987
988
    molecule = crystal.disordered_molecule
989
990
    # if skipping disorder, check disorder flags and then all atoms
991
    if disorder == 'skip':
992
        if crystal.has_disorder or \
993
         any(_atom_has_disorder(a.label, a.occupancy) for a in molecule.atoms):
994
            raise ParseError(f'{crystal.identifier} has disorder')
995
996
    elif disorder == 'ordered_sites':
997
        molecule.remove_atoms(a for a in molecule.atoms
998
                              if _atom_has_disorder(a.label, a.occupancy))
999
1000
    if remove_hydrogens:
1001
        molecule.remove_atoms(a for a in molecule.atoms if a.atomic_symbol in 'HD')
1002
1003
    if heaviest_component and len(molecule.components) > 1:
1004
        molecule = _heaviest_component(molecule)
1005
1006
    # remove atoms with missing coord data and warn
1007
    if any(a.fractional_coordinates is None for a in molecule.atoms):
1008
        warnings.warn('trying to remove atoms without sites')
1009
        molecule.remove_atoms(a for a in molecule.atoms if a.fractional_coordinates is None)
1010
1011
    if not molecule.all_atoms_have_sites:
1012
        raise ParseError(f'{crystal.identifier} has atoms without sites')
1013
1014
    crystal.molecule = molecule
1015
    cell = cellpar_to_cell(*crystal.cell_lengths, *crystal.cell_angles)
1016
1017
    if molecular_centres:
1018
        frac_centres = _frac_molecular_centres_ccdc(crystal)
1019
        mol_centres = frac_centres @ cell
1020
        return PeriodicSet(mol_centres, cell, name=crystal.identifier)
1021
1022
    asym_atoms = crystal.asymmetric_unit_molecule.atoms
1023
    # check for None?
1024
    asym_unit = np.array([tuple(a.fractional_coordinates) for a in asym_atoms])
1025
    asym_unit = np.mod(asym_unit, 1)
1026
    # asym_unit = _snap_small_prec_coords(asym_unit)
1027
    asym_types = [a.atomic_number for a in asym_atoms]
1028
1029
    if disorder != 'all_sites':
1030
        keep_sites = _unique_sites(asym_unit)
1031
        if not np.all(keep_sites):
1032
            warnings.warn('may have overlapping sites; duplicates will be removed')
1033
        asym_unit = asym_unit[keep_sites]
1034
        asym_types = [sym for sym, keep in zip(asym_types, keep_sites) if keep]
1035
1036
    if asym_unit.shape[0] == 0:
1037
        raise ParseError(f'{crystal.identifier} has no valid sites')
1038
1039
    sitesym = crystal.symmetry_operators
1040
    # try spacegroup numbers?
1041
    if not sitesym:
1042
        sitesym = ['x,y,z']
1043
1044
    rot = np.array([np.array(crystal.symmetry_rotation(op)).reshape((3, 3))
1045
                    for op in sitesym])
1046
    trans = np.array([crystal.symmetry_translation(op) for op in sitesym])
1047
    frac_motif, asym_inds, wyc_muls, inverses = _expand_asym_unit(asym_unit, rot, trans)
1048
    motif = frac_motif @ cell
1049
1050
    kwargs = {
1051
        'name': crystal.identifier,
1052
        'asymmetric_unit': asym_inds,
1053
        'wyckoff_multiplicities': wyc_muls,
1054
        'types': np.array([asym_types[i] for i in inverses])
1055
    }
1056
1057
    periodic_set = PeriodicSet(motif, cell, **kwargs)
1058
1059
    # if molecular_centres:
1060
    #     frac_centres = _frac_molecular_centres_ccdc(crystal)
1061
    #     periodic_set.molecular_centres = frac_centres @ cell
1062
1063
    return periodic_set
1064
1065
1066
def periodicset_from_gemmi_block(
1067
        block,
1068
        remove_hydrogens=False,
1069
        disorder='skip'
1070
) -> PeriodicSet:
1071
    """:class:`gemmi.cif.Block` --> :class:`amd.PeriodicSet <.periodicset.PeriodicSet>`.
1072
    Block is the type returned by :class:`gemmi.cif.read_file`.
1073
1074
    Parameters
1075
    ----------
1076
    block : :class:`ase.io.cif.CIFBlock`
1077
        An ase CIFBlock object representing a crystal.
1078
    remove_hydrogens : bool, optional
1079
        Remove Hydrogens from the crystal.
1080
    disorder : str, optional
1081
        Controls how disordered structures are handled. Default is ``skip``
1082
        which raises a ParseError if the input has any disorder, since
1083
        disorder conflicts with the periodic set model. To read disordered
1084
        structures anyway, choose either :code:`ordered_sites` to remove sites
1085
        with disorder or :code:`all_sites` include all sites regardless.
1086
1087
    Returns
1088
    -------
1089
    :class:`amd.PeriodicSet <.periodicset.PeriodicSet>`
1090
        Represents the crystal as a periodic set, consisting of a finite set
1091
        of points (motif) and lattice (unit cell). Contains other useful data,
1092
        e.g. the crystal's name and information about the asymmetric unit for
1093
        calculation.
1094
1095
    Raises
1096
    ------
1097
    ParseError :
1098
        Raised if the structure fails to be parsed for any of the following:
1099
        1. Required data is missing (e.g. cell parameters),
1100
        2. disorder == 'skip' and any of:
1101
            (a) any atom has fractional occupancy,
1102
            (b) any atom's label ends with '?',
1103
        3. The motif is empty after removing H or disordered sites.
1104
    """
1105
1106
    cellpar = [ase.io.cif.convert_value(block.find_value(tag))
1107
               for tag in CIF_TAGS['cellpar']]
1108
    if None in cellpar:
1109
        raise ParseError(f'{block.name} has missing cell information')
1110
1111
    cell = cellpar_to_cell(*cellpar)
1112
1113
    xyz_loop = block.find(CIF_TAGS['atom_site_fract']).loop
1114
    if xyz_loop is None:
1115
        # check for crtsn!
1116
        raise ParseError(f'{block.name} has missing coordinate data')
1117
1118
    # loop will contain all xyz cols, maybe labels, maybe types, maybe occs
1119
1120
    loop_dict = _gemmi_loop_to_dict(xyz_loop)
1121
    xyz_str = [loop_dict[t] for t in CIF_TAGS['atom_site_fract']]
1122
    asym_unit = [[ase.io.cif.convert_value(c) for c in coords] for coords in xyz_str]
1123
1124
    asym_unit = np.mod(np.array(asym_unit).T, 1)
1125
    # asym_unit = _snap_small_prec_coords(asym_unit)
1126
1127
    if '_atom_site_type_symbol' in loop_dict:
1128
        asym_syms = loop_dict['_atom_site_type_symbol']
1129
        asym_types = [ase.data.atomic_numbers[s] for s in asym_syms]
1130
    else:
1131
        warnings.warn('missing atomic types')
1132
        asym_types = [0 for _ in range(len(asym_unit))]
1133
1134
    remove_sites = []
1135
1136
    # if labels exist, check them for disorder
1137
    if '_atom_site_label' in loop_dict:
1138
        labels = loop_dict['_atom_site_label']
1139
    else:
1140
        labels = ['' for _ in range(xyz_loop.length())]
1141
1142
    # if occupancies exist, check them for disorder
1143
    if '_atom_site_occupancy' in loop_dict:
1144
        occupancies = [ase.io.cif.convert_value(occ)
1145
                       for occ in loop_dict['_atom_site_occupancy']]
1146
    else:
1147
        occupancies = [None for _ in range(xyz_loop.length())]
1148
1149
    if disorder == 'skip':
1150
        if any(_atom_has_disorder(lab, occ) for lab, occ in zip(labels, occupancies)):
1151
            raise ParseError(f'{block.name} has disorder')
1152
    elif disorder == 'ordered_sites':
1153
        for i, (lab, occ) in enumerate(zip(labels, occupancies)):
1154
            if _atom_has_disorder(lab, occ):
1155
                remove_sites.append(i)
1156
1157
    if remove_hydrogens:
1158
        remove_sites.extend((i for i, num in enumerate(asym_types) if num == 1))
0 ignored issues
show
introduced by
The variable i does not seem to be defined for all execution paths.
Loading history...
1159
1160
    asym_unit = np.delete(asym_unit, remove_sites, axis=0)
1161
    asym_types = [s for i, s in enumerate(asym_types) if i not in remove_sites]
1162
1163
    if disorder != 'all_sites':
1164
        keep_sites = _unique_sites(asym_unit)
1165
        if not np.all(keep_sites):
1166
            warnings.warn('may have overlapping sites; duplicates will be removed')
1167
        asym_unit = asym_unit[keep_sites]
1168
        asym_types = [sym for sym, keep in zip(asym_types, keep_sites) if keep]
1169
1170
    if asym_unit.shape[0] == 0:
1171
        raise ParseError(f'{block.name} has no valid sites')
1172
1173
    # get symops
1174
    sitesym = []
1175
    for tag in CIF_TAGS['symop']:
1176
        symop_loop = block.find([tag]).loop
1177
        if symop_loop is not None:
1178
            symop_loop_dict = _gemmi_loop_to_dict(symop_loop)
1179
            sitesym = symop_loop_dict[tag]
1180
            break
1181
    # try spacegroup names/nums?
1182
    if not sitesym:
1183
        sitesym = ['x,y,z',]
1184
1185
    rot, trans = ase_parse_sitesym(sitesym)
1186
    frac_motif, asym_inds, wyc_muls, inverses = _expand_asym_unit(asym_unit, rot, trans)
1187
    full_types = np.array([asym_types[i] for i in inverses])
1188
    motif = frac_motif @ cell
1189
1190
    kwargs = {
1191
        'name': block.name,
1192
        'asymmetric_unit': asym_inds,
1193
        'wyckoff_multiplicities': wyc_muls,
1194
        'types': full_types
1195
    }
1196
1197
    return PeriodicSet(motif, cell, **kwargs)
1198
1199
1200
def _expand_asym_unit(
1201
        asym_unit: np.ndarray,
1202
        rotations: np.ndarray,
1203
        translations: np.ndarray
1204
) -> Tuple[np.ndarray, ...]:
1205
    """
1206
    Asymmetric unit frac coords, list of rotations, list of translations
1207
    -->
1208
    full fractional motif, asymmetric unit indices, multiplicities, inverses.
1209
    """
1210
1211
    all_sites = []
1212
    asym_inds = [0]
1213
    multiplicities = []
1214
    inverses = []
1215
    m, dims = asym_unit.shape
1216
1217
    expanded_sites = np.zeros((m, len(rotations), dims))
1218
    for i in range(m):
1219
        expanded_sites[i] = np.dot(rotations, asym_unit[i]) + translations
1220
    expanded_sites = np.mod(expanded_sites, 1)
1221
1222
    for inv, sites in enumerate(expanded_sites):
1223
1224
        multiplicity = 0
1225
1226
        for site_ in sites:
1227
1228
            if not all_sites:
1229
                all_sites.append(site_)
1230
                inverses.append(inv)
1231
                multiplicity += 1
1232
                continue
1233
1234
            # check if site_ overlaps with existing sites
1235
            diffs1 = np.abs(site_ - all_sites)
1236
            diffs2 = np.abs(diffs1 - 1)
1237
            mask = np.all((diffs1 <= _EQUIV_SITE_TOL) | (diffs2 <= _EQUIV_SITE_TOL), axis=-1)
1238
1239
            if np.any(mask):
1240
                where_equal = np.argwhere(mask).flatten()
1241
                for ind in where_equal:
1242
                    if inverses[ind] == inv: # invariant
1243
                        pass
1244
                    else: # equivalent to a different site
1245
                        warnings.warn(f'has equivalent sites at positions {inverses[ind]}, {inv}')
1246
            else:
1247
                all_sites.append(site_)
1248
                inverses.append(inv)
1249
                multiplicity += 1
1250
1251
        if multiplicity > 0:
1252
            multiplicities.append(multiplicity)
1253
            asym_inds.append(len(all_sites))
1254
1255
    frac_motif = np.array(all_sites)
1256
    asym_inds = np.array(asym_inds[:-1])
1257
    multiplicities = np.array(multiplicities)
1258
1259
    return frac_motif, asym_inds, multiplicities, inverses
1260
1261
1262
@numba.njit()
1263
def _unique_sites(asym_unit):
1264
    """Uniquify (within _EQUIV_SITE_TOL) a list of fractional coordinates,
1265
    considering all points modulo 1. Returns an array of bools such that
1266
    asym_unit[_unique_sites(asym_unit)] is a uniquified list."""
1267
    site_diffs1 = np.abs(np.expand_dims(asym_unit, 1) - asym_unit)
1268
    site_diffs2 = np.abs(site_diffs1 - 1)
1269
    sites_neq_mask = np.logical_and((site_diffs1 > _EQUIV_SITE_TOL),
1270
                                    (site_diffs2 > _EQUIV_SITE_TOL))
1271
    overlapping = np.triu(sites_neq_mask.sum(axis=-1) == 0, 1)
1272
    return overlapping.sum(axis=0) == 0
1273
1274
1275
def _parse_sitesym_pymatgen(data):
1276
    """In order to generate symmetry equivalent positions, the symmetry
1277
    operations are parsed. If the symops are not present, the space
1278
    group symbol is parsed, and symops are generated."""
1279
1280
    from pymatgen.symmetry.groups import SpaceGroup
0 ignored issues
show
introduced by
Unable to import 'pymatgen.symmetry.groups'
Loading history...
introduced by
Import outside toplevel (pymatgen.symmetry.groups.SpaceGroup)
Loading history...
1281
    from pymatgen.core.operations import SymmOp
0 ignored issues
show
introduced by
Unable to import 'pymatgen.core.operations'
Loading history...
introduced by
Import outside toplevel (pymatgen.core.operations.SymmOp)
Loading history...
1282
    import pymatgen.io.cif
0 ignored issues
show
introduced by
Import outside toplevel (pymatgen.io.cif)
Loading history...
introduced by
Unable to import 'pymatgen.io.cif'
Loading history...
1283
1284
    symops = []
1285
1286
    # try to parse xyz symops
1287
    for symmetry_label in CIF_TAGS['symop']:
1288
1289
        xyz = data.get(symmetry_label)
1290
        if not xyz:
1291
            continue
1292
        if isinstance(xyz, str):
1293
            xyz = [xyz]
1294
        try:
1295
            symops = [SymmOp.from_xyz_string(s) for s in xyz]
1296
            break
1297
        except ValueError:
1298
            continue
1299
1300
    # try to parse symbol
1301
    if not symops:
1302
1303
        for symmetry_label in CIF_TAGS['spacegroup_name']:
1304
1305
            sg = data.get(symmetry_label)
1306
            if not sg:
1307
                continue
1308
            sg = re.sub(r'[\s_]', '', sg)
1309
1310
            try:
1311
                spg = pymatgen.io.cif.space_groups.get(sg)
1312
                if not spg:
1313
                    continue
1314
                symops = SpaceGroup(spg).symmetry_ops
1315
                break
1316
            except ValueError:
1317
                pass
1318
1319
            try:
1320
                for d in pymatgen.io.cif._get_cod_data():
0 ignored issues
show
Coding Style Best Practice introduced by
It seems like _get_cod_data was declared protected and should not be accessed from this context.

Prefixing a member variable _ is usually regarded as the equivalent of declaring it with protected visibility that exists in other languages. Consequentially, such a member should only be accessed from the same class or a child class:

class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent
Loading history...
1321
                    if sg == re.sub(r'\s+', '', d['hermann_mauguin']):
1322
                        xyz = d['symops']
1323
                        symops = [SymmOp.from_xyz_string(s) for s in xyz]
1324
                        break
1325
            except Exception:
0 ignored issues
show
Best Practice introduced by
Catching very general exceptions such as Exception is usually not recommended.

Generally, you would want to handle very specific errors in the exception handler. This ensure that you do not hide other types of errors which should be fixed.

So, unless you specifically plan to handle any error, consider adding a more specific exception.

Loading history...
1326
                continue
1327
1328
            if symops:
1329
                break
1330
1331
    # try to parse international number
1332
    if not symops:
1333
        for symmetry_label in CIF_TAGS['spacegroup_number']:
1334
            num = data.get(symmetry_label)
1335
            if not num:
1336
                continue
1337
1338
            try:
1339
                i = int(_cif_val_to_float(num))
1340
                symops = SpaceGroup.from_int_number(i).symmetry_ops
1341
                break
1342
            except ValueError:
1343
                continue
1344
1345
    if not symops:
1346
        symops = [SymmOp.from_xyz_string(s) for s in ['x', 'y', 'z']]
1347
1348
    rotations = [op.rotation_matrix for op in symops]
1349
    translations = [op.translation_vector for op in symops]
1350
1351
    return rotations, translations
1352
1353
1354
def _frac_molecular_centres_ccdc(crystal):
1355
    """Returns any geometric centres of molecules in the unit cell.
1356
    Expects a ccdc Crystal object and returns fractional coordiantes."""
1357
1358
    frac_centres = []
1359
    for comp in crystal.packing(inclusion='CentroidIncluded').components:
1360
        coords = [a.fractional_coordinates for a in comp.atoms]
1361
        x, y, z = zip(*coords)
1362
        m = len(coords)
1363
        frac_centres.append((sum(x) / m, sum(y) / m, sum(z) / m))
1364
    frac_centres = np.mod(np.array(frac_centres), 1)
1365
    return frac_centres[_unique_sites(frac_centres)]
1366
1367
1368
def _atom_has_disorder(label, occupancy):
1369
    """Return True if label ends with ? or occupancy is a number < 1."""
1370
    return label.endswith('?') or (np.isscalar(occupancy) and occupancy < 1)
1371
1372
1373
# def _snap_small_prec_coords(frac_coords):
1374
#     """Find where frac_coords is within 1e-4 of 1/3 or 2/3, change to 1/3 and 2/3.
1375
#     Recommended by pymatgen's CIF parser. May use later."""
1376
#     frac_coords[np.abs(1 - 3 * frac_coords) < 1e-4] = 1 / 3.
1377
#     frac_coords[np.abs(1 - 3 * frac_coords / 2) < 1e-4] = 2 / 3.
1378
#     return frac_coords
1379
1380
1381
def _heaviest_component(molecule):
1382
    """Heaviest component (removes all but the heaviest component of the asym unit).
1383
    Intended for removing solvents. Probably doesn't play well with disorder"""
1384
    component_weights = []
1385
    for component in molecule.components:
1386
        weight = 0
1387
        for a in component.atoms:
1388
            if isinstance(a.atomic_weight, (float, int)):
1389
                if isinstance(a.occupancy, (float, int)):
1390
                    weight += a.occupancy * a.atomic_weight
1391
                else:
1392
                    weight += a.atomic_weight
1393
        component_weights.append(weight)
1394
    largest_component_ind = np.argmax(np.array(component_weights))
1395
    molecule = molecule.components[largest_component_ind]
1396
    return molecule
1397
1398
1399
def _refcodes_from_families(refcode_families):
1400
    """List of strings --> all CSD refcodes starting with one of the strings.
1401
    Intended to be passed a list of families and return all refcodes in them."""
1402
1403
    try:
1404
        import ccdc.search
0 ignored issues
show
introduced by
Import outside toplevel (ccdc.search)
Loading history...
1405
    except (ImportError, RuntimeError) as _:
1406
        msg = 'Failed to import csd-python-api, please'\
1407
              'check it is installed and licensed.'
1408
        raise ImportError(msg)
1409
1410
    all_refcodes = []
1411
    for refcode in refcode_families:
1412
        query = ccdc.search.TextNumericSearch()
1413
        query.add_identifier(refcode)
1414
        hits = [hit.identifier for hit in query.search()]
1415
        all_refcodes.extend(hits)
1416
1417
    # filter to unique refcodes
1418
    seen = set()
1419
    seen_add = seen.add
1420
    refcodes = [
1421
        refcode for refcode in all_refcodes
1422
        if not (refcode in seen or seen_add(refcode))]
1423
1424
    return refcodes
1425
1426
1427
def _gemmi_loop_to_dict(loop):
1428
    """gemmi Loop object --> dict, tags: values"""
1429
    tablified_loop = [[] for _ in range(len(loop.tags))]
1430
    n_cols = loop.width()
1431
    for i, item in enumerate(loop.values):
1432
        tablified_loop[i % n_cols].append(item)
1433
    return {tag: l for tag, l in zip(loop.tags, tablified_loop)}
0 ignored issues
show
Unused Code introduced by
Unnecessary use of a comprehension
Loading history...
1434
1435
1436
def _cif_val_to_float(text):
1437
    """Remove uncertainty brackets from strings and return float."""
1438
1439
    try:
1440
        # Note that the ending ) is sometimes missing. That is why the code has
1441
        # been modified to treat it as optional. Same logic applies to lists.
1442
        return float(re.sub(r'\(.+\)*', '', text))
1443
    except TypeError:
1444
        if isinstance(text, list) and len(text) == 1:
1445
            return float(re.sub(r'\(.+\)*', '', text[0]))
1446
    except ValueError as err:
1447
        if text.strip() == '.':
1448
            return 0
1449
        raise err
1450
    raise ValueError(f'{text} cannot be converted to float')
1451