|
1
|
|
|
"""Contains I/O tools, including a .CIF reader and CSD reader |
|
2
|
|
|
(``csd-python-api`` only) to extract periodic set representations |
|
3
|
|
|
of crystals which can be passed to :func:`.calculate.AMD` and :func:`.calculate.PDD`. |
|
4
|
|
|
|
|
5
|
|
|
These intermediate :class:`.periodicset.PeriodicSet` representations can be written |
|
6
|
|
|
to a .hdf5 file with :class:`SetWriter`, which can be read back with :class:`SetReader`. |
|
7
|
|
|
This is much faster than rereading a .CIF and recomputing invariants. |
|
8
|
|
|
""" |
|
9
|
|
|
|
|
10
|
|
|
import os |
|
11
|
|
|
import warnings |
|
12
|
|
|
|
|
13
|
|
|
import numpy as np |
|
14
|
|
|
|
|
15
|
|
|
from .periodicset import PeriodicSet |
|
16
|
|
|
from .utils import _extend_signature, cellpar_to_cell |
|
17
|
|
|
from ._reader import _Reader |
|
18
|
|
|
|
|
19
|
|
|
try: |
|
20
|
|
|
import ccdc.io # EntryReader |
|
21
|
|
|
import ccdc.search # TextNumericSearch |
|
22
|
|
|
_CCDC_ENABLED = True |
|
23
|
|
|
except (ImportError, RuntimeError) as exception: |
|
24
|
|
|
_CCDC_ENABLED = False |
|
25
|
|
|
|
|
26
|
|
|
|
|
27
|
|
|
def _warning(message, category, filename, lineno, *args, **kwargs): |
|
|
|
|
|
|
28
|
|
|
return f'{filename}:{lineno}: {category.__name__}: {message}\n' |
|
29
|
|
|
|
|
30
|
|
|
warnings.formatwarning = _warning |
|
31
|
|
|
|
|
32
|
|
|
|
|
33
|
|
|
class CifReader(_Reader): |
|
34
|
|
|
"""Read all structures in a .CIF with ``ase`` or ``ccdc`` |
|
35
|
|
|
(``csd-python-api`` only), yielding :class:`.periodicset.PeriodicSet` |
|
36
|
|
|
objects which can be passed to :func:`.calculate.AMD` or |
|
37
|
|
|
:func:`.calculate.PDD`. |
|
38
|
|
|
|
|
39
|
|
|
Examples: |
|
40
|
|
|
|
|
41
|
|
|
:: |
|
42
|
|
|
|
|
43
|
|
|
# Put all crystals in a .CIF in a list |
|
44
|
|
|
structures = list(amd.CifReader('mycif.cif')) |
|
45
|
|
|
|
|
46
|
|
|
# Reads just one if the .CIF has just one crystal |
|
47
|
|
|
periodic_set = amd.CifReader('mycif.cif').read_one() |
|
48
|
|
|
|
|
49
|
|
|
# If a folder has several .CIFs each with one crystal, use |
|
50
|
|
|
structures = list(amd.CifReader('path/to/folder', folder=True)) |
|
51
|
|
|
|
|
52
|
|
|
# Make list of AMDs (with k=100) of crystals in a .CIF |
|
53
|
|
|
amds = [amd.AMD(periodic_set, 100) for periodic_set in amd.CifReader('mycif.cif')] |
|
54
|
|
|
""" |
|
55
|
|
|
|
|
56
|
|
|
@_extend_signature(_Reader.__init__) |
|
57
|
|
|
def __init__( |
|
58
|
|
|
self, |
|
59
|
|
|
path, |
|
60
|
|
|
reader='ase', |
|
61
|
|
|
folder=False, |
|
62
|
|
|
**kwargs): |
|
63
|
|
|
|
|
64
|
|
|
super().__init__(**kwargs) |
|
65
|
|
|
|
|
66
|
|
|
if reader not in ('ase', 'ccdc'): |
|
67
|
|
|
raise ValueError(f'Invalid reader {reader}; must be ase or ccdc.') |
|
68
|
|
|
|
|
69
|
|
|
if reader == 'ase' and self.heaviest_component: |
|
70
|
|
|
raise NotImplementedError('Parameter heaviest_component not implimented for ase, only ccdc.') |
|
|
|
|
|
|
71
|
|
|
|
|
72
|
|
|
if reader == 'ase': |
|
73
|
|
|
extensions = {'cif'} |
|
74
|
|
|
file_parser = ase.io.cif.parse_cif |
|
|
|
|
|
|
75
|
|
|
pset_converter = self._CIFBlock_to_PeriodicSet |
|
76
|
|
|
|
|
77
|
|
|
elif reader == 'ccdc': |
|
78
|
|
|
if not _CCDC_ENABLED: |
|
79
|
|
|
raise ImportError("Failed to import csd-python-api; check it is installed and licensed.") |
|
|
|
|
|
|
80
|
|
|
extensions = ccdc.io.EntryReader.known_suffixes |
|
81
|
|
|
file_parser = ccdc.io.EntryReader |
|
82
|
|
|
pset_converter = self._Entry_to_PeriodicSet |
|
83
|
|
|
|
|
84
|
|
|
if folder: |
|
85
|
|
|
generator = self._folder_generator(path, file_parser, extensions) |
|
|
|
|
|
|
86
|
|
|
else: |
|
87
|
|
|
generator = file_parser(path) |
|
88
|
|
|
|
|
89
|
|
|
self._generator = self._map(pset_converter, generator) |
|
|
|
|
|
|
90
|
|
|
|
|
91
|
|
|
def _folder_generator(self, path, file_parser, extensions): |
|
92
|
|
|
for file in os.listdir(path): |
|
93
|
|
|
suff = os.path.splitext(file)[1][1:] |
|
94
|
|
|
if suff.lower() in extensions: |
|
95
|
|
|
self.current_filename = file |
|
96
|
|
|
yield from file_parser(os.path.join(path, file)) |
|
97
|
|
|
|
|
98
|
|
|
|
|
99
|
|
|
class CSDReader(_Reader): |
|
100
|
|
|
"""Read Entries from the CSD, yielding :class:`.periodicset.PeriodicSet` objects. |
|
101
|
|
|
|
|
102
|
|
|
The CSDReader returns :class:`.periodicset.PeriodicSet` objects which can be passed |
|
103
|
|
|
to :func:`.calculate.AMD` or :func:`.calculate.PDD`. |
|
104
|
|
|
|
|
105
|
|
|
Examples: |
|
106
|
|
|
|
|
107
|
|
|
Get crystals with refcodes in a list:: |
|
108
|
|
|
|
|
109
|
|
|
refcodes = ['DEBXIT01', 'DEBXIT05', 'HXACAN01'] |
|
110
|
|
|
structures = list(amd.CSDReader(refcodes)) |
|
111
|
|
|
|
|
112
|
|
|
Read refcode families (any whose refcode starts with strings in the list):: |
|
113
|
|
|
|
|
114
|
|
|
refcodes = ['ACSALA', 'HXACAN'] |
|
115
|
|
|
structures = list(amd.CSDReader(refcodes, families=True)) |
|
116
|
|
|
|
|
117
|
|
|
Create a generic reader, read crystals by name with :meth:`CSDReader.entry()`:: |
|
118
|
|
|
|
|
119
|
|
|
reader = amd.CSDReader() |
|
120
|
|
|
debxit01 = reader.entry('DEBXIT01') |
|
121
|
|
|
|
|
122
|
|
|
# looping over this generic reader will yield all CSD entries |
|
123
|
|
|
for periodic_set in reader: |
|
124
|
|
|
... |
|
125
|
|
|
|
|
126
|
|
|
Make list of AMD (with k=100) for crystals in these families:: |
|
127
|
|
|
|
|
128
|
|
|
refcodes = ['ACSALA', 'HXACAN'] |
|
129
|
|
|
amds = [] |
|
130
|
|
|
for periodic_set in amd.CSDReader(refcodes, families=True): |
|
131
|
|
|
amds.append(amd.AMD(periodic_set, 100)) |
|
132
|
|
|
""" |
|
133
|
|
|
|
|
134
|
|
|
@_extend_signature(_Reader.__init__) |
|
135
|
|
|
def __init__( |
|
136
|
|
|
self, |
|
137
|
|
|
refcodes=None, |
|
138
|
|
|
families=False, |
|
139
|
|
|
**kwargs): |
|
140
|
|
|
|
|
141
|
|
|
if not _CCDC_ENABLED: |
|
142
|
|
|
raise ImportError("Failed to import csd-python-api; check it is installed and licensed.") |
|
|
|
|
|
|
143
|
|
|
|
|
144
|
|
|
super().__init__(**kwargs) |
|
145
|
|
|
|
|
146
|
|
|
if isinstance(refcodes, str) and refcodes.lower() == 'csd': |
|
147
|
|
|
refcodes = None |
|
148
|
|
|
|
|
149
|
|
|
if refcodes is None: |
|
150
|
|
|
families = False |
|
151
|
|
|
else: |
|
152
|
|
|
refcodes = [refcodes] if isinstance(refcodes, str) else list(refcodes) |
|
153
|
|
|
|
|
154
|
|
|
# families parameter reads all crystals with ids starting with passed refcodes |
|
155
|
|
|
if families: |
|
156
|
|
|
all_refcodes = [] |
|
157
|
|
|
for refcode in refcodes: |
|
158
|
|
|
query = ccdc.search.TextNumericSearch() |
|
159
|
|
|
query.add_identifier(refcode) |
|
160
|
|
|
all_refcodes.extend((hit.identifier for hit in query.search())) |
|
|
|
|
|
|
161
|
|
|
|
|
162
|
|
|
# filter to unique refcodes |
|
163
|
|
|
seen = set() |
|
164
|
|
|
seen_add = seen.add |
|
165
|
|
|
refcodes = [ |
|
166
|
|
|
refcode for refcode in all_refcodes |
|
167
|
|
|
if not (refcode in seen or seen_add(refcode))] |
|
168
|
|
|
|
|
169
|
|
|
self._entry_reader = ccdc.io.EntryReader('CSD') |
|
170
|
|
|
self._generator = self._map( |
|
171
|
|
|
self._Entry_to_PeriodicSet, |
|
172
|
|
|
self._ccdc_generator(refcodes)) |
|
173
|
|
|
|
|
174
|
|
|
def _ccdc_generator(self, refcodes): |
|
175
|
|
|
"""Generates ccdc Entries from CSD refcodes""" |
|
176
|
|
|
|
|
177
|
|
|
if refcodes is None: |
|
178
|
|
|
for entry in self._entry_reader: |
|
179
|
|
|
yield entry |
|
180
|
|
|
else: |
|
181
|
|
|
for refcode in refcodes: |
|
182
|
|
|
try: |
|
183
|
|
|
entry = self._entry_reader.entry(refcode) |
|
184
|
|
|
yield entry |
|
185
|
|
|
except RuntimeError: |
|
186
|
|
|
warnings.warn( |
|
187
|
|
|
f'Identifier {refcode} not found in database') |
|
188
|
|
|
|
|
189
|
|
|
def entry(self, refcode: str) -> PeriodicSet: |
|
190
|
|
|
"""Read a PeriodicSet given any CSD refcode.""" |
|
191
|
|
|
|
|
192
|
|
|
entry = self._entry_reader.entry(refcode) |
|
193
|
|
|
periodic_set = self._Entry_to_PeriodicSet(entry) |
|
194
|
|
|
return periodic_set |
|
195
|
|
|
|
|
196
|
|
|
|
|
197
|
|
|
def crystal_to_periodicset(crystal): |
|
198
|
|
|
"""ccdc.crystal.Crystal --> amd.periodicset.PeriodicSet. |
|
199
|
|
|
Ignores disorder, missing sites/coords, checks & no options. |
|
200
|
|
|
Is a stripped-down version of the function used in CifReader.""" |
|
201
|
|
|
|
|
202
|
|
|
cell = cellpar_to_cell(*crystal.cell_lengths, *crystal.cell_angles) |
|
203
|
|
|
|
|
204
|
|
|
# asymmetric unit fractional coordinates |
|
205
|
|
|
asym_frac_motif = np.array([tuple(a.fractional_coordinates) |
|
206
|
|
|
for a in crystal.asymmetric_unit_molecule.atoms]) |
|
207
|
|
|
asym_frac_motif = np.mod(asym_frac_motif, 1) |
|
208
|
|
|
|
|
209
|
|
|
# if the above removed everything, skip this structure |
|
210
|
|
|
if asym_frac_motif.shape[0] == 0: |
|
211
|
|
|
raise ValueError(f'{crystal.identifier} has no coordinates') |
|
212
|
|
|
|
|
213
|
|
|
sitesym = crystal.symmetry_operators |
|
214
|
|
|
if not sitesym: |
|
215
|
|
|
sitesym = ('x,y,z', ) |
|
216
|
|
|
r = _Reader() |
|
217
|
|
|
r.current_identifier = crystal.identifier |
|
218
|
|
|
frac_motif, asym_unit, multiplicities, _ = r.expand(asym_frac_motif, sitesym) |
|
219
|
|
|
motif = frac_motif @ cell |
|
220
|
|
|
|
|
221
|
|
|
kwargs = { |
|
222
|
|
|
'name': crystal.identifier, |
|
223
|
|
|
'asymmetric_unit': asym_unit, |
|
224
|
|
|
'wyckoff_multiplicities': multiplicities, |
|
225
|
|
|
} |
|
226
|
|
|
|
|
227
|
|
|
periodic_set = PeriodicSet(motif, cell, **kwargs) |
|
228
|
|
|
return periodic_set |
|
229
|
|
|
|
|
230
|
|
|
|
|
231
|
|
|
def cifblock_to_periodicset(block): |
|
232
|
|
|
"""ase.io.cif.CIFBlock --> amd.periodicset.PeriodicSet. |
|
233
|
|
|
Ignores disorder, missing sites/coords, checks & no options. |
|
234
|
|
|
Is a stripped-down version of the function used in CifReader.""" |
|
235
|
|
|
|
|
236
|
|
|
cell = block.get_cell().array |
|
237
|
|
|
asym_frac_motif = [block.get(name) for name in _Reader.atom_site_fract_tags] |
|
238
|
|
|
|
|
239
|
|
|
if None in asym_frac_motif: |
|
240
|
|
|
asym_motif = [block.get(name) for name in _Reader.atom_site_cartn_tags] |
|
241
|
|
|
if None in asym_motif: |
|
242
|
|
|
warnings.warn( |
|
243
|
|
|
f'Skipping {block.name} as coordinates were not found') |
|
244
|
|
|
return None |
|
245
|
|
|
|
|
246
|
|
|
asym_frac_motif = np.array(asym_motif) @ np.linalg.inv(cell) |
|
247
|
|
|
|
|
248
|
|
|
asym_frac_motif = np.mod(np.array(asym_frac_motif).T, 1) |
|
249
|
|
|
|
|
250
|
|
|
if asym_frac_motif.shape[0] == 0: |
|
251
|
|
|
raise ValueError(f'{block.name} has no coordinates') |
|
252
|
|
|
|
|
253
|
|
|
sitesym = ('x,y,z', ) |
|
254
|
|
|
for tag in _Reader.symop_tags: |
|
255
|
|
|
if tag in block: |
|
256
|
|
|
sitesym = block[tag] |
|
257
|
|
|
break |
|
258
|
|
|
|
|
259
|
|
|
if isinstance(sitesym, str): |
|
260
|
|
|
sitesym = [sitesym] |
|
261
|
|
|
|
|
262
|
|
|
dummy_reader = _Reader() |
|
263
|
|
|
dummy_reader.current_identifier = block.name |
|
264
|
|
|
frac_motif, asym_unit, multiplicities, _ = dummy_reader.expand(asym_frac_motif, sitesym) |
|
265
|
|
|
motif = frac_motif @ cell |
|
266
|
|
|
|
|
267
|
|
|
kwargs = { |
|
268
|
|
|
'name': block.name, |
|
269
|
|
|
'asymmetric_unit': asym_unit, |
|
270
|
|
|
'wyckoff_multiplicities': multiplicities |
|
271
|
|
|
} |
|
272
|
|
|
|
|
273
|
|
|
periodic_set = PeriodicSet(motif, cell, **kwargs) |
|
274
|
|
|
return periodic_set |
|
275
|
|
|
|