Test Failed
Push — master ( 37d7fb...c02a6e )
by Daniel
07:38
created

amd.compare._unwrap_periodicset_list()   A

Complexity

Conditions 2

Size

Total Lines 9
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 9
rs 10
c 0
b 0
f 0
cc 2
nop 2
1
"""Functions for comparing AMDs and PDDs of crystals.
2
"""
3
4
from typing import List, Optional, Union, Tuple
5
from functools import partial
6
from itertools import combinations
7
import os
8
from pathlib import Path
9
10
import numpy as np
11
import numpy.typing as npt
12
import pandas as pd
13
from scipy.spatial.distance import cdist, pdist, squareform
14
from joblib import Parallel, delayed
15
import tqdm
16
17
from .io import CifReader, CSDReader
18
from .calculate import AMD, PDD
19
from ._emd import network_simplex
20
from .periodicset import PeriodicSet, PeriodicSetType
21
from .utils import neighbours_from_distance_matrix
22
23
24
def compare(
        crystals,
        crystals_=None,
        by: str = 'AMD',
        k: int = 100,
        nearest: Optional[int] = None,
        reader: str = 'gemmi',
        remove_hydrogens: bool = False,
        disorder: str = 'skip',
        heaviest_component: bool = False,
        molecular_centres: bool = False,
        families: bool = False,
        show_warnings: bool = True,
        collapse_tol: float = 1e-4,
        metric: str = 'chebyshev',
        n_jobs: Optional[int] = None,
        backend: str = 'multiprocessing',
        verbose: bool = False,
        low_memory: bool = False,
        **kwargs
) -> pd.DataFrame:
    r"""Given one or two sets of crystals, compare by AMD or PDD and
    return a pandas DataFrame of the distance matrix.

    Given one or two paths to cifs/folders, lists of CSD refcodes or
    periodic sets, compare by AMD or PDD and return a DataFrame of the
    distance matrix with names in the columns and index. Default is to
    compare by AMD with k = 100. Accepts most keyword arguments accepted
    by :class:`CifReader <.io.CifReader>`,
    :class:`CSDReader <.io.CSDReader>` and functions from
    :mod:`.compare`.

    Parameters
    ----------
    crystals : list of :class:`PeriodicSet <.periodicset.PeriodicSet>` or str
        One or a collection of paths, refcodes, file objects or
        :class:`PeriodicSets <.periodicset.PeriodicSet>`.
    crystals\_ : list of :class:`PeriodicSet <.periodicset.PeriodicSet>` or str, optional
        One or a collection of paths, refcodes, file objects or
        :class:`PeriodicSets <.periodicset.PeriodicSet>`.
    by : str, default 'AMD'
        Use AMD or PDD to compare crystals.
    k : int, default 100
        Number of neighbour atoms to use for AMD/PDD.
    nearest : int, default None
        Find a number of nearest neighbours instead of a full distance
        matrix between crystals.
    reader : str, optional
        The backend package used to parse the CIF. The default is
        :code:`gemmi`, :code:`pymatgen` and :code:`ase` are also
        accepted, as well as :code:`ccdc` if csd-python-api is
        installed. The ccdc reader should be able to read any format
        accepted by :class:`ccdc.io.EntryReader`, though only CIFs have
        been tested.
    remove_hydrogens : bool, optional
        Remove hydrogens from the crystals.
    disorder : str, optional
        Controls how disordered structures are handled. Default is
        ``skip`` which skips any crystal with disorder, since disorder
        conflicts with the periodic set model. To read disordered
        structures anyway, choose either :code:`ordered_sites` to remove
        atoms with disorder or :code:`all_sites` include all atoms
        regardless of disorder.
    heaviest_component : bool, optional, csd-python-api only
        Removes all but the heaviest molecule in
        the asymmetric unit, intended for removing solvents.
    molecular_centres : bool, default False, csd-python-api only
        Use the centres of molecules for comparison
        instead of centres of atoms.
    families : bool, optional, csd-python-api only
        Read all entries whose refcode starts with
        the given strings, or 'families' (e.g. giving 'DEBXIT' reads all
        entries with refcodes starting with DEBXIT).
    show_warnings : bool, optional
        Controls whether warnings that arise during reading are printed.
    collapse_tol: float, default 1e-4, ``by='PDD'`` only
        If two PDD rows have all elements closer
        than ``collapse_tol``, they are merged and weights are given to
        rows in proportion to the number of times they appeared.
    metric : str or callable, default 'chebyshev'
        The metric to compare AMDs/PDDs with. AMDs are compared directly
        with this metric. EMD is the metric used between PDDs, which
        requires giving a metric to use between PDD rows. Chebyshev
        (L-infinity) distance is the default. Accepts any metric
        accepted by :func:`scipy.spatial.distance.cdist`.
    n_jobs : int, default None, ``by='PDD'`` only
        Maximum number of concurrent jobs for
        parallel processing with :code:`joblib`. Set to -1 to use the
        maximum. Using parallel processing may be slower for small
        inputs.
    backend : str, default 'multiprocessing', ``by='PDD'`` only
        The parallelization backend implementation for PDD comparisons.
        For a list of supported backends, see the backend argument of
        :class:`joblib.Parallel`.
    verbose : bool, default False
        Prints a progress bar when reading crystals, calculating
        AMDs/PDDs and comparing PDDs. If using parallel processing
        (n_jobs > 1), the verbose argument of :class:`joblib.Parallel`
        is used, otherwise uses ``tqdm``.
    low_memory : bool, default False, ``by='AMD'`` only
        Use a slower but more memory efficient
        method for large collections of AMDs (metric 'chebyshev' only).

    Returns
    -------
    df : :class:`pandas.DataFrame`
        DataFrame of the distance matrix for the given crystals compared
        by the chosen invariant.

    Raises
    ------
    ValueError
        If by is not 'AMD' or 'PDD', if either set given have no valid
        crystals to compare, or if crystals or crystals\_ are an invalid
        type.

    Examples
    --------
    Compare everything in a .cif (default, AMD with k=100)::

        df = amd.compare('data.cif')

    Compare everything in one cif with all crystals in all cifs in a
    directory (PDD, k=50)::

        df = amd.compare('data.cif', 'dir/to/cifs', by='PDD', k=50)

    **Examples (csd-python-api only)**

    Compare two crystals by CSD refcode (PDD, k=50)::

        df = amd.compare('DEBXIT01', 'DEBXIT02', by='PDD', k=50)

    Compare everything in a refcode family (AMD, k=100)::

        df = amd.compare('DEBXIT', families=True)
    """

    by = by.upper()
    if by not in ('AMD', 'PDD'):
        raise ValueError(
            "'by' parameter of amd.compare() must be one of 'AMD' or 'PDD' "
            f"(passed '{by}')"
        )

    reader_kwargs = {
        'reader': reader,
        'families': families,
        'remove_hydrogens': remove_hydrogens,
        'disorder': disorder,
        'heaviest_component': heaviest_component,
        'molecular_centres': molecular_centres,
        'show_warnings': show_warnings,
        'verbose': verbose,
    }

    compare_kwargs = {
        'metric': metric,
        'n_jobs': n_jobs,
        'backend': backend,
        'verbose': verbose,
        'low_memory': low_memory,
        **kwargs
    }

    # Get list of periodic sets from the first input
    crystals = _unwrap_periodicset_list(crystals, **reader_kwargs)
    if not crystals:
        raise ValueError(
            'First set passed to amd.compare() contains no valid '
            'crystals/periodic sets'
        )
    names = [s.name for s in crystals]

    # Get list of periodic sets from the second input if given; with a
    # single set the columns are labelled with the same names as the index
    if crystals_ is None:
        names_ = names
    else:
        crystals_ = _unwrap_periodicset_list(crystals_, **reader_kwargs)
        if not crystals_:
            raise ValueError(
                'Second set passed to amd.compare() contains no valid '
                'crystals/periodic sets'
            )
        names_ = [s.name for s in crystals_]

    def _invariants(periodic_sets):
        """Calculate the chosen invariant (AMD or PDD) of each periodic
        set, showing a tqdm progress bar if ``verbose`` is set."""
        progress_bar = None
        if verbose:
            progress_bar = tqdm.tqdm(
                periodic_sets, desc='Calculating', delay=1
            )
            periodic_sets = progress_bar
        try:
            if by == 'AMD':
                return [AMD(s, k) for s in periodic_sets]
            return [
                PDD(s, k, collapse_tol=collapse_tol) for s in periodic_sets
            ]
        finally:
            # Always release the bar, including on errors and for the
            # second set of crystals
            if progress_bar is not None:
                progress_bar.close()

    if by == 'AMD':
        # The AMD comparison functions forward unknown keywords to
        # SciPy, so drop the options only the PDD functions understand
        for key in ('n_jobs', 'backend', 'verbose'):
            compare_kwargs.pop(key, None)
        pairwise_func, cross_func = AMD_pdist, AMD_cdist
    else:
        compare_kwargs.pop('low_memory', None)
        pairwise_func, cross_func = PDD_pdist, PDD_cdist

    invs = _invariants(crystals)
    if crystals_ is None:
        dm = pairwise_func(invs, **compare_kwargs)
    else:
        invs_ = _invariants(crystals_)
        dm = cross_func(invs, invs_, **compare_kwargs)

    if nearest:
        nn_dm, inds = neighbours_from_distance_matrix(nearest, dm)
        data = {}
        for i in range(nearest):
            data['ID ' + str(i + 1)] = [names_[j] for j in inds[:, i]]
            data['DIST ' + str(i + 1)] = nn_dm[:, i]
        return pd.DataFrame(data, index=names)

    # Pairwise comparisons return a condensed (1-D) distance matrix
    if dm.ndim == 1:
        dm = squareform(dm)
    return pd.DataFrame(dm, index=names, columns=names_)
256
257
258
def EMD(
        pdd: npt.NDArray,
        pdd_: npt.NDArray,
        metric: Optional[str] = 'chebyshev',
        return_transport: Optional[bool] = False,
        **kwargs
) -> Union[float, tuple]:
    r"""Calculate the Earth mover's distance (EMD) between two PDDs, aka
    the Wasserstein metric.

    Parameters
    ----------
    pdd : :class:`numpy.ndarray`
        PDD of a crystal.
    pdd\_ : :class:`numpy.ndarray`
        PDD of a crystal.
    metric : str or callable, default 'chebyshev'
        EMD between PDDs requires defining a distance between PDD rows.
        By default, Chebyshev (L-infinity) distance is chosen like with
        AMDs. Accepts any metric accepted by
        :func:`scipy.spatial.distance.cdist`.
    return_transport: bool, default False
        Instead return a tuple ``(emd, transport_plan)`` where
        transport_plan describes the optimal flow.
    **kwargs
        Extra keyword arguments forwarded to
        :func:`scipy.spatial.distance.cdist`.

    Returns
    -------
    emd : float
        Earth mover's distance between two PDDs. If ``return_transport``
        is True, return a tuple (emd, transport_plan).

    Raises
    ------
    ValueError
        Thrown if ``pdd`` and ``pdd_`` do not have the same number of
        columns.
    """

    # Column 0 of each PDD is handed to the solver separately from the
    # remaining columns (presumably the row weights); the remaining
    # columns are compared row-by-row to build the cost matrix.
    row_distances = cdist(pdd[:, 1:], pdd_[:, 1:], metric=metric, **kwargs)
    emd_dist, transport_plan = network_simplex(
        pdd[:, 0], pdd_[:, 0], row_distances
    )

    if return_transport:
        return emd_dist, transport_plan
    return emd_dist
302
303
304
def AMD_cdist(
        amds: npt.ArrayLike,
        amds_: npt.ArrayLike,
        metric: str = 'chebyshev',
        low_memory: bool = False,
        **kwargs
) -> npt.NDArray:
    r"""Compare two sets of AMDs with each other, returning a distance
    matrix. This function is essentially
    :func:`scipy.spatial.distance.cdist` with the default metric
    ``chebyshev`` and a low memory option.

    Parameters
    ----------
    amds : ArrayLike
        A list/array of AMDs. A single (1-D) AMD is treated as a
        collection containing one AMD.
    amds\_ : ArrayLike
        A list/array of AMDs. A single (1-D) AMD is treated as a
        collection containing one AMD.
    metric : str or callable, default 'chebyshev'
        Usually AMDs are compared with the Chebyshev (L-infinity) distance.
        Accepts any metric accepted by :func:`scipy.spatial.distance.cdist`.
    low_memory : bool, default False
        Use a slower but more memory efficient method for large collections of
        AMDs (metric 'chebyshev' only).
    **kwargs
        Extra keyword arguments forwarded to
        :func:`scipy.spatial.distance.cdist` (ignored if ``low_memory``
        is True).

    Returns
    -------
    dm : :class:`numpy.ndarray`
        A distance matrix shape ``(len(amds), len(amds_))``. ``dm[i, j]`` is
        the distance (given by ``metric``) between ``amds[i]`` and
        ``amds_[j]``.

    Raises
    ------
    ValueError
        If ``low_memory`` is True and ``metric`` is not 'chebyshev'.
    """

    amds, amds_ = np.asarray(amds), np.asarray(amds_)

    # Promote a single AMD to a one-row collection
    if amds.ndim == 1:
        amds = np.array([amds])
    if amds_.ndim == 1:
        amds_ = np.array([amds_])

    if not low_memory:
        return cdist(amds, amds_, metric=metric, **kwargs)

    if metric != 'chebyshev':
        raise ValueError(
            "'low_memory' parameter of amd.AMD_cdist() only implemented "
            "with metric='chebyshev'."
        )

    # Fill one row at a time so only a (len(amds_), k) temporary is
    # needed rather than every pairwise difference at once
    dm = np.empty((len(amds), len(amds_)))
    for i, amd_vec in enumerate(amds):
        dm[i] = np.amax(np.abs(amds_ - amd_vec), axis=-1)
    return dm
356
357
358
def AMD_pdist(
        amds: npt.ArrayLike,
        metric: str = 'chebyshev',
        low_memory: bool = False,
        **kwargs
) -> npt.NDArray:
    """Compare a set of AMDs pairwise, returning a condensed distance
    matrix. This function is essentially
    :func:`scipy.spatial.distance.pdist` with the default metric
    ``chebyshev`` and a low memory parameter.

    Parameters
    ----------
    amds : ArrayLike
        A list/array of AMDs.
    metric : str or callable, default 'chebyshev'
        Usually AMDs are compared with the Chebyshev (L-infinity)
        distance. Accepts any metric accepted by
        :func:`scipy.spatial.distance.pdist`.
    low_memory : bool, default False
        Use a slower but more memory efficient method for large
        collections of AMDs (metric 'chebyshev' only).

    Returns
    -------
    cdm : :class:`numpy.ndarray`
        Returns a condensed distance matrix. Collapses a square distance
        matrix into a vector, just keeping the upper half. See the
        function :func:`squareform <scipy.spatial.distance.squareform>`
        from SciPy to convert to a symmetric square distance matrix.
    """

    amds = np.asarray(amds)
    if amds.ndim == 1:
        amds = np.array([amds])

    if not low_memory:
        return pdist(amds, metric=metric, **kwargs)

    n_amds = len(amds)
    if metric != 'chebyshev':
        raise ValueError(
            "'low_memory' parameter of amd.AMD_pdist() only implemented "
            "with metric='chebyshev'."
        )

    # Row ``row`` of the upper triangle occupies the next
    # ``n_amds - row - 1`` entries of the condensed matrix
    condensed = np.empty((n_amds * (n_amds - 1)) // 2, dtype=np.float64)
    start = 0
    for row, amd_vec in enumerate(amds):
        stop = start + n_amds - row - 1
        condensed[start:stop] = np.abs(amds[row + 1:] - amd_vec).max(axis=-1)
        start = stop
    return condensed
412
413
414
def PDD_cdist(
        pdds: List[npt.NDArray],
        pdds_: List[npt.NDArray],
        metric: str = 'chebyshev',
        backend: str = 'multiprocessing',
        n_jobs: Optional[int] = None,
        verbose: bool = False,
        **kwargs
) -> npt.NDArray:
    r"""Compare two sets of PDDs with each other, returning a distance
    matrix. Supports parallel processing via joblib. If using
    parallelisation, make sure to include an if __name__ == '__main__'
    guard around this function.

    Parameters
    ----------
    pdds : List[:class:`numpy.ndarray`]
        A list of PDDs.
    pdds\_ : List[:class:`numpy.ndarray`]
        A list of PDDs.
    metric : str or callable, default 'chebyshev'
        Usually PDD rows are compared with the Chebyshev/l-infinity
        distance. Accepts any metric accepted by
        :func:`scipy.spatial.distance.cdist`.
    backend : str, default 'multiprocessing'
        The parallelization backend implementation. For a list of
        supported backends, see the backend argument of
        :class:`joblib.Parallel`.
    n_jobs : int, default None
        Maximum number of concurrent jobs for parallel processing with
        ``joblib``. Set to -1 to use the maximum. Using parallel
        processing may be slower for small inputs.
    verbose : bool, default False
        Prints a progress bar. If using parallel processing
        (n_jobs > 1), the verbose argument of :class:`joblib.Parallel`
        is used, otherwise uses tqdm.
    **kwargs
        Extra keyword arguments forwarded to :func:`EMD <.compare.EMD>`;
        ``return_transport`` is discarded.

    Returns
    -------
    dm : :class:`numpy.ndarray`
        Returns a distance matrix shape ``(len(pdds), len(pdds_))``. The
        :math:`ij` th entry is the distance between ``pdds[i]`` and
        ``pdds_[j]`` given by Earth mover's distance.
    """

    # Each entry of the matrix must be a single number, so EMD is never
    # asked for the transport plan here
    kwargs.pop('return_transport', None)
    k = pdds[0].shape[-1] - 1
    n, m = len(pdds), len(pdds_)

    if n_jobs is not None and n_jobs not in (0, 1):
        # TODO: put results into preallocated empty array in place
        joblib_verbosity = 3 if verbose else 0
        flat_dm = Parallel(
            backend=backend, n_jobs=n_jobs, verbose=joblib_verbosity
        )(
            delayed(partial(EMD, metric=metric, **kwargs))(pdd, pdd_)
            for pdd in pdds for pdd_ in pdds_
        )
        return np.array(flat_dm).reshape((n, m))

    dm = np.empty((n, m))
    progress_bar = None
    if verbose:
        desc = f'Comparing {n}x{m} PDDs (k={k})'
        progress_bar = tqdm.tqdm(desc=desc, total=n*m)
    try:
        for i, pdd in enumerate(pdds):
            for j, pdd_ in enumerate(pdds_):
                dm[i, j] = EMD(pdd, pdd_, metric=metric, **kwargs)
                if progress_bar is not None:
                    progress_bar.update(1)
    finally:
        # Release the bar even if a comparison raises
        if progress_bar is not None:
            progress_bar.close()

    return dm
487
488
489
def PDD_pdist(
        pdds: List[npt.NDArray],
        metric: str = 'chebyshev',
        backend: str = 'multiprocessing',
        n_jobs: Optional[int] = None,
        verbose: bool = False,
        **kwargs
) -> npt.NDArray:
    """Compare a set of PDDs pairwise, returning a condensed distance
    matrix. Supports parallelisation via joblib. If using
    parallelisation, make sure to include an if __name__ == '__main__'
    guard around this function.

    Parameters
    ----------
    pdds : List[:class:`numpy.ndarray`]
        A list of PDDs.
    metric : str or callable, default 'chebyshev'
        Usually PDD rows are compared with the Chebyshev/l-infinity
        distance. Accepts any metric accepted by
        :func:`scipy.spatial.distance.cdist`.
    backend : str, default 'multiprocessing'
        The parallelization backend implementation. For a list of
        supported backends, see the backend argument of
        :class:`joblib.Parallel`.
    n_jobs : int, default None
        Maximum number of concurrent jobs for parallel processing with
        ``joblib``. Set to -1 to use the maximum. Using parallel
        processing may be slower for small inputs.
    verbose : bool, default False
        Prints a progress bar. If using parallel processing, the
        verbose argument of :class:`joblib.Parallel` is used, otherwise
        uses tqdm.
    **kwargs
        Extra keyword arguments forwarded to :func:`EMD <.compare.EMD>`;
        ``return_transport`` is discarded.

    Returns
    -------
    cdm : :class:`numpy.ndarray`
        Returns a condensed distance matrix. Collapses a square distance
        matrix into a vector, just keeping the upper half. See the
        function :func:`squareform <scipy.spatial.distance.squareform>`
        from SciPy to convert to a symmetric square distance matrix.
    """

    # Each entry of the matrix must be a single number, so EMD is never
    # asked for the transport plan here
    kwargs.pop('return_transport', None)
    k = pdds[0].shape[-1] - 1
    m = len(pdds)

    # Same rule as PDD_cdist: anything other than None, 0 or 1 goes
    # through joblib, so negative values such as -1 (all CPUs) are
    # parallelised rather than silently run serially.
    if n_jobs is not None and n_jobs not in (0, 1):
        # TODO: put results into preallocated empty array in place
        joblib_verbosity = 3 if verbose else 0
        cdm = Parallel(
            backend=backend, n_jobs=n_jobs, verbose=joblib_verbosity
        )(
            delayed(partial(EMD, metric=metric, **kwargs))(pdds[i], pdds[j])
            for i, j in combinations(range(m), 2)
        )
        return np.array(cdm)

    cdm_len = (m * (m - 1)) // 2
    cdm = np.empty(cdm_len, dtype=np.float64)
    progress_bar = None
    if verbose:
        desc = f'Comparing {m} PDDs pairwise (k={k})'
        progress_bar = tqdm.tqdm(desc=desc, total=cdm_len)
    try:
        # combinations() yields pairs in the row-major upper-triangle
        # order used by condensed distance matrices
        for r, (pdd, pdd_) in enumerate(combinations(pdds, 2)):
            cdm[r] = EMD(pdd, pdd_, metric=metric, **kwargs)
            if progress_bar is not None:
                progress_bar.update(1)
    finally:
        # Release the bar even if a comparison raises
        if progress_bar is not None:
            progress_bar.close()

    return cdm
560
561
562
def emd(pdd: npt.NDArray, pdd_: npt.NDArray, **kwargs) -> float:
    """Lower-case alias for :func:`EMD() <.compare.EMD>`; all arguments
    are forwarded unchanged."""
    distance = EMD(pdd, pdd_, **kwargs)
    return distance
565
566
567
def _unwrap_periodicset_list(psets_or_str, **reader_kwargs):
    """Turn a valid input for amd.compare() into a flat list of
    PeriodicSets. The input may be a PeriodicSet, a path (to a file or
    folder), a refcode, or a list of any of those; ``reader_kwargs``
    are passed on to the readers."""

    # Anything that is not a list is a single item to extract from
    if not isinstance(psets_or_str, list):
        return _extract_periodicsets(psets_or_str, **reader_kwargs)

    # Each list entry can itself yield several periodic sets; flatten
    periodic_sets = []
    for entry in psets_or_str:
        periodic_sets.extend(_extract_periodicsets(entry, **reader_kwargs))
    return periodic_sets
576
577
578
def _extract_periodicsets(item, **reader_kwargs):
    """Given a PeriodicSet, tuple, path or CSD refcode, return a list of
    the PeriodicSet(s) it represents.

    Parameters
    ----------
    item : PeriodicSet, tuple, str or os.PathLike
        A PeriodicSet is returned as-is in a list. A tuple is converted
        with ``PeriodicSet(item[0], item[1])``. An existing file or
        directory is read with CifReader. Any other string is
        upper-cased and passed to CSDReader.
    **reader_kwargs
        Keyword arguments for the reader. 'families' and 'refcodes' are
        dropped before calling CifReader; 'reader' is dropped before
        calling CSDReader.

    Returns
    -------
    list
        The periodic sets obtained from ``item``.

    Raises
    ------
    ValueError
        If ``item`` has an unsupported type, or is a string that is
        neither an existing path nor readable by CSDReader.
    """

    if isinstance(item, PeriodicSet):
        return [item]
    if isinstance(item, tuple):
        return [PeriodicSet(item[0], item[1])]

    try:
        path = Path(item)
    except TypeError as err:
        raise ValueError(
            'amd.compare() expected a str, os.PathLike or amd.PeriodicSet, '
            f"but was given type '{item.__class__.__name__}'"
        ) from err

    if path.is_file() or path.is_dir():
        reader_kwargs.pop('families', None)
        reader_kwargs.pop('refcodes', None)
        return list(CifReader(path, **reader_kwargs))

    if isinstance(item, str):
        # Not an existing path, so try the string as a CSD refcode
        reader_kwargs.pop('reader', None)
        try:
            return list(CSDReader(item.upper(), **reader_kwargs))
        except ImportError as err:
            raise ValueError(
                'amd.compare() expected a path, os.PathLike or '
                f"amd.PeriodicSet, but was given '{item}'; to "
                'interpret as a CSD refcode, install csd-python-api'
            ) from err
        except Exception as err:
            # Exception rather than a bare except, so KeyboardInterrupt
            # and SystemExit are not converted into a ValueError
            raise ValueError(
                f'amd.compare() expected a path, os.PathLike, amd.PeriodicSet '
                f"or CSD refcode, but was given '{item}'"
            ) from err

    raise ValueError(
        'amd.compare() expected a path, os.PathLike, amd.PeriodicSet or '
        f"CSD refcode, but was given type '{item.__class__.__name__}'"
    )
619