Passed
Pull Request — master (#433)
by Fernando
01:14
created

torchio.datasets.episurg.EPISURG.get_paired()   A

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 2
dl 0
loc 3
rs 10
c 0
b 0
f 0
cc 1
nop 1
1
from pathlib import Path
2
from typing import Optional
3
4
from ..typing import TypePath
5
from ..transforms import Transform
6
from ..download import download_and_extract_archive
7
from .. import SubjectsDataset, Subject, ScalarImage, LabelMap
8
9
10
class EPISURG(SubjectsDataset):
    """
    `EPISURG`_ is a clinical dataset of :math:`T_1`-weighted MRI from 430
    epileptic patients who underwent resective brain surgery at the National
    Hospital of Neurology and Neurosurgery (Queen Square, London, United
    Kingdom) between 1990 and 2018.

    The dataset comprises 430 postoperative MRI. The corresponding preoperative
    MRI is present for 268 subjects.

    Three human raters segmented the resection cavity on partially overlapping
    subsets of EPISURG.

    If you use this dataset for your research, you agree with the *Data use
    agreement* presented at the EPISURG entry on the `UCL Research Data
    Repository <EPISURG>`_ and you must cite the corresponding publications.

    .. _EPISURG: https://doi.org/10.5522/04/9996158.v1

    Args:
        root: Root directory to which the dataset will be downloaded.
        transform: An instance of
            :class:`~torchio.transforms.transform.Transform`.
        download: If set to ``True``, will download the data into :attr:`root`.
        **kwargs: Extra keyword arguments forwarded to the parent class
            constructor. They are also stored so that the subsets returned by
            :meth:`get_labeled`, :meth:`get_unlabeled` and :meth:`get_paired`
            are built with the same arguments.

    .. warning:: The size of this dataset is multiple GB.
        If you set :attr:`download` to ``True``, it will take some time
        to be downloaded if it is not already present.
    """

    data_url = 'https://s3-eu-west-1.amazonaws.com/pstorage-ucl-2748466690/26153588/EPISURG.zip'  # noqa: E501
    md5 = '5ec5831a2c6fbfdc8489ba2910a6504b'

    def __init__(
            self,
            root: TypePath,
            transform: Optional[Transform] = None,
            download: bool = False,
            **kwargs,
            ):
        root = Path(root).expanduser().absolute()
        if download:
            self._download(root)
        subjects_list = self._get_subjects_list(root)
        # Kept so that _get_subset() can build subsets with the same options
        self.kwargs = kwargs
        super().__init__(subjects_list, transform=transform, **kwargs)

    @staticmethod
    def _check_exists(root: Path, modalities) -> bool:
        """Return ``True`` if ``root / modality`` is a directory for all items.

        An empty ``modalities`` iterable yields ``True``.
        """
        return all((root / modality).is_dir() for modality in modalities)

    @staticmethod
    def _get_subjects_list(root: Path):
        """Build one subject per ``sub-*`` directory found in the dataset.

        Directories are read from ``root / 'EPISURG' / 'subjects'`` in sorted
        order. Each subject always gets a ``postop_mri`` entry, a
        ``preop_mri`` entry when a preoperative file is present and one
        ``seg_<id>`` entry per segmentation file found.

        Raises:
            AssertionError: If a subject has more than one preoperative file.
            IndexError: If a subject has no postoperative T1 MRI file.
        """
        subjects_dir = root / 'EPISURG' / 'subjects'
        subjects = []
        for subject_dir in sorted(subjects_dir.glob('sub-*')):
            # The ID is taken to be the last four characters of the directory
            # name (presumably the zero-padded number in 'sub-XXXX').
            subject_id = subject_dir.name[-4:]
            images_dict = {'subject_id': subject_id}

            preop_dir = subject_dir / 'preop'
            preop_paths = list(preop_dir.glob('*preop*'))
            assert len(preop_paths) <= 1, (
                f'Expected at most one preoperative image in "{preop_dir}"'
            )
            if preop_paths:
                images_dict['preop_mri'] = ScalarImage(preop_paths[0])

            postop_dir = subject_dir / 'postop'
            postop_path = list(postop_dir.glob('*postop-t1mri*'))[0]
            images_dict['postop_mri'] = ScalarImage(postop_path)

            for seg_path in postop_dir.glob('*seg*'):
                # Eighth character from the end of the file name; presumably
                # the rater number right before a '.nii.gz' suffix.
                # TODO: confirm against the archive's naming scheme.
                seg_id = seg_path.name[-8]
                images_dict[f'seg_{seg_id}'] = LabelMap(seg_path)

            subjects.append(Subject(**images_dict))
        return subjects

    def _download(self, root: Path) -> None:
        """Download the EPISURG data if it does not exist already."""
        if (root / 'EPISURG').is_dir():
            return
        root.mkdir(exist_ok=True, parents=True)
        download_and_extract_archive(
            self.data_url,
            download_root=root,
            md5=self.md5,
        )
        (root / 'EPISURG.zip').unlink()  # cleanup

    def _glob_subjects(self, string: str):
        """Return the subjects having at least one entry name with ``string``.

        The order of the dataset is preserved.
        """
        return [
            subject
            for subject in self._subjects
            if any(string in image_name for image_name in subject)
        ]

    def _get_labeled_subjects(self):
        """Return the subjects with at least one ``seg`` entry."""
        return self._glob_subjects('seg')

    def _get_paired_subjects(self):
        """Return the subjects with a ``preop`` entry."""
        return self._glob_subjects('preop')

    def _get_subset(self, subjects) -> SubjectsDataset:
        """Wrap ``subjects`` in a dataset sharing this instance's options."""
        dataset = SubjectsDataset(
            subjects,
            transform=self._transform,
            **(self.kwargs),
        )
        return dataset

    def get_labeled(self) -> SubjectsDataset:
        """Get dataset from subjects with manual annotations."""
        return self._get_subset(self._get_labeled_subjects())

    def get_unlabeled(self) -> SubjectsDataset:
        """Get dataset from subjects without manual annotations."""
        # Computed once: calling it inside the comprehension's condition
        # would rescan the whole dataset for every single subject.
        labeled_subjects = self._get_labeled_subjects()
        subjects = [s for s in self._subjects if s not in labeled_subjects]
        return self._get_subset(subjects)

    def get_paired(self) -> SubjectsDataset:
        """Get dataset from subjects with pre- and post-op MRI."""
        return self._get_subset(self._get_paired_subjects())
139