1
|
|
|
import csv |
2
|
|
|
from pathlib import Path |
3
|
|
|
from typing import Optional |
4
|
|
|
|
5
|
|
|
from ..typing import TypePath |
6
|
|
|
from ..transforms import Transform |
7
|
|
|
from ..download import download_and_extract_archive |
8
|
|
|
from .. import SubjectsDataset, Subject, ScalarImage, LabelMap |
9
|
|
|
|
10
|
|
|
|
11
|
|
|
class EPISURG(SubjectsDataset):
    """
    `EPISURG`_ is a clinical dataset of :math:`T_1`-weighted MRI from 430
    epileptic patients who underwent resective brain surgery at the National
    Hospital of Neurology and Neurosurgery (Queen Square, London, United
    Kingdom) between 1990 and 2018.

    The dataset comprises 430 postoperative MRI. The corresponding preoperative
    MRI is present for 268 subjects.

    Three human raters segmented the resection cavity on partially overlapping
    subsets of EPISURG.

    If you use this dataset for your research, you agree with the *Data use
    agreement* presented at the EPISURG entry on the `UCL Research Data
    Repository <EPISURG_>`_ and you must cite the corresponding publications.

    .. _EPISURG: https://doi.org/10.5522/04/9996158.v1

    Args:
        root: Root directory to which the dataset will be downloaded.
        transform: An instance of
            :class:`~torchio.transforms.transform.Transform`.
        download: If set to ``True``, will download the data into :attr:`root`.
        **kwargs: Additional keyword arguments forwarded to the
            ``SubjectsDataset`` constructor, both for this dataset and for
            the subsets returned by :meth:`get_labeled`,
            :meth:`get_unlabeled` and :meth:`get_paired`.

    .. warning:: The size of this dataset is multiple GB.
        If you set :attr:`download` to ``True``, it will take some time
        to be downloaded if it is not already present.
    """

    data_url = 'https://s3-eu-west-1.amazonaws.com/pstorage-ucl-2748466690/26153588/EPISURG.zip'  # noqa: E501
    md5 = '5ec5831a2c6fbfdc8489ba2910a6504b'

    def __init__(
            self,
            root: TypePath,
            transform: Optional[Transform] = None,
            download: bool = False,
            **kwargs,
            ):
        root = Path(root).expanduser().absolute()
        if download:
            self._download(root)
        subjects_list = self._get_subjects_list(root)
        # Kept so that _get_subset() can build subsets with the same options
        self.kwargs = kwargs
        super().__init__(subjects_list, transform=transform, **kwargs)

    @staticmethod
    def _check_exists(root, modalities):
        """Return ``True`` if ``root / modality`` is a directory for all modalities.

        Args:
            root: Path-like object supporting the ``/`` operator.
            modalities: Iterable of directory names expected under ``root``.

        Returns:
            ``True`` if every directory exists (also when ``modalities`` is
            empty), ``False`` as soon as one is missing.
        """
        return all((root / modality).is_dir() for modality in modalities)

    @staticmethod
    def _get_subjects_list(root):
        """Build one ``Subject`` per row of ``<root>/EPISURG/subjects.csv``.

        Each subject stores the ``Subject``, ``Hemisphere`` and ``Type``
        columns of the table, the postoperative image, the preoperative image
        if there is one, and one label map per segmentation file found in the
        ``postop`` directory of the subject.

        Args:
            root: Directory containing the extracted ``EPISURG`` directory.

        Returns:
            List of ``Subject`` instances, in the order of the CSV rows.

        Raises:
            FileNotFoundError: If the subjects table or the postoperative
                image of a subject cannot be found.
            RuntimeError: If more than one preoperative image is found for
                a subject.
        """
        episurg_dir = root / 'EPISURG'
        subjects_dir = episurg_dir / 'subjects'
        csv_path = episurg_dir / 'subjects.csv'
        if not csv_path.is_file():
            message = (
                f'EPISURG subjects table not found in "{csv_path}".'
                ' Use download=True to download the dataset'
            )
            raise FileNotFoundError(message)

        subjects = []
        # newline='' is what the csv module recommends for files it reads
        with open(csv_path, newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                subject_id = row['Subject']
                subject_dir = subjects_dir / subject_id
                subject_dict = {
                    'subject_id': subject_id,
                    'hemisphere': row['Hemisphere'],
                    'surgery_type': row['Type'],
                }

                # The preoperative image is optional, but must be unique
                preop_dir = subject_dir / 'preop'
                preop_paths = sorted(preop_dir.glob('*preop*'))
                if len(preop_paths) > 1:
                    message = (
                        'Expected at most one preoperative image in'
                        f' "{preop_dir}" but found {len(preop_paths)}'
                    )
                    raise RuntimeError(message)
                if preop_paths:
                    subject_dict['preop_mri'] = ScalarImage(preop_paths[0])

                # The postoperative image is mandatory
                postop_dir = subject_dir / 'postop'
                postop_paths = sorted(postop_dir.glob('*postop-t1mri*'))
                if not postop_paths:
                    message = (
                        f'No postoperative image found in "{postop_dir}"'
                    )
                    raise FileNotFoundError(message)
                subject_dict['postop_mri'] = ScalarImage(postop_paths[0])

                for seg_path in postop_dir.glob('*seg*'):
                    # The identifier is the 8th character from the end of
                    # the file name; presumably the one right before a
                    # 7-character extension such as ".nii.gz" -- TODO confirm
                    seg_id = seg_path.name[-8]
                    subject_dict[f'seg_{seg_id}'] = LabelMap(seg_path)

                subjects.append(Subject(**subject_dict))
        return subjects

    def _download(self, root):
        """Download the EPISURG data if it does not exist already."""
        if (root / 'EPISURG').is_dir():
            return
        root.mkdir(exist_ok=True, parents=True)
        download_and_extract_archive(
            self.data_url,
            download_root=root,
            md5=self.md5,
        )
        (root / 'EPISURG.zip').unlink()  # cleanup

    def _glob_subjects(self, string):
        """Return the subjects for which at least one key contains ``string``.

        Args:
            string: Substring searched for in the names obtained by
                iterating over each subject.

        Returns:
            List of matching subjects, in the order of ``self._subjects``.
        """
        return [
            subject
            for subject in self._subjects
            if any(string in image_name for image_name in subject)
        ]

    def _get_labeled_subjects(self):
        """Return the subjects with at least one entry containing ``'seg'``."""
        return self._glob_subjects('seg')

    def _get_paired_subjects(self):
        """Return the subjects with at least one entry containing ``'preop'``."""
        return self._glob_subjects('preop')

    def _get_subset(self, subjects):
        """Wrap ``subjects`` in a ``SubjectsDataset`` sharing this dataset's
        transform and extra keyword arguments."""
        dataset = SubjectsDataset(
            subjects,
            transform=self._transform,
            **(self.kwargs),
        )
        return dataset

    def get_labeled(self) -> SubjectsDataset:
        """Get dataset from subjects with manual annotations."""
        return self._get_subset(self._get_labeled_subjects())

    def get_unlabeled(self) -> SubjectsDataset:
        """Get dataset from subjects without manual annotations."""
        # Computed once, instead of once per subject inside the comprehension
        labeled_subjects = self._get_labeled_subjects()
        subjects = [
            subject
            for subject in self._subjects
            if subject not in labeled_subjects
        ]
        return self._get_subset(subjects)

    def get_paired(self) -> SubjectsDataset:
        """Get dataset from subjects with pre- and post-op MRI."""
        return self._get_subset(self._get_paired_subjects())
149
|
|
|
|