Passed
Push — master ( e50ba8...58536b )
by Fernando
01:15
created

torchio.datasets.episurg.EPISURG._glob_subjects()   A

Complexity

Conditions 4

Size

Total Lines 8
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 8
dl 0
loc 8
rs 10
c 0
b 0
f 0
cc 4
nop 2
1
import csv
2
from pathlib import Path
3
from typing import Optional
4
5
from ..typing import TypePath
6
from ..transforms import Transform
7
from ..download import download_and_extract_archive
8
from .. import SubjectsDataset, Subject, ScalarImage, LabelMap
9
10
11
class EPISURG(SubjectsDataset):
    """
    `EPISURG`_ is a clinical dataset of :math:`T_1`-weighted MRI from 430
    epileptic patients who underwent resective brain surgery at the National
    Hospital of Neurology and Neurosurgery (Queen Square, London, United
    Kingdom) between 1990 and 2018.

    The dataset comprises 430 postoperative MRI. The corresponding preoperative
    MRI is present for 268 subjects.

    Three human raters segmented the resection cavity on partially overlapping
    subsets of EPISURG.

    If you use this dataset for your research, you agree with the *Data use
    agreement* presented at the EPISURG entry on the `UCL Research Data
    Repository <EPISURG>`_ and you must cite the corresponding publications.

    .. _EPISURG: https://doi.org/10.5522/04/9996158.v1

    Args:
        root: Root directory to which the dataset will be downloaded.
        transform: An instance of
            :class:`~torchio.transforms.transform.Transform`.
        download: If set to ``True``, will download the data into :attr:`root`.

    .. warning:: The size of this dataset is multiple GB.
        If you set :attr:`download` to ``True``, it will take some time
        to be downloaded if it is not already present.
    """

    data_url = 'https://s3-eu-west-1.amazonaws.com/pstorage-ucl-2748466690/26153588/EPISURG.zip'  # noqa: E501
    md5 = '5ec5831a2c6fbfdc8489ba2910a6504b'

    def __init__(
            self,
            root: TypePath,
            transform: Optional[Transform] = None,
            download: bool = False,
            **kwargs,
            ):
        root = Path(root).expanduser().absolute()
        if download:
            self._download(root)
        subjects_list = self._get_subjects_list(root)
        # Stored so that _get_subset() can build subsets with the same
        # extra keyword arguments that were given to this dataset.
        self.kwargs = kwargs
        super().__init__(subjects_list, transform=transform, **kwargs)

    @staticmethod
    def _check_exists(root: Path, modalities) -> bool:
        """Return ``True`` if ``root / modality`` is a directory for every
        entry of ``modalities`` (vacuously ``True`` if there are none)."""
        return all((root / modality).is_dir() for modality in modalities)

    @staticmethod
    def _get_subjects_list(root: Path):
        """Build one :class:`Subject` per row of ``EPISURG/subjects.csv``.

        Each subject gets the CSV columns ``Subject``, ``Hemisphere`` and
        ``Type`` as ``subject_id``, ``hemisphere`` and ``surgery_type``, a
        ``postop_mri`` image, an optional ``preop_mri`` image and one
        ``seg_<id>`` label map per segmentation file found.

        Args:
            root: Directory that contains the extracted ``EPISURG`` folder.

        Returns:
            List of subjects, in CSV order.

        Raises:
            FileNotFoundError: If ``subjects.csv`` does not exist.
            AssertionError: If a subject has more than one pre-op image.
            IndexError: If a subject has no post-op T1 image.
        """
        episurg_dir = root / 'EPISURG'
        subjects_dir = episurg_dir / 'subjects'
        csv_path = episurg_dir / 'subjects.csv'
        if not csv_path.exists():
            # open() would raise the same exception type; this only makes
            # the message actionable for users who forgot to download.
            message = (
                f'EPISURG subjects file not found: "{csv_path}".'
                ' Pass download=True to download the dataset'
            )
            raise FileNotFoundError(message)
        subjects = []
        # newline='' is what the csv module documentation asks for when
        # handing a file object to a reader.
        with open(csv_path, newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                subject_id = row['Subject']
                subject_dir = subjects_dir / subject_id
                subject_dict = {
                    'subject_id': subject_id,
                    'hemisphere': row['Hemisphere'],
                    'surgery_type': row['Type'],
                }

                # The pre-op scan is optional, but there may be at most one.
                preop_dir = subject_dir / 'preop'
                preop_paths = list(preop_dir.glob('*preop*'))
                assert len(preop_paths) <= 1, (
                    f'Expected at most one pre-op image in "{preop_dir}",'
                    f' found {len(preop_paths)}'
                )
                if preop_paths:
                    subject_dict['preop_mri'] = ScalarImage(preop_paths[0])

                # The post-op scan is mandatory: indexing fails if missing.
                postop_dir = subject_dir / 'postop'
                postop_path = list(postop_dir.glob('*postop-t1mri*'))[0]
                subject_dict['postop_mri'] = ScalarImage(postop_path)

                for seg_path in postop_dir.glob('*seg*'):
                    # Eighth character from the end of the file name;
                    # presumably the single-digit rater id right before a
                    # '.nii.gz' suffix -- TODO confirm against the dataset.
                    seg_id = seg_path.name[-8]
                    subject_dict[f'seg_{seg_id}'] = LabelMap(seg_path)
                subjects.append(Subject(**subject_dict))
        return subjects

    def _download(self, root: Path):
        """Download the EPISURG data if it does not exist already."""
        if (root / 'EPISURG').is_dir():
            return
        root.mkdir(exist_ok=True, parents=True)
        download_and_extract_archive(
            self.data_url,
            download_root=root,
            md5=self.md5,
        )
        (root / 'EPISURG.zip').unlink()  # cleanup

    def _glob_subjects(self, string: str):
        """Return the subjects having at least one entry name that contains
        ``string`` as a substring, in dataset order."""
        return [
            subject for subject in self._subjects
            if any(string in image_name for image_name in subject)
        ]

    def _get_labeled_subjects(self):
        """Return the subjects with an entry whose name contains 'seg'."""
        return self._glob_subjects('seg')

    def _get_paired_subjects(self):
        """Return the subjects with an entry whose name contains 'preop'."""
        return self._glob_subjects('preop')

    def _get_subset(self, subjects):
        """Wrap ``subjects`` in a :class:`SubjectsDataset` that shares this
        dataset's transform and extra keyword arguments."""
        dataset = SubjectsDataset(
            subjects,
            transform=self._transform,
            **(self.kwargs),
        )
        return dataset

    def get_labeled(self) -> SubjectsDataset:
        """Get dataset from subjects with manual annotations."""
        return self._get_subset(self._get_labeled_subjects())

    def get_unlabeled(self) -> SubjectsDataset:
        """Get dataset from subjects without manual annotations."""
        # Computed once: calling it inside the comprehension condition
        # would re-scan the whole dataset for every single subject.
        labeled = self._get_labeled_subjects()
        subjects = [s for s in self._subjects if s not in labeled]
        return self._get_subset(subjects)

    def get_paired(self) -> SubjectsDataset:
        """Get dataset from subjects with pre- and post-op MRI."""
        return self._get_subset(self._get_paired_subjects())
149