torchio.datasets.rsna_miccai   A
last analyzed

Complexity

Total Complexity 14

Size/Duplication

Total Lines 112
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 14
eloc 64
dl 0
loc 112
rs 10
c 0
b 0
f 0

2 Methods

Rating   Name   Duplication   Size   Complexity  
D RSNAMICCAI._get_subjects() 0 43 12
A RSNAMICCAI.__init__() 0 15 2
1
import csv
2
import warnings
3
from pathlib import Path
4
from typing import List, Sequence
5
6
from ..typing import TypePath
7
from .. import SubjectsDataset, Subject, ScalarImage
8
9
10
class RSNAMICCAI(SubjectsDataset):
    """RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge dataset.

    This is a helper class for the dataset used in the
    `RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge`_ hosted on
    `kaggle <https://www.kaggle.com/>`_. The dataset must be downloaded before
    instantiating this class (as opposed to, e.g., :class:`torchio.datasets.IXI`).

    This `kaggle kernel <https://www.kaggle.com/fepegar/preprocessing-mri-with-torchio/>`_
    includes a usage example including preprocessing of all the scans.

    If you reference or use the dataset in any form, include the following
    citation:

    U.Baid, et al., "The RSNA-ASNR-MICCAI BraTS 2021 Benchmark on Brain Tumor
    Segmentation and Radiogenomic Classification", arXiv:2107.02314, 2021.

    Args:
        root_dir: Directory containing the dataset (``train`` directory,
            ``test`` directory, etc.).
        train: If ``True``, the ``train`` set will be used. Otherwise the
            ``test`` set will be used.
        ignore_empty: If ``True``, the three subjects flagged as "presenting
            issues" (empty images) by the challenge organizers will be ignored.
            The subject IDs are ``00109``, ``00123`` and ``00709``.
        modalities: Name, or sequence of names, of the per-subject
            subdirectories to load as images. A single string is treated as
            a one-element list.
        **kwargs: Additional keyword arguments forwarded to the parent
            :class:`SubjectsDataset` constructor.

    Example:
        >>> import torchio as tio
        >>> from subprocess import call
        >>> call('kaggle competitions download -c rsna-miccai-brain-tumor-radiogenomic-classification'.split())
        >>> root_dir = 'rsna-miccai-brain-tumor-radiogenomic-classification'
        >>> train_set = tio.datasets.RSNAMICCAI(root_dir, train=True)
        >>> test_set = tio.datasets.RSNAMICCAI(root_dir, train=False)
        >>> len(train_set), len(test_set)
        (582, 87)


    .. _RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge: https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification
    """  # noqa: E501
    # Column of the labels CSV holding the subject ID; also used as the key
    # under which the ID is stored in each subject.
    id_key = 'BraTS21ID'
    # Column of the labels CSV holding the label; also used as the key under
    # which the label is stored in each subject.
    label_key = 'MGMT_value'
    # Subject IDs skipped when ``ignore_empty`` is true.
    bad_subjects = '00109', '00123', '00709'

    def __init__(
            self,
            root_dir: TypePath,
            train: bool = True,
            ignore_empty: bool = True,
            modalities: Sequence[str] = ('T1w', 'T1wCE', 'T2w', 'FLAIR'),
            **kwargs,
            ):
        self.root_dir = Path(root_dir).expanduser().resolve()
        # A bare string would otherwise be iterated character by character.
        if isinstance(modalities, str):
            modalities = [modalities]
        self.modalities = modalities
        subjects = self._get_subjects(self.root_dir, train, ignore_empty)
        super().__init__(subjects, **kwargs)
        self.train = train

    def _get_subjects(
            self,
            root_dir: Path,
            train: bool,
            ignore_empty: bool,
            ) -> List[Subject]:
        """Build one subject per numerically named entry of the split folder.

        Args:
            root_dir: Directory containing the ``train``/``test`` directories
                and, for the training split, ``train_labels.csv``.
            train: If ``True``, read ``root_dir / 'train'`` and try to load
                the labels CSV. Otherwise read ``root_dir / 'test'``.
            ignore_empty: If ``True``, skip the IDs in ``bad_subjects``.

        Returns:
            Subjects sorted by directory path. Each one stores its ID under
            ``id_key``, its label under ``label_key`` (only when labels were
            loaded) and one :class:`ScalarImage` per entry of
            ``self.modalities``.

        Raises:
            KeyError: If labels were loaded but a subject directory has no
                matching row in the CSV.
        """
        subjects = []
        # Bound up front so the name exists on every path: the test split
        # never reads labels and a missing CSV leaves the mapping empty.
        labels_dict = {}
        if train:
            csv_path = root_dir / 'train_labels.csv'
            try:
                # newline='' is what the csv module recommends for files
                # handed to a reader.
                with open(csv_path, newline='') as csvfile:
                    reader = csv.DictReader(csvfile)
                    labels_dict = {
                        row[self.id_key]: int(row[self.label_key])
                        for row in reader
                    }
            except FileNotFoundError:
                # stacklevel=2 attributes the warning to the caller of this
                # method instead of to this line.
                warnings.warn(
                    'Labels CSV not found. Ignoring MGMT labels',
                    stacklevel=2,
                )
            subjects_dir = root_dir / 'train'
        else:
            subjects_dir = root_dir / 'test'

        for subject_dir in sorted(subjects_dir.iterdir()):
            subject_id = subject_dir.name
            if ignore_empty and subject_id in self.bad_subjects:
                continue
            # Skip anything whose name is not an integer.
            try:
                int(subject_id)
            except ValueError:
                continue
            images_dict = {self.id_key: subject_id}
            # The mapping is only non-empty for the training split with a
            # readable CSV, so no separate check of ``train`` is needed.
            if labels_dict:
                images_dict[self.label_key] = labels_dict[subject_id]
            for modality in self.modalities:
                image_dir = subject_dir / modality
                filepaths = list(image_dir.iterdir())
                # A folder holding exactly one entry is loaded from that
                # entry; otherwise the folder itself is passed to the image.
                path = filepaths[0] if len(filepaths) == 1 else image_dir
                images_dict[modality] = ScalarImage(path)
            subjects.append(Subject(images_dict))
        return subjects
112