Passed
Push — master ( 5bce14...6d23a1 )
by Fernando
01:23
created

torchio/datasets/rsna_miccai.py (1 issue)

1
import csv
2
from typing import List
3
from pathlib import Path
4
5
from ..typing import TypePath
6
from .. import SubjectsDataset, Subject, ScalarImage
7
8
9
class RSNAMICCAI(SubjectsDataset):
    """RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge dataset.

    This is a helper class for the dataset used in the
    `RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge`_ hosted on
    `kaggle <https://www.kaggle.com/>`_. The dataset must be downloaded before
    instantiating this class (as opposed to, e.g., :class:`torchio.datasets.IXI`).

    This `kaggle kernel <https://www.kaggle.com/fepegar/preprocessing-mri-with-torchio/>`_
    includes a usage example including preprocessing of all the scans.

    If you reference or use the dataset in any form, include the following
    citation:

    U.Baid, et al., "The RSNA-ASNR-MICCAI BraTS 2021 Benchmark on Brain Tumor
    Segmentation and Radiogenomic Classification", arXiv:2107.02314, 2021.

    Args:
        root_dir: Directory containing the dataset (``train`` directory,
            ``test`` directory, etc.).
        train: If ``True``, the ``train`` set will be used. Otherwise the
            ``test`` set will be used.
        ignore_empty: If ``True``, the three subjects flagged as "presenting
            issues" (empty images) by the challenge organizers will be ignored.
            The subject IDs are ``00109``, ``00123`` and ``00709``.

    Example:
        >>> import torchio as tio
        >>> from subprocess import call
        >>> call('kaggle competitions download -c rsna-miccai-brain-tumor-radiogenomic-classification'.split())
        >>> root_dir = 'rsna-miccai-brain-tumor-radiogenomic-classification'
        >>> train_set = tio.datasets.RSNAMICCAI(root_dir, train=True)
        >>> test_set = tio.datasets.RSNAMICCAI(root_dir, train=False)
        >>> len(train_set), len(test_set)
        (582, 87)


    .. _RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge: https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification
    """  # noqa: E501
    # Column of ``train_labels.csv`` holding the subject ID; also the key
    # under which the ID is stored in every subject
    id_key = 'BraTS21ID'
    # Column of ``train_labels.csv`` holding the label; also the key under
    # which the label is stored in training subjects
    label_key = 'MGMT_value'
    # Names of the per-subject subdirectories, one per image
    modalities = 'T1w', 'T1wCE', 'T2w', 'FLAIR'
    # Subject IDs skipped when ``ignore_empty`` is true
    bad_subjects = '00109', '00123', '00709'

    def __init__(
            self,
            root_dir: TypePath,
            train: bool = True,
            ignore_empty: bool = True,
            **kwargs,
            ):
        self.root_dir = Path(root_dir).expanduser().resolve()
        subjects = self._get_subjects(self.root_dir, train, ignore_empty)
        super().__init__(subjects, **kwargs)
        self.train = train

    def _get_subjects(
            self,
            root_dir: Path,
            train: bool,
            ignore_empty: bool,
            ) -> List[Subject]:
        """Build one subject per numerically named directory of the split.

        Args:
            root_dir: Directory containing ``train``, ``test`` and
                ``train_labels.csv``.
            train: If ``True``, read ``train`` and attach the labels from
                ``train_labels.csv``. Otherwise read ``test`` without labels.
            ignore_empty: If ``True``, skip the IDs in :attr:`bad_subjects`.

        Returns:
            The subjects, in sorted order of their directory paths.

        Raises:
            KeyError: If ``train`` is ``True`` and a subject directory has no
                matching row in ``train_labels.csv``.
        """
        subjects = []
        # Bound on every execution path; it is only filled (and only read)
        # when ``train`` is true
        labels_dict = {}
        if train:
            csv_path = root_dir / 'train_labels.csv'
            # newline='' is what the csv module recommends for files handed
            # to a reader
            with open(csv_path, newline='') as csvfile:
                reader = csv.DictReader(csvfile)
                labels_dict = {
                    row[self.id_key]: int(row[self.label_key])
                    for row in reader
                }
            subjects_dir = root_dir / 'train'
        else:
            subjects_dir = root_dir / 'test'

        for subject_dir in sorted(subjects_dir.iterdir()):
            subject_id = subject_dir.name
            if ignore_empty and subject_id in self.bad_subjects:
                continue
            # Skip entries whose name is not an integer: they are not
            # subject directories
            try:
                int(subject_id)
            except ValueError:
                continue
            images_dict = {self.id_key: subject_id}
            if train:
                images_dict[self.label_key] = labels_dict[subject_id]
            for modality in self.modalities:
                image_dir = subject_dir / modality
                filepaths = list(image_dir.iterdir())
                # A lone file is passed directly; otherwise the directory
                # itself is passed (presumably an image series that
                # ScalarImage reads as a whole -- TODO confirm)
                path = filepaths[0] if len(filepaths) == 1 else image_dir
                images_dict[modality] = ScalarImage(path)
            subjects.append(Subject(images_dict))
        return subjects
104