1 | import csv |
||
2 | from typing import List |
||
3 | from pathlib import Path |
||
4 | |||
5 | from ..typing import TypePath |
||
6 | from .. import SubjectsDataset, Subject, ScalarImage |
||
7 | |||
8 | |||
class RSNAMICCAI(SubjectsDataset):
    """RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge dataset.

    This is a helper class for the dataset used in the
    `RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge`_ hosted on
    `kaggle <https://www.kaggle.com/>`_. The dataset must be downloaded before
    instantiating this class (as opposed to, e.g., :class:`torchio.datasets.IXI`).

    This `kaggle kernel <https://www.kaggle.com/fepegar/preprocessing-mri-with-torchio/>`_
    includes a usage example including preprocessing of all the scans.

    If you reference or use the dataset in any form, include the following
    citation:

    U.Baid, et al., "The RSNA-ASNR-MICCAI BraTS 2021 Benchmark on Brain Tumor
    Segmentation and Radiogenomic Classification", arXiv:2107.02314, 2021.

    Args:
        root_dir: Directory containing the dataset (``train`` directory,
            ``test`` directory, etc.).
        train: If ``True``, the ``train`` set will be used. Otherwise the
            ``test`` set will be used.
        ignore_empty: If ``True``, the three subjects flagged as "presenting
            issues" (empty images) by the challenge organizers will be ignored.
            The subject IDs are ``00109``, ``00123`` and ``00709``.
        **kwargs: Forwarded unchanged to the parent class constructor.

    Example:
        >>> import torchio as tio
        >>> from subprocess import call
        >>> call('kaggle competitions download -c rsna-miccai-brain-tumor-radiogenomic-classification'.split())
        >>> root_dir = 'rsna-miccai-brain-tumor-radiogenomic-classification'
        >>> train_set = tio.datasets.RSNAMICCAI(root_dir, train=True)
        >>> test_set = tio.datasets.RSNAMICCAI(root_dir, train=False)
        >>> len(train_set), len(test_set)
        (582, 87)


    .. _RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge: https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification
    """  # noqa: E501
    # Column of ``train_labels.csv`` holding the subject ID; also the key
    # under which the ID is stored in every subject.
    id_key = 'BraTS21ID'
    # Column of ``train_labels.csv`` holding the label; also the key under
    # which the label is stored in training subjects.
    label_key = 'MGMT_value'
    # Names of the per-subject subdirectories, one image is loaded from each.
    modalities = 'T1w', 'T1wCE', 'T2w', 'FLAIR'
    # Subject IDs skipped when ``ignore_empty`` is true.
    bad_subjects = '00109', '00123', '00709'

    def __init__(
        self,
        root_dir: TypePath,
        train: bool = True,
        ignore_empty: bool = True,
        **kwargs,
    ):
        self.root_dir = Path(root_dir).expanduser().resolve()
        subjects = self._get_subjects(self.root_dir, train, ignore_empty)
        super().__init__(subjects, **kwargs)
        self.train = train

    def _get_subjects(
        self,
        root_dir: Path,
        train: bool,
        ignore_empty: bool,
    ) -> List[Subject]:
        """Build one subject per numerically-named entry of the chosen split.

        Args:
            root_dir: Dataset root containing ``train``/``test`` and, for the
                training split, ``train_labels.csv``.
            train: Read ``train`` (with labels) if true, ``test`` otherwise.
            ignore_empty: Skip the IDs listed in :attr:`bad_subjects`.

        Returns:
            Subjects in sorted directory order. Each holds the subject ID,
            the label (training split only) and one image per modality.

        Raises:
            KeyError: If a training subject has no row in the labels file.
        """
        subjects = []
        # Bound on both branches so the lookup in the loop below never reads
        # an undefined name; it stays empty (and unused) for the test split.
        labels_dict = {}
        if train:
            csv_path = root_dir / 'train_labels.csv'
            # newline='' is what the csv module asks for when opening files.
            with open(csv_path, newline='') as csvfile:
                reader = csv.DictReader(csvfile)
                labels_dict = {
                    row[self.id_key]: int(row[self.label_key])
                    for row in reader
                }
            subjects_dir = root_dir / 'train'
        else:
            subjects_dir = root_dir / 'test'

        for subject_dir in sorted(subjects_dir.iterdir()):
            subject_id = subject_dir.name
            if ignore_empty and subject_id in self.bad_subjects:
                continue
            # Entries whose name is not an integer are not subjects.
            try:
                int(subject_id)
            except ValueError:
                continue
            images_dict = {self.id_key: subject_id}
            if train:
                images_dict[self.label_key] = labels_dict[subject_id]
            for modality in self.modalities:
                image_dir = subject_dir / modality
                filepaths = list(image_dir.iterdir())
                # A directory with exactly one entry is loaded from that
                # entry; otherwise the directory itself is handed over.
                path = filepaths[0] if len(filepaths) == 1 else image_dir
                images_dict[modality] = ScalarImage(path)
            subjects.append(Subject(images_dict))
        return subjects
||
104 |