torchio.datasets.rsna_miccai.RSNAMICCAI._get_subjects() - Code Metrics - Inspection of "Add modalities kwarg" - fepegar/torchio - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( f66f2a...49e504 )

by Fernando

created 2021-10-11 15:42 UTC

RSNAMICCAI._get_subjects() D

↳ Parent: torchio.datasets.rsna_miccai

Complexity

Conditions

Size

Total Lines	43
Code Lines	39

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	39
dl	0
loc	43
rs	4.8
c	0
b	0
f	0
cc	12
nop	4

How to fix Complexity

import csv
import warnings
from pathlib import Path
from typing import List, Sequence

from ..typing import TypePath
from .. import SubjectsDataset, Subject, ScalarImage


class RSNAMICCAI(SubjectsDataset):
    """RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge dataset.

    This is a helper class for the dataset used in the
    `RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge`_ hosted on
    `kaggle <https://www.kaggle.com/>`_. The dataset must be downloaded before
    instantiating this class (as opposed to, e.g., :class:`torchio.datasets.IXI`).

    This `kaggle kernel <https://www.kaggle.com/fepegar/preprocessing-mri-with-torchio/>`_
    includes a usage example including preprocessing of all the scans.

    If you reference or use the dataset in any form, include the following
    citation:

    U.Baid, et al., "The RSNA-ASNR-MICCAI BraTS 2021 Benchmark on Brain Tumor
    Segmentation and Radiogenomic Classification", arXiv:2107.02314, 2021.

    Args:
        root_dir: Directory containing the dataset (``train`` directory,
            ``test`` directory, etc.).
        train: If ``True``, the ``train`` set will be used. Otherwise the
            ``test`` set will be used.
        ignore_empty: If ``True``, the three subjects flagged as "presenting
            issues" (empty images) by the challenge organizers will be ignored.
            The subject IDs are ``00109``, ``00123`` and ``00709``.
        modalities: Names of the subdirectories of each subject directory
            that will be loaded as images. A single string is also accepted
            and is treated as a one-element sequence.
        **kwargs: Additional arguments passed to the parent class
            constructor.

    Example:
        >>> import torchio as tio
        >>> from subprocess import call
        >>> call('kaggle competitions download -c rsna-miccai-brain-tumor-radiogenomic-classification'.split())
        >>> root_dir = 'rsna-miccai-brain-tumor-radiogenomic-classification'
        >>> train_set = tio.datasets.RSNAMICCAI(root_dir, train=True)
        >>> test_set = tio.datasets.RSNAMICCAI(root_dir, train=False)
        >>> len(train_set), len(test_set)
        (582, 87)


    .. _RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge: https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification
    """  # noqa: E501
    # Column of the labels CSV holding the subject ID; also used as the key
    # under which the ID is stored in each subject.
    id_key = 'BraTS21ID'
    # Column of the labels CSV holding the label; also used as the key under
    # which the label is stored in each subject.
    label_key = 'MGMT_value'
    # Subject IDs skipped when ``ignore_empty`` is ``True``.
    bad_subjects = '00109', '00123', '00709'

    def __init__(
            self,
            root_dir: TypePath,
            train: bool = True,
            ignore_empty: bool = True,
            modalities: Sequence[str] = ('T1w', 'T1wCE', 'T2w', 'FLAIR'),
            **kwargs,
            ):
        self.root_dir = Path(root_dir).expanduser().resolve()
        # A bare string would otherwise be iterated character by character.
        if isinstance(modalities, str):
            modalities = [modalities]
        # Must be set before _get_subjects(), which reads self.modalities.
        self.modalities = modalities
        subjects = self._get_subjects(self.root_dir, train, ignore_empty)
        super().__init__(subjects, **kwargs)
        self.train = train

    def _read_labels(self, root_dir: Path) -> dict:
        """Read ``train_labels.csv`` into a mapping from subject ID to label.

        Args:
            root_dir: Directory expected to contain ``train_labels.csv``.

        Returns:
            Dictionary mapping the ``id_key`` column (kept as a string) to the
            ``label_key`` column converted to ``int``. An empty dictionary is
            returned, after emitting a warning, if the file does not exist.
        """
        csv_path = root_dir / 'train_labels.csv'
        try:
            # newline='' is what the csv module documentation recommends for
            # files handed to a reader.
            with open(csv_path, newline='') as csvfile:
                reader = csv.DictReader(csvfile)
                return {
                    row[self.id_key]: int(row[self.label_key])
                    for row in reader
                }
        except FileNotFoundError:
            warnings.warn('Labels CSV not found. Ignoring MGMT labels')
            return {}

    def _get_subjects(
            self,
            root_dir: Path,
            train: bool,
            ignore_empty: bool,
            ) -> List[Subject]:
        """Build one subject per numerically named entry of the split dir.

        Args:
            root_dir: Directory containing ``train``, ``test`` and, for the
                training split, ``train_labels.csv``.
            train: If ``True``, read ``root_dir / 'train'`` and attach labels
                when the CSV is available. Otherwise read
                ``root_dir / 'test'`` without labels.
            ignore_empty: If ``True``, skip the IDs in ``bad_subjects``.

        Returns:
            List of subjects, ordered by sorted directory entry.
        """
        # Bound on every path so the label lookup below never depends on
        # short-circuit evaluation to avoid an undefined name.
        labels_dict = self._read_labels(root_dir) if train else {}
        subjects_dir = root_dir / ('train' if train else 'test')

        subjects = []
        for subject_dir in sorted(subjects_dir.iterdir()):
            subject_id = subject_dir.name
            if ignore_empty and subject_id in self.bad_subjects:
                continue
            # Entries whose name is not an integer are not subjects.
            try:
                int(subject_id)
            except ValueError:
                continue
            images_dict = {self.id_key: subject_id}
            # Empty for the test split or when the CSV was not found.
            if labels_dict:
                images_dict[self.label_key] = labels_dict[subject_id]
            for modality in self.modalities:
                image_dir = subject_dir / modality
                filepaths = list(image_dir.iterdir())
                # A directory holding exactly one file is loaded from that
                # file; otherwise the directory itself is passed to the image
                # (presumably read as a multi-file series - confirm against
                # ScalarImage).
                path = filepaths[0] if len(filepaths) == 1 else image_dir
                images_dict[modality] = ScalarImage(path)
            subjects.append(Subject(images_dict))
        return subjects


1			import csv
2			import warnings
3			from pathlib import Path
4			from typing import List, Sequence
5
6			from ..typing import TypePath
7			from .. import SubjectsDataset, Subject, ScalarImage
8
9
10			class RSNAMICCAI(SubjectsDataset):
11			"""RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge dataset.
12
13			This is a helper class for the dataset used in the
14			`RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge`_ hosted on
15			`kaggle <https://www.kaggle.com/>`_. The dataset must be downloaded before
16			instantiating this class (as oposed to, e.g., :class:`torchio.datasets.IXI`).
17
18			This `kaggle kernel <https://www.kaggle.com/fepegar/preprocessing-mri-with-torchio/>`_
19			includes a usage example including preprocessing of all the scans.
20
21			If you reference or use the dataset in any form, include the following
22			citation:
23
24			U.Baid, et al., "The RSNA-ASNR-MICCAI BraTS 2021 Benchmark on Brain Tumor
25			Segmentation and Radiogenomic Classification", arXiv:2107.02314, 2021.
26
27			Args:
28			root_dir: Directory containing the dataset (``train`` directory,
29			``test`` directory, etc.).
30			train: If ``True``, the ``train`` set will be used. Otherwise the
31			``test`` set will be used.
32			ignore_empty: If ``True``, the three subjects flagged as "presenting
33			issues" (empty images) by the challenge organizers will be ignored.
34			The subject IDs are ``00109``, ``00123`` and ``00709``.
35
36			Example:
37			>>> import torchio as tio
38			>>> from subprocess import call
39			>>> call('kaggle competitions download -c rsna-miccai-brain-tumor-radiogenomic-classification'.split())
40			>>> root_dir = 'rsna-miccai-brain-tumor-radiogenomic-classification'
41			>>> train_set = tio.datasets.RSNAMICCAI(root_dir, train=True)
42			>>> test_set = tio.datasets.RSNAMICCAI(root_dir, train=False)
43			>>> len(train_set), len(test_set)
44			(582, 87)
45
46
47			.. _RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge: https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification
48			""" # noqa: E501
49			id_key = 'BraTS21ID'
50			label_key = 'MGMT_value'
51			bad_subjects = '00109', '00123', '00709'
52
53			def __init__(
54			self,
55			root_dir: TypePath,
56			train: bool = True,
57			ignore_empty: bool = True,
58			modalities: Sequence[str] = ('T1w', 'T1wCE', 'T2w', 'FLAIR'),
59			**kwargs,
60			):
61			self.root_dir = Path(root_dir).expanduser().resolve()
62			if isinstance(modalities, str):
63			modalities = [modalities]
64			self.modalities = modalities
65			subjects = self._get_subjects(self.root_dir, train, ignore_empty)
66			super().__init__(subjects, **kwargs)
67			self.train = train
68
69			def _get_subjects(
70			self,
71			root_dir: Path,
72			train: bool,
73			ignore_empty: bool,
74			) -> List[Subject]:
75			subjects = []
76			if train:
77			csv_path = root_dir / 'train_labels.csv'
78			try:
79			with open(csv_path) as csvfile:
80			reader = csv.DictReader(csvfile)
81			labels_dict = {
82			row[self.id_key]: int(row[self.label_key])
83			for row in reader
84			}
85			except FileNotFoundError:
86			warnings.warn('Labels CSV not found. Ignoring MGMT labels')
87			labels_dict = {}
88			subjects_dir = root_dir / 'train'
89			else:
90			subjects_dir = root_dir / 'test'
91
92			for subject_dir in sorted(subjects_dir.iterdir()):
93			subject_id = subject_dir.name
94			if ignore_empty and subject_id in self.bad_subjects:
95			continue
96			try:
97			int(subject_id)
98			except ValueError:
99			continue
100			images_dict = {self.id_key: subject_dir.name}
101			if train and labels_dict:
			Issue (introduced 2021-09-05 02:02 UTC): The variable `labels_dict` does not seem to be defined for all execution paths.
102			images_dict[self.label_key] = labels_dict[subject_id]
103			for modality in self.modalities:
104			image_dir = subject_dir / modality
105			filepaths = list(image_dir.iterdir())
106			num_files = len(filepaths)
107			path = filepaths[0] if num_files == 1 else image_dir
108			images_dict[modality] = ScalarImage(path)
109			subject = Subject(images_dict)
110			subjects.append(subject)
111			return subjects
112

fepegar / torchio

Push — master ( f66f2a...49e504 )

RSNAMICCAI._get_subjects() D

Complexity

Size

Duplication

Importance

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like