torchio.datasets.episurg.EPISURG._glob_subjects() - Code Metrics - Inspection of "Add EPISURG dataset (#433)" - fepegar/torchio - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( e50ba8...58536b )

by Fernando

created 2021-01-29 17:48 UTC

torchio.datasets.episurg.EPISURG._glob_subjects() A

↳ Parent: torchio.datasets.episurg

Complexity

Conditions

Size

Total Lines	8
Code Lines	8

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	8
dl	0
loc	8
rs	10
c	0
b	0
f	0
cc	4
nop	2

import csv
from pathlib import Path
from typing import Optional

from ..typing import TypePath
from ..transforms import Transform
from ..download import download_and_extract_archive
from .. import SubjectsDataset, Subject, ScalarImage, LabelMap


class EPISURG(SubjectsDataset):
    """
    `EPISURG`_ is a clinical dataset of :math:`T_1`-weighted MRI from 430
    epileptic patients who underwent resective brain surgery at the National
    Hospital of Neurology and Neurosurgery (Queen Square, London, United
    Kingdom) between 1990 and 2018.

    The dataset comprises 430 postoperative MRI. The corresponding preoperative
    MRI is present for 268 subjects.

    Three human raters segmented the resection cavity on partially overlapping
    subsets of EPISURG.

    If you use this dataset for your research, you agree with the *Data use
    agreement* presented at the EPISURG entry on the `UCL Research Data
    Repository <EPISURG>`_ and you must cite the corresponding publications.

    .. _EPISURG: https://doi.org/10.5522/04/9996158.v1

    Args:
        root: Root directory to which the dataset will be downloaded.
        transform: An instance of
            :class:`~torchio.transforms.transform.Transform`.
        download: If set to ``True``, will download the data into :attr:`root`.
        **kwargs: Extra keyword arguments forwarded to the
            ``SubjectsDataset`` constructor, both for this dataset and for
            the subsets returned by :meth:`get_labeled`,
            :meth:`get_unlabeled` and :meth:`get_paired`.

    .. warning:: The size of this dataset is multiple GB.
        If you set :attr:`download` to ``True``, it will take some time
        to be downloaded if it is not already present.
    """

    data_url = 'https://s3-eu-west-1.amazonaws.com/pstorage-ucl-2748466690/26153588/EPISURG.zip'  # noqa: E501
    md5 = '5ec5831a2c6fbfdc8489ba2910a6504b'

    def __init__(
            self,
            root: TypePath,
            transform: Optional[Transform] = None,
            download: bool = False,
            **kwargs,
            ):
        root = Path(root).expanduser().absolute()
        if download:
            self._download(root)
        subjects_list = self._get_subjects_list(root)
        # Remembered so that subsets created by _get_subset() are built with
        # the same extra arguments as this dataset.
        self.kwargs = kwargs
        super().__init__(subjects_list, transform=transform, **kwargs)

    @staticmethod
    def _check_exists(root, modalities) -> bool:
        """Return ``True`` if ``root / modality`` is a directory for every
        entry of ``modalities`` (vacuously ``True`` when it is empty)."""
        return all((root / modality).is_dir() for modality in modalities)

    @staticmethod
    def _get_subjects_list(root):
        """Build one ``Subject`` per row of ``EPISURG/subjects.csv``.

        Args:
            root: Directory (a :class:`pathlib.Path`) containing the
                extracted ``EPISURG`` folder.

        Returns:
            List of subjects. Each one has the keys ``subject_id``,
            ``hemisphere``, ``surgery_type`` and ``postop_mri``, plus
            ``preop_mri`` when a preoperative image is found and one
            ``seg_<id>`` entry per segmentation file found.

        Raises:
            RuntimeError: If more than one preoperative image is found for
                a subject.
            FileNotFoundError: If no postoperative image is found for a
                subject (or if the CSV file itself is missing).
        """
        episurg_dir = root / 'EPISURG'
        subjects_dir = episurg_dir / 'subjects'
        csv_path = episurg_dir / 'subjects.csv'
        subjects = []
        # The csv module asks for files to be opened with newline=''.
        with open(csv_path, newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                subject_id = row['Subject']
                subject_dir = subjects_dir / subject_id
                subject_dict = {
                    'subject_id': subject_id,
                    'hemisphere': row['Hemisphere'],
                    'surgery_type': row['Type'],
                }

                # The preoperative image is optional, but must be unique.
                preop_dir = subject_dir / 'preop'
                preop_paths = list(preop_dir.glob('*preop*'))
                if len(preop_paths) > 1:
                    # Explicit raise: an assert would vanish under -O.
                    raise RuntimeError(
                        f'Expected at most one preoperative image in'
                        f' "{preop_dir}", found {len(preop_paths)}'
                    )
                if preop_paths:
                    subject_dict['preop_mri'] = ScalarImage(preop_paths[0])

                # The postoperative image is mandatory.
                postop_dir = subject_dir / 'postop'
                postop_path = next(postop_dir.glob('*postop-t1mri*'), None)
                if postop_path is None:
                    raise FileNotFoundError(
                        f'No postoperative image found in "{postop_dir}"'
                    )
                subject_dict['postop_mri'] = ScalarImage(postop_path)

                for seg_path in postop_dir.glob('*seg*'):
                    # Eighth character from the end of the file name, i.e.
                    # the one right before a 7-character suffix.
                    # NOTE(review): presumably names end in
                    # "<rater>.nii.gz" -- confirm against the dataset.
                    seg_id = seg_path.name[-8]
                    subject_dict[f'seg_{seg_id}'] = LabelMap(seg_path)
                subjects.append(Subject(**subject_dict))
        return subjects

    def _download(self, root) -> None:
        """Download the EPISURG data if it does not exist already."""
        if (root / 'EPISURG').is_dir():
            return
        root.mkdir(exist_ok=True, parents=True)
        download_and_extract_archive(
            self.data_url,
            download_root=root,
            md5=self.md5,
        )
        (root / 'EPISURG.zip').unlink()  # cleanup

    def _glob_subjects(self, string):
        """Return the subjects having at least one entry name that contains
        ``string``, in their original order."""
        return [
            subject for subject in self._subjects
            if any(string in image_name for image_name in subject)
        ]

    def _get_labeled_subjects(self):
        """Return the subjects with an entry whose name contains 'seg'."""
        return self._glob_subjects('seg')

    def _get_paired_subjects(self):
        """Return the subjects with an entry whose name contains 'preop'."""
        return self._glob_subjects('preop')

    def _get_subset(self, subjects):
        """Wrap ``subjects`` in a ``SubjectsDataset`` sharing this dataset's
        transform and extra keyword arguments."""
        return SubjectsDataset(
            subjects,
            transform=self._transform,
            **(self.kwargs),
        )

    def get_labeled(self) -> SubjectsDataset:
        """Get dataset from subjects with manual annotations."""
        return self._get_subset(self._get_labeled_subjects())

    def get_unlabeled(self) -> SubjectsDataset:
        """Get dataset from subjects without manual annotations."""
        # Compute the labeled subjects once instead of once per subject.
        # _glob_subjects() returns the very objects stored in
        # self._subjects, so comparing identities is enough and avoids
        # element-wise equality checks between subjects.
        labeled_ids = {id(s) for s in self._get_labeled_subjects()}
        subjects = [s for s in self._subjects if id(s) not in labeled_ids]
        return self._get_subset(subjects)

    def get_paired(self) -> SubjectsDataset:
        """Get dataset from subjects with pre- and post-op MRI."""
        return self._get_subset(self._get_paired_subjects())


1			import csv
2			from pathlib import Path
3			from typing import Optional
4
5			from ..typing import TypePath
6			from ..transforms import Transform
7			from ..download import download_and_extract_archive
8			from .. import SubjectsDataset, Subject, ScalarImage, LabelMap
9
10
11			class EPISURG(SubjectsDataset):
12			"""
13			`EPISURG`_ is a clinical dataset of :math:`T_1`-weighted MRI from 430
14			epileptic patients who underwent resective brain surgery at the National
15			Hospital of Neurology and Neurosurgery (Queen Square, London, United
16			Kingdom) between 1990 and 2018.
17
18			The dataset comprises 430 postoperative MRI. The corresponding preoperative
19			MRI is present for 268 subjects.
20
21			Three human raters segmented the resection cavity on partially overlapping
22			subsets of EPISURG.
23
24			If you use this dataset for your research, you agree with the *Data use
25			agreement* presented at the EPISURG entry on the `UCL Research Data
26			Repository <EPISURG>`_ and you must cite the corresponding publications.
27
28			.. _EPISURG: https://doi.org/10.5522/04/9996158.v1
29
30			Args:
31			root: Root directory to which the dataset will be downloaded.
32			transform: An instance of
33			:class:`~torchio.transforms.transform.Transform`.
34			download: If set to ``True``, will download the data into :attr:`root`.
35
36			.. warning:: The size of this dataset is multiple GB.
37			If you set :attr:`download` to ``True``, it will take some time
38			to be downloaded if it is not already present.
39			"""
40
41			data_url = 'https://s3-eu-west-1.amazonaws.com/pstorage-ucl-2748466690/26153588/EPISURG.zip' # noqa: E501
42			md5 = '5ec5831a2c6fbfdc8489ba2910a6504b'
43
44			def __init__(
45			self,
46			root: TypePath,
47			transform: Optional[Transform] = None,
48			download: bool = False,
49			**kwargs,
50			):
51			root = Path(root).expanduser().absolute()
52			if download:
53			self._download(root)
54			subjects_list = self._get_subjects_list(root)
55			self.kwargs = kwargs
56			super().__init__(subjects_list, transform=transform, **kwargs)
57
58			@staticmethod
59			def _check_exists(root, modalities):
60			for modality in modalities:
61			modality_dir = root / modality
62			if not modality_dir.is_dir():
63			exists = False
64			break
65			else:
66			exists = True
67			return exists
68
69			@staticmethod
70			def _get_subjects_list(root):
71			episurg_dir = root / 'EPISURG'
72			subjects_dir = episurg_dir / 'subjects'
73			csv_path = episurg_dir / 'subjects.csv'
74			with open(csv_path) as csvfile:
75			reader = csv.DictReader(csvfile)
76			subjects = []
77			for row in reader:
78			subject_id = row['Subject']
79			subject_dir = subjects_dir / subject_id
80			subject_dict = {
81			'subject_id': subject_id,
82			'hemisphere': row['Hemisphere'],
83			'surgery_type': row['Type'],
84			}
85			preop_dir = subject_dir / 'preop'
86			preop_paths = list(preop_dir.glob('*preop*'))
87			assert len(preop_paths) <= 1
88			if preop_paths:
89			subject_dict['preop_mri'] = ScalarImage(preop_paths[0])
90			postop_dir = subject_dir / 'postop'
91			postop_path = list(postop_dir.glob('*postop-t1mri*'))[0]
92			subject_dict['postop_mri'] = ScalarImage(postop_path)
93			for seg_path in postop_dir.glob('*seg*'):
94			seg_id = seg_path.name[-8]
95			subject_dict[f'seg_{seg_id}'] = LabelMap(seg_path)
96			subjects.append(Subject(**subject_dict))
97			return subjects
98
99			def _download(self, root):
100			"""Download the EPISURG data if it does not exist already."""
101			if (root / 'EPISURG').is_dir():
102			return
103			root.mkdir(exist_ok=True, parents=True)
104			download_and_extract_archive(
105			self.data_url,
106			download_root=root,
107			md5=self.md5,
108			)
109			(root / 'EPISURG.zip').unlink() # cleanup
110
111			def _glob_subjects(self, string):
112			subjects = []
113			for subject in self._subjects:
114			for image_name in subject:
115			if string in image_name:
116			subjects.append(subject)
117			break
118			return subjects
119
120			def _get_labeled_subjects(self):
121			return self._glob_subjects('seg')
122
123			def _get_paired_subjects(self):
124			return self._glob_subjects('preop')
125
126			def _get_subset(self, subjects):
127			dataset = SubjectsDataset(
128			subjects,
129			transform=self._transform,
130			**(self.kwargs),
131			)
132			return dataset
133
134			def get_labeled(self) -> SubjectsDataset:
135			"""Get dataset from subjects with manual annotations."""
136			return self._get_subset(self._get_labeled_subjects())
137
138			def get_unlabeled(self) -> SubjectsDataset:
139			"""Get dataset from subjects without manual annotations."""
140			subjects = [
141			s for s in self._subjects
142			if s not in self._get_labeled_subjects()
143			]
144			return self._get_subset(subjects)
145
146			def get_paired(self) -> SubjectsDataset:
147			"""Get dataset from subjects with pre- and post-op MRI."""
148			return self._get_subset(self._get_paired_subjects())
149

fepegar / torchio

Push — master ( e50ba8...58536b )

torchio.datasets.episurg.EPISURG._glob_subjects() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like