torchio.datasets.episurg.EPISURG.get_paired() - Code Metrics - Inspection of "Add EPISURG dataset" - fepegar/torchio - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — master (#433)

by Fernando

created 2021-01-28 16:15 UTC

torchio.datasets.episurg.EPISURG.get_paired() A

↳ Parent: torchio.datasets.episurg

Complexity

Conditions

Size

Total Lines	3
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	2
dl	0
loc	3
rs	10
c	0
b	0
f	0
cc	1
nop	1

from pathlib import Path
from typing import Optional

from ..typing import TypePath
from ..transforms import Transform
from ..download import download_and_extract_archive
from .. import SubjectsDataset, Subject, ScalarImage, LabelMap


class EPISURG(SubjectsDataset):
    """
    `EPISURG`_ is a clinical dataset of :math:`T_1`-weighted MRI from 430
    epileptic patients who underwent resective brain surgery at the National
    Hospital of Neurology and Neurosurgery (Queen Square, London, United
    Kingdom) between 1990 and 2018.

    The dataset comprises 430 postoperative MRI. The corresponding preoperative
    MRI is present for 268 subjects.

    Three human raters segmented the resection cavity on partially overlapping
    subsets of EPISURG.

    If you use this dataset for your research, you agree with the *Data use
    agreement* presented at the EPISURG entry on the `UCL Research Data
    Repository <EPISURG>`_ and you must cite the corresponding publications.

    .. _EPISURG: https://doi.org/10.5522/04/9996158.v1

    Args:
        root: Root directory to which the dataset will be downloaded.
        transform: An instance of
            :class:`~torchio.transforms.transform.Transform`.
        download: If set to ``True``, will download the data into :attr:`root`.

    .. warning:: The size of this dataset is multiple GB.
        If you set :attr:`download` to ``True``, it will take some time
        to be downloaded if it is not already present.
    """

    data_url = 'https://s3-eu-west-1.amazonaws.com/pstorage-ucl-2748466690/26153588/EPISURG.zip'  # noqa: E501
    md5 = '5ec5831a2c6fbfdc8489ba2910a6504b'

    def __init__(
            self,
            root: TypePath,
            transform: Optional[Transform] = None,
            download: bool = False,
            **kwargs,
            ):
        # Resolve ``~`` and relative components once so that every path
        # derived from ``root`` below is absolute.
        root = Path(root).expanduser().absolute()
        if download:
            self._download(root)
        subjects_list = self._get_subjects_list(root)
        # Kept so that the subsets built by _get_subset() receive the same
        # extra keyword arguments as this dataset.
        self.kwargs = kwargs
        super().__init__(subjects_list, transform=transform, **kwargs)

    @staticmethod
    def _check_exists(root, modalities):
        """Return ``True`` if ``root / modality`` is a directory for every
        entry of ``modalities`` (vacuously ``True`` when it is empty)."""
        return all((root / modality).is_dir() for modality in modalities)

    @staticmethod
    def _get_subjects_list(root):
        """Build one :class:`Subject` per ``sub-*`` entry found in
        ``root / 'EPISURG' / 'subjects'``, in sorted order.

        Each subject always gets ``subject_id`` and ``postop_mri``. It gets
        ``preop_mri`` when a preoperative file is found and one ``seg_<id>``
        label map per segmentation file found in the ``postop`` directory.

        Raises:
            AssertionError: If more than one preoperative file is found.
            IndexError: If no postoperative image is found for a subject.
        """
        subjects_dir = root / 'EPISURG' / 'subjects'
        subjects = []
        for subject_dir in sorted(subjects_dir.glob('sub-*')):
            # The ID is the last four characters of the directory name.
            images_dict = {'subject_id': subject_dir.name[-4:]}

            preop_paths = list((subject_dir / 'preop').glob('*preop*'))
            assert len(preop_paths) <= 1, (
                f'Expected at most one preoperative image in "{subject_dir}"'
            )
            if preop_paths:
                images_dict['preop_mri'] = ScalarImage(preop_paths[0])

            postop_dir = subject_dir / 'postop'
            # Indexing raises IndexError when nothing matches the pattern.
            postop_path = list(postop_dir.glob('*postop-t1mri*'))[0]
            images_dict['postop_mri'] = ScalarImage(postop_path)

            for seg_path in postop_dir.glob('*seg*'):
                # Eighth character from the end of the file name;
                # presumably the rater ID right before a 7-character
                # '.nii.gz' suffix -- confirm against the dataset layout.
                seg_id = seg_path.name[-8]
                images_dict[f'seg_{seg_id}'] = LabelMap(seg_path)

            subjects.append(Subject(**images_dict))
        return subjects

    def _download(self, root):
        """Download the EPISURG data if it does not exist already.

        Nothing is done when ``root / 'EPISURG'`` is already a directory.
        Otherwise ``root`` is created (with parents), the archive at
        :attr:`data_url` is fetched and extracted into it, and
        ``root / 'EPISURG.zip'`` is removed afterwards.
        """
        if (root / 'EPISURG').is_dir():
            return
        root.mkdir(exist_ok=True, parents=True)
        download_and_extract_archive(
            self.data_url,
            download_root=root,
            md5=self.md5,
        )
        (root / 'EPISURG.zip').unlink()  # cleanup

    def _glob_subjects(self, string):
        """Return the subjects having at least one entry name that contains
        ``string``, preserving the order of ``self._subjects``."""
        return [
            subject for subject in self._subjects
            if any(string in image_name for image_name in subject)
        ]

    def _get_labeled_subjects(self):
        """Return the subjects that have an entry whose name contains
        ``'seg'``."""
        return self._glob_subjects('seg')

    def _get_paired_subjects(self):
        """Return the subjects that have an entry whose name contains
        ``'preop'``."""
        return self._glob_subjects('preop')

    def _get_subset(self, subjects):
        """Wrap ``subjects`` in a plain :class:`SubjectsDataset` that reuses
        this dataset's transform and extra keyword arguments."""
        return SubjectsDataset(
            subjects,
            transform=self._transform,
            **(self.kwargs),
        )

    def get_labeled(self) -> SubjectsDataset:
        """Get dataset from subjects with manual annotations."""
        return self._get_subset(self._get_labeled_subjects())

    def get_unlabeled(self) -> SubjectsDataset:
        """Get dataset from subjects without manual annotations."""
        # Collect the labeled subjects once instead of re-scanning the whole
        # dataset for every candidate. They are the very same objects as the
        # ones stored in ``self._subjects``, so comparing identities selects
        # the same subjects without falling back to equality comparisons.
        labeled_ids = {
            id(subject) for subject in self._get_labeled_subjects()
        }
        subjects = [
            subject for subject in self._subjects
            if id(subject) not in labeled_ids
        ]
        return self._get_subset(subjects)

    def get_paired(self) -> SubjectsDataset:
        """Get dataset from subjects with pre- and post-op MRI."""
        return self._get_subset(self._get_paired_subjects())


1			from pathlib import Path
2			from typing import Optional
3
4			from ..typing import TypePath
5			from ..transforms import Transform
6			from ..download import download_and_extract_archive
7			from .. import SubjectsDataset, Subject, ScalarImage, LabelMap
8
9
10			class EPISURG(SubjectsDataset):
11			"""
12			`EPISURG`_ is a clinical dataset of :math:`T_1`-weighted MRI from 430
13			epileptic patients who underwent resective brain surgery at the National
14			Hospital of Neurology and Neurosurgery (Queen Square, London, United
15			Kingdom) between 1990 and 2018.
16
17			The dataset comprises 430 postoperative MRI. The corresponding preoperative
18			MRI is present for 268 subjects.
19
20			Three human raters segmented the resection cavity on partially overlapping
21			subsets of EPISURG.
22
23			If you use this dataset for your research, you agree with the *Data use
24			agreement* presented at the EPISURG entry on the `UCL Research Data
25			Repository <EPISURG>`_ and you must cite the corresponding publications.
26
27			.. _EPISURG: https://doi.org/10.5522/04/9996158.v1
28
29			Args:
30			root: Root directory to which the dataset will be downloaded.
31			transform: An instance of
32			:class:`~torchio.transforms.transform.Transform`.
33			download: If set to ``True``, will download the data into :attr:`root`.
34
35			.. warning:: The size of this dataset is multiple GB.
36			If you set :attr:`download` to ``True``, it will take some time
37			to be downloaded if it is not already present.
38			"""
39
40			data_url = 'https://s3-eu-west-1.amazonaws.com/pstorage-ucl-2748466690/26153588/EPISURG.zip' # noqa: E501
41			md5 = '5ec5831a2c6fbfdc8489ba2910a6504b'
42
43			def __init__(
44			self,
45			root: TypePath,
46			transform: Optional[Transform] = None,
47			download: bool = False,
48			**kwargs,
49			):
50			root = Path(root).expanduser().absolute()
51			if download:
52			self._download(root)
53			subjects_list = self._get_subjects_list(root)
54			self.kwargs = kwargs
55			super().__init__(subjects_list, transform=transform, **kwargs)
56
57			@staticmethod
58			def _check_exists(root, modalities):
59			for modality in modalities:
60			modality_dir = root / modality
61			if not modality_dir.is_dir():
62			exists = False
63			break
64			else:
65			exists = True
66			return exists
67
68			@staticmethod
69			def _get_subjects_list(root):
70			subjects_dir = root / 'EPISURG' / 'subjects'
71			subjects = []
72			for subject_dir in sorted(subjects_dir.glob('sub-*')):
73			subject_id = subject_dir.name[-4:]
74			images_dict = {'subject_id': subject_id}
75			preop_dir = subject_dir / 'preop'
76			preop_paths = list(preop_dir.glob('*preop*'))
77			assert len(preop_paths) <= 1
78			if preop_paths:
79			images_dict['preop_mri'] = ScalarImage(preop_paths[0])
80			postop_dir = subject_dir / 'postop'
81			postop_path = list(postop_dir.glob('*postop-t1mri*'))[0]
82			images_dict['postop_mri'] = ScalarImage(postop_path)
83			for seg_path in postop_dir.glob('*seg*'):
84			seg_id = seg_path.name[-8]
85			images_dict[f'seg_{seg_id}'] = LabelMap(seg_path)
86			subjects.append(Subject(**images_dict))
87			return subjects
88
89			def _download(self, root):
90			"""Download the EPISURG data if it does not exist already."""
91			if (root / 'EPISURG').is_dir():
92			return
93			root.mkdir(exist_ok=True, parents=True)
94			download_and_extract_archive(
95			self.data_url,
96			download_root=root,
97			md5=self.md5,
98			)
99			(root / 'EPISURG.zip').unlink() # cleanup
100
101			def _glob_subjects(self, string):
102			subjects = []
103			for subject in self._subjects:
104			for image_name in subject:
105			if string in image_name:
106			subjects.append(subject)
107			break
108			return subjects
109
110			def _get_labeled_subjects(self):
111			return self._glob_subjects('seg')
112
113			def _get_paired_subjects(self):
114			return self._glob_subjects('preop')
115
116			def _get_subset(self, subjects):
117			dataset = SubjectsDataset(
118			subjects,
119			transform=self._transform,
120			**(self.kwargs),
121			)
122			return dataset
123
124			def get_labeled(self) -> SubjectsDataset:
125			"""Get dataset from subjects with manual annotations."""
126			return self._get_subset(self._get_labeled_subjects())
127
128			def get_unlabeled(self) -> SubjectsDataset:
129			"""Get dataset from subjects without manual annotations."""
130			subjects = [
131			s for s in self._subjects
132			if s not in self._get_labeled_subjects()
133			]
134			return self._get_subset(subjects)
135
136			def get_paired(self) -> SubjectsDataset:
137			"""Get dataset from subjects with pre- and post-op MRI."""
138			return self._get_subset(self._get_paired_subjects())
139

fepegar / torchio

Pull Request — master (#433)

torchio.datasets.episurg.EPISURG.get_paired() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like