Completed
Push — main ( 0c57ec...f6b5bf )
by Yunguan
18s queued 13s
created

paired_mrus_brain.demo_data   A

Complexity

Total Complexity 0

Size/Duplication

Total Lines 241
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 0
eloc 183
dl 0
loc 241
rs 10
c 0
b 0
f 0
1
import logging
2
import os
3
import shutil
4
import zipfile
5
6
import nibabel as nib
7
import numpy as np
8
from tensorflow.keras.utils import get_file
9
from tqdm import tqdm
10
11
# When False, the script stops once the partial dataset and the pre-trained
# model have been fetched; when True it goes on to the full-data section.
DOWNLOAD_FULL_DATA = False
# Folder that receives the downloaded demo data. It is used after the
# os.chdir below, so it is resolved inside the demo folder.
DATA_PATH = "dataset"

# Directory the script was launched from.
main_path = os.getcwd()

# All relative paths used further down are resolved against the demo folder.
project_dir = os.path.join(main_path, r"demos/paired_mrus_brain")
os.chdir(project_dir)
17
18
######## PARTIAL PREPROCESSED DATA DOWNLOAD (COMMENT OUT) ########
# Please comment out this code block if full data needs to be used
url = "https://github.com/ucl-candi/dataset_resect/archive/master.zip"
fname = "dataset.zip"
# The archive is requested under an absolute path in the current working
# directory, which is where it is opened by its relative name below.
get_file(os.path.join(os.getcwd(), fname), url)

# unzip to a temporary folder
tmp_folder = "dataset_tmp"
with zipfile.ZipFile(fname, "r") as zip_ref:
    zip_ref.extractall(tmp_folder)

# start from an empty data folder
if os.path.exists(DATA_PATH):
    shutil.rmtree(DATA_PATH)
os.mkdir(DATA_PATH)

# move needed data; the destination is built from DATA_PATH so that it always
# matches the folder that was just (re)created above
extracted_root = os.path.join(
    tmp_folder, "dataset_resect-master", "paired_mr_us_brain"
)
for split in ("test", "train"):
    shutil.move(
        os.path.join(extracted_root, split),
        os.path.join(DATA_PATH, split),
    )

# remove the downloaded archive and the temporary folder
os.remove(fname)
shutil.rmtree(tmp_folder)
46
47
######## DOWNLOAD MODEL CKPT FROM MODEL ZOO ########
url = "https://github.com/DeepRegNet/deepreg-model-zoo/raw/master/paired_mrus_brain_demo_logs.zip"
fname = "pretrained.zip"
# The archive is requested under an absolute path in the current working
# directory, which is where it is opened by its relative name below.
get_file(os.path.join(os.getcwd(), fname), url)

# Extract next to the demo data; the target is derived from DATA_PATH rather
# than a hard-coded folder name so both stay in sync if DATA_PATH changes.
with zipfile.ZipFile(fname, "r") as zip_ref:
    zip_ref.extractall(os.path.join(DATA_PATH, "pretrained"))

# remove pretrained.zip
os.remove(fname)
57
58
# download full data
if not DOWNLOAD_FULL_DATA:
    # Stop here with a success status. SystemExit is raised directly instead
    # of calling exit(), which is a helper for the interactive interpreter
    # and is only available when the site module has been loaded.
    raise SystemExit
logging.warning("Code for downloading full data is not tested.")
62
63
# Create the target folders, but only when the top-level one is missing.
if not os.path.exists("dataset_resect"):
    os.mkdir("dataset_resect")
    os.mkdir(r"dataset_resect/paired_mr_us_brain")

# Download the full archive into dataset_resect and unpack it in place.
url = "https://ns9999k.webs.sigma2.no/10.11582_2020.00025/EASY-RESECT.zip"
fname = "EASY-RESECT.zip"
path_to_zip_file = "dataset_resect"
get_file(os.path.join(os.getcwd(), path_to_zip_file, fname), url)
with zipfile.ZipFile(os.path.join(path_to_zip_file, fname), "r") as zip_ref:
    zip_ref.extractall(os.path.join(path_to_zip_file, "paired_mr_us_brain"))

# Lift every entry of the extracted NIFTI folder up into paired_mr_us_brain.
path_to_nifti = os.path.join(
    path_to_zip_file, "paired_mr_us_brain", "EASY-RESECT", "NIFTI"
)
for entry in os.listdir(path_to_nifti):
    shutil.move(
        os.path.join(path_to_nifti, entry),
        "dataset_resect/paired_mr_us_brain",
    )
print("Files restructured!")
81
# Fraction of the entries that is assigned to the test split.
test_ratio = 0.25
path_to_data = "dataset_resect/paired_mr_us_brain"

# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make the split identical on every run and every machine. It is taken before
# the split folders are created, so "test" and "train" are not part of it.
# NOTE(review): the listing is not filtered; every entry found in the folder
# is assigned to one of the two splits.
cases_list = sorted(os.listdir(path_to_data))
os.mkdir(os.path.join(path_to_data, "test"))
os.mkdir(os.path.join(path_to_data, "train"))

# The first num_test entries go to "test", all remaining ones to "train".
num_test = round(len(cases_list) * test_ratio)
for position, folder in enumerate(cases_list):
    split = "test" if position < num_test else "train"
    shutil.move(
        os.path.join(path_to_data, folder),
        os.path.join(path_to_data, split),
    )
95
# Re-save the T1 and US images of every entry inside each split folder.
folders = os.listdir(path_to_data)
for folder in folders:
    split_dir = os.path.join(path_to_data, folder)
    for sub_folder in tqdm(os.listdir(split_dir)):
        entry_path = os.path.join(split_dir, sub_folder)
        if "DS_St" in sub_folder:
            # Entries matching "DS_St" are deleted as plain files.
            os.remove(entry_path)
            continue
        for file in os.listdir(entry_path):
            file_path = os.path.join(entry_path, file)
            if "T1" in file:
                # Read the array through dataobj: this is the documented
                # drop-in replacement for the deprecated get_data().
                arr = np.asanyarray(nib.load(file_path).dataobj)
                # The copy is written with an identity affine under a
                # "_resized" name; the original file is left in place.
                img = nib.Nifti1Image(arr, affine=np.eye(4))
                img.to_filename(
                    os.path.join(
                        entry_path, file.split(".nii")[0] + "_resized.nii.gz"
                    )
                )
            elif "US" in file:
                # Re-save the image under a ".nii.gz" file name.
                img = nib.load(file_path)
                nib.save(
                    img,
                    os.path.join(entry_path, file.split(".ni")[0] + ".nii.gz"),
                )

# Remove the leftovers of the archive extraction. Which split they were moved
# into depends on the order of the earlier directory listing, so both splits
# are checked instead of assuming they ended up under "train".
for split in ("test", "train"):
    for leftover in ("EASY-RESECT", "__MACOSX"):
        leftover_path = os.path.join(path_to_data, split, leftover)
        if os.path.isdir(leftover_path):
            shutil.rmtree(leftover_path)
130
131
# Preprocess the downloaded data
readme_path = "dataset_resect/README.md"
if os.path.exists(readme_path):
    os.remove(readme_path)

data_folder = "dataset_resect/paired_mr_us_brain"
folders = os.listdir(os.path.join(project_dir, data_folder))

# Move files into correct directories
for folder in folders:
    split_dir = os.path.join(project_dir, data_folder, folder)
    # Listed before the image folders are created, so they are not visited.
    sub_folders = os.listdir(split_dir)
    if not os.path.exists(os.path.join(split_dir, "fixed_images")):
        os.mkdir(os.path.join(split_dir, "fixed_images"))
        os.mkdir(os.path.join(split_dir, "moving_images"))
    for sub_folder in sub_folders:
        case_dir = os.path.join(split_dir, sub_folder)
        for file in os.listdir(case_dir):
            # "T1" files become fixed images, "US" files moving images;
            # anything else stays where it is.
            if "T1" in file:
                target = "fixed_images"
            elif "US" in file:
                target = "moving_images"
            else:
                continue
            shutil.move(
                os.path.join(case_dir, file),
                os.path.join(split_dir, target, file),
            )

print("Files moved into correct directories")
168
169
# Remove unused files: delete every entry of a split folder whose name
# contains "Case", together with whatever is still inside it.
for folder in folders:
    split_dir = os.path.join(project_dir, data_folder, folder)
    for sub_folder in os.listdir(split_dir):
        if "Case" not in sub_folder:
            continue
        shutil.rmtree(os.path.join(split_dir, sub_folder))
print("Unused files removed")
176
177
# Rename files to match names: every image is re-saved as
# "<text before the first '-'>.nii.gz" and the source file is deleted.
for folder in folders:
    split_dir = os.path.join(project_dir, data_folder, folder)
    for sub_folder in os.listdir(split_dir):
        image_dir = os.path.join(split_dir, sub_folder)
        for file in os.listdir(image_dir):
            source = os.path.join(image_dir, file)
            new_name = file.partition("-")[0] + ".nii.gz"
            destination = os.path.join(image_dir, new_name)
            nib.save(nib.load(source), destination)
            os.remove(source)
print("files renamed to match each other")
195
196
# Rescale images
# c counts the image pairs that were deleted because a file failed to load.
c = 0
for folder in folders:
    split_dir = os.path.join(project_dir, data_folder, folder)
    for sub_folder in os.listdir(split_dir):
        for file in os.listdir(os.path.join(split_dir, sub_folder)):
            # Only the fixed images are rescaled.
            if "fixed" not in sub_folder:
                continue
            fixed_path = os.path.join(split_dir, sub_folder, file)
            moving_path = os.path.join(split_dir, "moving_images", file)
            try:
                im_data = np.asarray(nib.load(fixed_path).dataobj, dtype=np.float32)

                im_data = ((im_data + 150) / (1700 + 150)) * 255.0  # rescale image

                # Overwrite the fixed image with the rescaled data.
                img = nib.Nifti1Image(im_data, affine=None)
                nib.save(img, fixed_path)
                # Open the moving image of the same name as well, so a file
                # that cannot be read as an image triggers the cleanup below.
                img = nib.load(moving_path)
            except nib.filebasedimages.ImageFileError:
                # Drop both images of the pair and count the removal.
                os.remove(os.path.join(split_dir, "fixed_images", file))
                os.remove(moving_path)
                c += 1
print("Images rescaled")
print("All done!")
print("Number of files removed due to not loading properly:", c)
241