Completed
Push — main ( 0c57ec...f6b5bf )
by Yunguan
18s queued 13s
created

unpaired_ct_lung.demo_data   A

Complexity

Total Complexity 9

Size/Duplication

Total Lines 276
Duplicated Lines 28.99 %

Importance

Changes 0
Metric Value
wmc 9
eloc 178
dl 80
loc 276
rs 10
c 0
b 0
f 0

2 Functions

Rating   Name   Duplication   Size   Complexity  
A move_files_into_correct_path() 13 13 4
A move_test_cases_into_correct_path() 12 12 5

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
import os
2
import random
3
import shutil
4
import zipfile
5
6
import nibabel as nib
7
import numpy as np
8
from tensorflow.keras.utils import get_file
9
from tqdm import tqdm
10
11
# if wget is installed, uncomment the following line
12
# import wget
13
14
# if already in the abc/DeepReg directory then do nothing, otherwise
15
# use os.chdir(r'abc/DeepReg') before this line
16
main_path = os.getcwd()
17
os.chdir(main_path)
18
19
######## DOWNLOADING AND UNZIPPING ALL FILES INTO CORRECT PATH ########
20
21
project_dir = "demos/unpaired_ct_lung"
22
os.chdir(project_dir)
23
24
url = "https://zenodo.org/record/3835682/files/training.zip"
25
26
# if wget is installed, uncomment the wget.download line below and comment
27
# out the fname = 'training.zip' line
28
# fname = wget.download(url)
29
fname = "training.zip"
30
31
# if training.zip is already downloaded in the correct directory then
32
# comment out the following line
33
# os.system("wget " + url)
34
35
36
get_file(os.path.join(os.getcwd(), fname), url)
37
38
print("The file ", fname, " has successfully been downloaded!")
39
40
data_folder_name = "dataset"
41
path_to_data_folder = os.path.join(main_path, project_dir, data_folder_name)
42
if os.path.exists(path_to_data_folder):
43
    shutil.rmtree(path_to_data_folder)
44
os.mkdir(path_to_data_folder)
45
46
with zipfile.ZipFile(fname, "r") as zip_ref:
47
    zip_ref.extractall(data_folder_name)
48
49
print("Files unzipped!")
50
51
os.remove(fname)
52
os.chdir(main_path)
53
54
######## MOVING FILES INTO TRAIN DIRECTORY ########
55
56
path_to_train = os.path.join(main_path, project_dir, data_folder_name, "train")
57
path_to_test = os.path.join(main_path, project_dir, data_folder_name, "test")
58
path_to_images_and_labels = os.path.join(
59
    main_path, project_dir, data_folder_name, "training"
60
)
61
62
labels_fnames = os.listdir(os.path.join(path_to_images_and_labels, "lungMasks"))
63
images_fnames = os.listdir(os.path.join(path_to_images_and_labels, "scans"))
64
65
if os.path.exists(path_to_train) is not True:
66
    os.mkdir(path_to_train)
67
    os.mkdir(os.path.join(path_to_train, "fixed_images"))
68
    os.mkdir(os.path.join(path_to_train, "fixed_labels"))
69
    os.mkdir(os.path.join(path_to_train, "moving_images"))
70
    os.mkdir(os.path.join(path_to_train, "moving_labels"))
71
72
73 View Code Duplication
def move_files_into_correct_path(
    fnames, path_to_images_and_labels, new_path, suffix, sub_folder_name
):
    """
    Sort files of one sub-folder into the fixed/moving folders of a split.

    Files whose name contains "insp" are moved to ``<new_path>/fixed_<suffix>``
    and files whose name contains "exp" are moved to
    ``<new_path>/moving_<suffix>``. Files matching neither are left in place.

    :param fnames: file names (not paths) found in the source sub-folder
    :param path_to_images_and_labels: directory containing ``sub_folder_name``
    :param new_path: split directory that holds the destination folders
    :param suffix: destination folder suffix, e.g. "images" or "labels"
    :param sub_folder_name: sub-folder of ``path_to_images_and_labels`` to read
    """
    source_dir = os.path.join(path_to_images_and_labels, sub_folder_name)
    for file in fnames:
        # A file is moved at most once, so the two markers are exclusive.
        if "insp" in file:
            prefix = "fixed_"
        elif "exp" in file:
            prefix = "moving_"
        else:
            continue
        # Full source/destination paths are used so that the process working
        # directory is never changed and relative arguments keep resolving
        # against the caller's working directory.
        shutil.move(
            os.path.join(source_dir, file),
            os.path.join(new_path, prefix + suffix),
        )
86
87
88
if os.path.exists(path_to_images_and_labels):
89
    move_files_into_correct_path(
90
        images_fnames, path_to_images_and_labels, path_to_train, "images", "scans"
91
    )
92
    move_files_into_correct_path(
93
        labels_fnames, path_to_images_and_labels, path_to_train, "labels", "lungMasks"
94
    )
95
96
os.chdir(main_path)
97
98
######## MOVING FILES INTO TEST AND VALID DIRECTORY ########
99
100
path_to_test = os.path.join(path_to_data_folder, "test")
101
path_to_valid = os.path.join(path_to_data_folder, "valid")
102
103 View Code Duplication
if os.path.exists(path_to_test) is not True:
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
104
105
    os.mkdir(path_to_test)
106
    os.mkdir(os.path.join(path_to_test, "fixed_images"))
107
    os.mkdir(os.path.join(path_to_test, "fixed_labels"))
108
    os.mkdir(os.path.join(path_to_test, "moving_images"))
109
    os.mkdir(os.path.join(path_to_test, "moving_labels"))
110
111
    ratio_of_test_and_valid_samples = 0.4
112
113
    unique_case_names = []
114
    for file in images_fnames:
115
        case_name_as_list = file.split("_")[0:2]
116
        case_name = case_name_as_list[0] + "_" + case_name_as_list[1]
117
        unique_case_names.append(case_name)
118
    unique_case_names = np.unique(unique_case_names)
119
120
    # Draw the held-out cases once, then split them: the first half becomes
    # the test set and the remainder becomes the validation set.
    num_test_and_valid_cases = int(
        ratio_of_test_and_valid_samples * len(unique_case_names)
    )
    test_and_valid_cases = random.sample(
        list(unique_case_names), num_test_and_valid_cases
    )
    num_test_cases = int(
        ratio_of_test_and_valid_samples * len(unique_case_names) / 2
    )
    test_cases = test_and_valid_cases[:num_test_cases]
    # The validation slice starts exactly where the test slice ends, so every
    # sampled case lands in one of the two splits (an extra offset here would
    # leave one sampled case in neither).
    valid_cases = test_and_valid_cases[num_test_cases:]
130
131
    def move_test_cases_into_correct_path(test_cases, path_to_train, path_to_test):
        """
        Move all files belonging to the given cases from one split to another.

        Every entry of ``path_to_train`` is treated as a folder. Each file in
        it whose name contains one of the case names is moved to the folder
        of the same name under ``path_to_test``.

        :param test_cases: case names, matched as substrings of the file names
        :param path_to_train: split directory the files are taken from
        :param path_to_test: split directory the files are moved to; it must
            already contain the same folder names as ``path_to_train``
        """
        for folder in os.listdir(path_to_train):
            source_dir = os.path.join(path_to_train, folder)
            destination = os.path.join(path_to_test, folder)
            # Each folder is listed once; full paths are used so the process
            # working directory is left untouched.
            for file in os.listdir(source_dir):
                if any(case in file for case in test_cases):
                    shutil.move(os.path.join(source_dir, file), destination)
143
144
    move_test_cases_into_correct_path(test_cases, path_to_train, path_to_test)
145
146
    os.mkdir(path_to_valid)
147
    os.mkdir(os.path.join(path_to_valid, "fixed_images"))
148
    os.mkdir(os.path.join(path_to_valid, "fixed_labels"))
149
    os.mkdir(os.path.join(path_to_valid, "moving_images"))
150
    os.mkdir(os.path.join(path_to_valid, "moving_labels"))
151
152
    move_test_cases_into_correct_path(valid_cases, path_to_train, path_to_valid)
153
154
######## NAMING FILES SUCH THAT THEIR NAMES MATCH FOR PAIRING ########
155
156
# Strip the "_insp" / "_exp" markers from the file names in the train, test
# and validation splits so that corresponding files share exactly the same
# name across the folders of a split.
for split_path in (path_to_train, path_to_test, path_to_valid):
    for folder in os.listdir(split_path):
        path_to_folder = os.path.join(split_path, folder)
        for file in os.listdir(path_to_folder):
            if "_insp" in file:
                new_name = file.replace("_insp", "")
            elif "_exp" in file:
                new_name = file.replace("_exp", "")
            else:
                continue
            # Rename via full paths so the working directory is not changed.
            os.rename(
                os.path.join(path_to_folder, file),
                os.path.join(path_to_folder, new_name),
            )
203
204
shutil.rmtree(os.path.join(path_to_images_and_labels))
205
os.chdir(main_path)
206
207
######## FOR UNPAIRED WE USE IMAGES FROM ONE TIMEPOINT ONLY ########
208
209
# so now remove fixed_images and fixed_labels
210
# and rename moving_images to images
211
# and moving_labels to labels
212
213
folders = os.listdir(os.path.join(project_dir, data_folder_name))
214
215
for folder in folders:
216
    shutil.rmtree(os.path.join(project_dir, data_folder_name, folder, "fixed_images"))
217
    shutil.rmtree(os.path.join(project_dir, data_folder_name, folder, "fixed_labels"))
218
    os.rename(
219
        os.path.join(project_dir, data_folder_name, folder, "moving_images"),
220
        os.path.join(project_dir, data_folder_name, folder, "images"),
221
    )
222
    os.rename(
223
        os.path.join(project_dir, data_folder_name, folder, "moving_labels"),
224
        os.path.join(project_dir, data_folder_name, folder, "labels"),
225
    )
226
227
print("All files moved and restructured")
228
229
os.chdir(main_path)
230
231
######## NOW WE RESCALE THE IMAGES TO 255 ########
232
233
data_dir = "demos/unpaired_ct_lung/dataset"
folders = os.listdir(data_dir)

# Constants of the linear rescaling applied below: intensities are shifted by
# the offset, divided by the range and multiplied by 255.
intensity_offset = 285
intensity_range = 3770 + 285

for folder in folders:
    subfolders = os.listdir(os.path.join(data_dir, folder))
    print("\n Working on ", folder, ", progress:")
    for subfolder in tqdm(subfolders):
        files = os.listdir(os.path.join(data_dir, folder, subfolder))
        for file in files:
            file_path = os.path.join(data_dir, folder, subfolder, file)
            if file.startswith("case_020"):  # this case did not load correctly
                os.remove(file_path)
                continue
            im_data = np.asarray(nib.load(file_path).dataobj, dtype=np.float32)
            if np.max(im_data) <= 255.0:
                # Nothing above 255, so the file is left untouched.
                continue
            im_data = ((im_data + intensity_offset) / intensity_range) * 255.0
            img = nib.Nifti1Image(im_data, affine=None)
            if np.max(img.dataobj) > 255.0:
                print("Recheck the following file: ", file_path)
            # The rescaled image overwrites the source file; it is written
            # exactly once.
            nib.save(img, file_path)
259
260
######## DOWNLOAD MODEL CKPT FROM MODEL ZOO ########
261
262
url = "https://github.com/DeepRegNet/deepreg-model-zoo/raw/master/demo/unpaired_ct_lung/20210110.zip"
263
264
fname = "pretrained.zip"
265
266
os.chdir(os.path.join(main_path, project_dir))
267
268
get_file(os.path.join(os.getcwd(), fname), url)
269
270
with zipfile.ZipFile(fname, "r") as zip_ref:
271
    zip_ref.extractall(os.path.join(data_folder_name, "pretrained"))
272
273
# remove pretrained.zip
274
os.remove(fname)
275
print("Pretrained model downloaded")
276