Completed
Push — main ( 0c57ec...f6b5bf )
by Yunguan
18s queued 13s
created

paired_ct_lung.demo_data   A

Complexity

Total Complexity 9

Size/Duplication

Total Lines 248
Duplicated Lines 32.26 %

Importance

Changes 0
Metric Value
wmc 9
eloc 169
dl 80
loc 248
rs 10
c 0
b 0
f 0

2 Functions

Rating   Name   Duplication   Size   Complexity  
A move_files_into_correct_path() 13 13 4
A move_test_cases_into_correct_path() 12 12 5

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
import os
2
import random
3
import shutil
4
import zipfile
5
6
import nibabel as nib
7
import numpy as np
8
from tensorflow.keras.utils import get_file
9
from tqdm import tqdm
10
11
# if wget is installed, uncomment the following line
12
# import wget
13
14
# if already in the abc/DeepReg directory then do nothing, otherwise
15
# use os.chdir(r'abc/DeepReg') before this line
16
# The launch directory anchors every absolute path built further down.
main_path = os.getcwd()
os.chdir(main_path)

######## DOWNLOADING AND UNZIPPING ALL FILES INTO CORRECT PATH ########

project_dir = "demos/paired_ct_lung"
data_folder_name = "dataset"
os.chdir(project_dir)

url = "https://zenodo.org/record/3835682/files/training.zip"

# Alternative download routes (disabled): `fname = wget.download(url)` when the
# wget package is installed, or `os.system("wget " + url)`.
fname = "training.zip"

# Passing a full path makes get_file store the archive in the project folder.
get_file(os.path.join(os.getcwd(), fname), url)
print("The file ", fname, " has successfully been downloaded!")

# Always start from an empty dataset folder.
path_to_data_folder = os.path.join(main_path, project_dir, data_folder_name)
if os.path.exists(path_to_data_folder):
    shutil.rmtree(path_to_data_folder)
os.mkdir(path_to_data_folder)

# cwd is still the project folder, so the relative folder name resolves to
# the directory created just above.
with zipfile.ZipFile(fname, "r") as archive:
    archive.extractall(data_folder_name)
print("Files unzipped!")

# The archive is no longer needed once extracted.
os.remove(fname)
os.chdir(main_path)
49
50
######## MOVING FILES INTO TRAIN DIRECTORY ########
51
52
# Absolute locations of the split folders and of the freshly unzipped archive.
path_to_train = os.path.join(main_path, project_dir, data_folder_name, "train")
path_to_test = os.path.join(main_path, project_dir, data_folder_name, "test")
path_to_images_and_labels = os.path.join(
    main_path, project_dir, data_folder_name, "training"
)

# Snapshot the archive contents before any file is moved.
labels_fnames = os.listdir(os.path.join(path_to_images_and_labels, "lungMasks"))
images_fnames = os.listdir(os.path.join(path_to_images_and_labels, "scans"))

# Create the train folder and its four sub-folders only on the first run.
if not os.path.exists(path_to_train):
    os.mkdir(path_to_train)
    for train_sub_folder in (
        "fixed_images",
        "fixed_labels",
        "moving_images",
        "moving_labels",
    ):
        os.mkdir(os.path.join(path_to_train, train_sub_folder))
67
68
69 View Code Duplication
def move_files_into_correct_path(
    fnames, path_to_images_and_labels, new_path, suffix, sub_folder_name
):
    """Move files into the ``fixed_*`` / ``moving_*`` folders of ``new_path``.

    A file whose name contains "insp" goes to ``<new_path>/fixed_<suffix>``;
    otherwise a file whose name contains "exp" goes to
    ``<new_path>/moving_<suffix>``. Files matching neither tag are left where
    they are. The process working directory is not changed.

    :param fnames: file names (not paths) located in the source folder.
    :param path_to_images_and_labels: folder that contains ``sub_folder_name``.
    :param new_path: root folder holding the ``fixed_*``/``moving_*`` folders.
    :param suffix: destination folder suffix, e.g. "images" or "labels".
    :param sub_folder_name: source sub-folder, e.g. "scans" or "lungMasks".
    """
    source_dir = os.path.join(path_to_images_and_labels, sub_folder_name)
    for fname in fnames:
        # "insp" is tested first and the branches are exclusive, so a name
        # carrying both tags is moved exactly once.
        if "insp" in fname:
            prefix = "fixed_"
        elif "exp" in fname:
            prefix = "moving_"
        else:
            continue
        # Honour the caller-supplied destination root rather than a global.
        destination = os.path.join(new_path, prefix + suffix)
        shutil.move(os.path.join(source_dir, fname), destination)
82
83
84
# Sort scans first, then lung masks, into the train sub-folders.
if os.path.exists(path_to_images_and_labels):
    for archive_fnames, folder_suffix, archive_sub_folder in (
        (images_fnames, "images", "scans"),
        (labels_fnames, "labels", "lungMasks"),
    ):
        move_files_into_correct_path(
            archive_fnames,
            path_to_images_and_labels,
            path_to_train,
            folder_suffix,
            archive_sub_folder,
        )

# Return to the launch directory regardless of what happened above.
os.chdir(main_path)
93
94
######## MOVING FILES INTO TEST AND VALID DIRECTORY ########
95
path_to_test = os.path.join(path_to_data_folder, "test")
path_to_valid = os.path.join(path_to_data_folder, "valid")

# The split is only drawn when no test folder exists yet.
if not os.path.exists(path_to_test):

    os.mkdir(path_to_test)
    os.mkdir(os.path.join(path_to_test, "fixed_images"))
    os.mkdir(os.path.join(path_to_test, "fixed_labels"))
    os.mkdir(os.path.join(path_to_test, "moving_images"))
    os.mkdir(os.path.join(path_to_test, "moving_labels"))

    # Fraction of all cases held out for test and validation together.
    ratio_of_test_and_valid_samples = 0.2

    # A case name is the first two "_"-separated tokens of a file name;
    # np.unique removes the duplicates.
    unique_case_names = np.unique(
        ["_".join(file.split("_")[0:2]) for file in images_fnames]
    )

    number_of_held_out_cases = int(
        ratio_of_test_and_valid_samples * len(unique_case_names)
    )
    test_and_valid_cases = random.sample(
        list(unique_case_names), number_of_held_out_cases
    )

    # The first half of the sample is the test set, the rest is validation.
    # The validation slice starts at number_of_test_cases (not one past it),
    # so every sampled case ends up in exactly one of the two splits.
    number_of_test_cases = int(
        ratio_of_test_and_valid_samples * len(unique_case_names) / 2
    )
    test_cases = test_and_valid_cases[:number_of_test_cases]
    valid_cases = test_and_valid_cases[number_of_test_cases:]
125
126
    def move_test_cases_into_correct_path(test_cases, path_to_train, path_to_test):
        """Move every file belonging to one of ``test_cases`` to another split.

        Each sub-folder of ``path_to_train`` is scanned once. A file is moved
        to the sub-folder of the same name under ``path_to_test`` when any
        case name occurs in its file name. The process working directory is
        not changed.

        :param test_cases: case names to relocate, matched as substrings.
        :param path_to_train: split folder the files are taken from.
        :param path_to_test: split folder the files are moved to; it must
            already contain the same sub-folders as ``path_to_train``.
        """
        cases = list(test_cases)
        for folder in os.listdir(path_to_train):
            source_dir = os.path.join(path_to_train, folder)
            # Stray files next to the sub-folders are not part of the layout.
            if not os.path.isdir(source_dir):
                continue
            destination = os.path.join(path_to_test, folder)
            for file_name in os.listdir(source_dir):
                # NOTE(review): substring matching means a case name that is a
                # prefix of another case name would match both - confirm that
                # case names are fixed width.
                if any(case in file_name for case in cases):
                    shutil.move(os.path.join(source_dir, file_name), destination)
138
139
    # Test cases leave the training folders first.
    move_test_cases_into_correct_path(test_cases, path_to_train, path_to_test)

    # The validation split gets the same four sub-folders as the other splits.
    os.mkdir(path_to_valid)
    for valid_sub_folder in (
        "fixed_images",
        "fixed_labels",
        "moving_images",
        "moving_labels",
    ):
        os.mkdir(os.path.join(path_to_valid, valid_sub_folder))

    move_test_cases_into_correct_path(valid_cases, path_to_train, path_to_valid)
148
149
######## NAMING FILES SUCH THAT THEIR NAMES MATCH FOR PAIRING ########
150
151
def _strip_insp_exp_tags(path_to_split):
    """Remove the "_insp" / "_exp" tag from every file name in a split folder.

    Every sub-folder of ``path_to_split`` is visited. A name containing
    "_insp" has that tag removed; otherwise a name containing "_exp" has that
    tag removed; other files are left untouched. Files are renamed in place
    and the process working directory is not changed.

    :param path_to_split: split folder whose sub-folders hold the files.
    """
    for folder in os.listdir(path_to_split):
        path_to_folder = os.path.join(path_to_split, folder)
        for file in os.listdir(path_to_folder):
            if "_insp" in file:
                new_name = file.replace("_insp", "")
            elif "_exp" in file:
                new_name = file.replace("_exp", "")
            else:
                continue
            os.rename(
                os.path.join(path_to_folder, file),
                os.path.join(path_to_folder, new_name),
            )


# name all files such that names match exactly for training, testing and
# validation
for path_to_split in (path_to_train, path_to_test, path_to_valid):
    _strip_insp_exp_tags(path_to_split)

print("All files moved and restructured")

# The unzipped archive folder is no longer needed once its files are moved.
shutil.rmtree(path_to_images_and_labels)
# The relative paths used from here on are resolved from the launch directory.
os.chdir(main_path)
202
203
######## NOW WE NEED TO RESCALE EACH IMAGE ########
204
205
# Relative path: resolved against the current working directory.
data_dir = f"demos/paired_ct_lung/{data_folder_name}"
folders = os.listdir(data_dir)

for folder in folders:
    subfolders = os.listdir(os.path.join(data_dir, folder))
    print("\n Working on ", folder, ", progress:")
    for subfolder in tqdm(subfolders):
        files = os.listdir(os.path.join(data_dir, folder, subfolder))
        for file in files:
            file_path = os.path.join(data_dir, folder, subfolder, file)
            if file.startswith("case_020"):  # this case did not load correctly
                os.remove(file_path)
                continue

            im_data = np.asarray(nib.load(file_path).dataobj, dtype=np.float32)
            # Only volumes exceeding 255 are rescaled; the others are left
            # untouched on disk.
            if np.max(im_data) > 255.0:
                # Linear map sending -285 to 0 and 3770 to 255.
                # NOTE(review): presumably the intensity range of this
                # dataset - confirm where the two constants come from.
                im_data = ((im_data + 285) / (3770 + 285)) * 255.0  # rescale image
                # NOTE(review): affine=None does not carry over the affine of
                # the loaded image - confirm this is intended.
                img = nib.Nifti1Image(im_data, affine=None)
                if np.max(img.dataobj) > 255.0:
                    print("Recheck the following file: ", file_path)
                # Written once; a second identical save adds nothing.
                nib.save(img, file_path)
231
232
######## DOWNLOAD MODEL CKPT FROM MODEL ZOO ########
233
234
url = "https://github.com/DeepRegNet/deepreg-model-zoo/raw/master/demo/paired_ct_lung/20210110.zip"
fname = "pretrained.zip"
os.chdir(os.path.join(main_path, project_dir))

# Fetch the archive into the project folder and unpack it into the
# "pretrained" sub-folder of the dataset folder.
pretrained_folder = os.path.join(data_folder_name, "pretrained")
get_file(os.path.join(os.getcwd(), fname), url)
with zipfile.ZipFile(fname, "r") as archive:
    archive.extractall(pretrained_folder)

# The archive itself is not kept.
os.remove(fname)
print("Pretrained model downloaded: %s" % os.path.abspath(pretrained_folder))
249