'''
Functions on the filesystem level.
'''

import zipfile
import tarfile
import os
import tempfile
import shutil
import logging

from .exceptions import JobException

logger = logging.getLogger('opensubmitexec')


def unpack_if_needed(destination_path, fpath):
    '''
    fpath is the fully qualified path to a single file that
    might be a ZIP / TGZ archive.

    The function copies the file, or unpacks its content if it is an
    archive, into the directory given by destination_path.

    The function returns two values. The first one is a
    directory name if:

    - fpath is an archive.
    - The archive contains only this single directory with
      arbitrary content.

    Otherwise, it is None.

    This is helpful in catching the typical "right-click to compress"
    cases for single ZIP files in Explorer / Finder.

    The second return value is a boolean indicating if
    fpath was an archive.
    '''
    single_dir = None
    did_unpack = False

    dircontent = os.listdir(destination_path)
    logger.debug("Content of %s before unarchiving: %s" %
                 (destination_path, str(dircontent)))

    # Perform un-archiving, if needed
    if zipfile.is_zipfile(fpath):
        logger.debug("Detected ZIP file at %s, unpacking it." % (fpath))
        did_unpack = True
        with zipfile.ZipFile(fpath, "r") as zip_file:
            infolist = zip_file.infolist()
            directories = [
                entry.filename for entry in infolist if entry.filename.endswith('/')]
            logger.debug("List of directory entries: " + str(directories))

            # Consider this case: ['subdir1/', 'subdir1/subdir2/']
            if len(directories) > 1:
                redundant = []
                for current in directories:
                    starts_with_this = [
                        el for el in directories if el.startswith(current)]
                    if len(starts_with_this) == len(directories):
                        # current is a partial directory name that is contained
                        # in all others
                        redundant.append(current)
                logger.debug("Redundant directory entries: " + str(redundant))
                directories = [
                    entry for entry in directories if entry not in redundant]
                logger.debug(
                    "Updated list of directory entries: " + str(directories))

            files = [
                entry.filename for entry in infolist if not entry.filename.endswith('/')]
            logger.debug("List of files: " + str(files))
            if len(directories) == 1:
                d = directories[0]
                in_this_dir = [entry for entry in files if entry.startswith(d)]
                if len(files) == len(in_this_dir):
                    logger.debug("ZIP archive contains only one subdirectory")
                    single_dir = d
            zip_file.extractall(destination_path)
    elif tarfile.is_tarfile(fpath):
        logger.debug("Detected TAR file at %s, unpacking it." % (fpath))
        did_unpack = True
        with tarfile.open(fpath) as tar:
            infolist = tar.getmembers()
            # A TGZ file of one subdirectory with arbitrary files
            # has one infolist entry per directory and file
            directories = [entry.name for entry in infolist if entry.isdir()]
            files = [entry.name for entry in infolist if entry.isfile()]
            logger.debug(directories)
            logger.debug(files)
            if len(directories) == 1:
                d = directories[0]
                in_this_dir = [entry for entry in files if entry.startswith(d)]
                if len(files) == len(in_this_dir):
                    logger.debug("TGZ archive contains only one subdirectory")
                    single_dir = d
            tar.extractall(destination_path)
    else:
        if not fpath.startswith(destination_path):
            logger.debug(
                "File at %s is a single non-archive file, copying it to %s" % (fpath, destination_path))
            shutil.copy(fpath, destination_path)

    dircontent = os.listdir(destination_path)
    logger.debug("Content of %s after unarchiving: %s" %
                 (destination_path, str(dircontent)))
    return single_dir, did_unpack
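# Example usage (a minimal sketch; '/tmp/work/' and '/tmp/upload.zip' are
# hypothetical paths, not part of this module):
#
#   single_dir, did_unpack = unpack_if_needed('/tmp/work/', '/tmp/upload.zip')
#   if single_dir:
#       # The archive wrapped everything in one directory ("right-click to
#       # compress" case), so descend into it.
#       working_dir = '/tmp/work/' + single_dir + os.sep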


def remove_working_directory(directory, config):
    '''
    Remove the given working directory, unless cleanup is
    disabled in the executor configuration.
    '''
    if config.getboolean("Execution", "cleanup"):
        shutil.rmtree(directory, ignore_errors=True)


def create_working_dir(config, prefix):
    '''
    Create a fresh temporary directory, based on the given prefix.
    Returns the new path.
    '''
    # Fetch base directory from executor configuration
    basepath = config.get("Execution", "directory")

    if not prefix:
        prefix = 'opensubmit'

    finalpath = tempfile.mkdtemp(prefix=prefix + '_', dir=basepath)
    if not finalpath.endswith(os.sep):
        finalpath += os.sep
    logger.debug("Created fresh working directory at {0}.".format(finalpath))

    return finalpath
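# Example usage (a minimal sketch). It assumes that 'config' is the
# executor's configparser object with an [Execution] section providing
# the 'directory' and 'cleanup' options used above; the prefix 'demo'
# is made up:
#
#   working_dir = create_working_dir(config, 'demo')
#   ...  # run the validation inside working_dir
#   remove_working_directory(working_dir, config)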


def prepare_working_directory(job, submission_path, validator_path):
    '''
    Prepare the working directory, based on two files downloaded
    into it: the student submission and the validation package.

    We unpack the student submission first, so that the teacher
    files can overwrite student files if needed.

    When the student submission is a single directory, we change the
    working directory and go directly into it, before dealing with
    the validator package.

    If unrecoverable errors happen, such as an empty student archive,
    a JobException is raised.
    '''
    submission_fname = os.path.basename(submission_path)
    validator_fname = os.path.basename(validator_path)

    # Un-archive the student submission
    single_dir, did_unpack = unpack_if_needed(job.working_dir, submission_path)
    job.student_files = os.listdir(job.working_dir)
    if did_unpack:
        job.student_files.remove(submission_fname)

    # Fail automatically on empty student submissions
    if len(job.student_files) == 0:
        info_student = "Your compressed upload is empty - no files in there."
        info_tutor = "Submission archive file has no content."
        logger.error(info_tutor)
        raise JobException(info_student=info_student, info_tutor=info_tutor)

    # Handle student archives containing a single directory with all data
    if single_dir:
        logger.warning(
            "The submission archive contains only one directory. Changing working directory.")
        # Set new working directory
        job.working_dir = job.working_dir + single_dir + os.sep
        # Move validator package there
        shutil.move(validator_path, job.working_dir)
        validator_path = job.working_dir + validator_fname
        # Re-scan for list of student files
        job.student_files = os.listdir(job.working_dir)

    # The working directory now only contains the student data and the downloaded
    # validator package.
    # Update the file list accordingly.
    job.student_files.remove(validator_fname)
    logger.debug("Student files: {0}".format(job.student_files))

    # Unpack the validator package
    single_dir, did_unpack = unpack_if_needed(job.working_dir, validator_path)
    if single_dir:
        info_student = "Internal error with the validator. Please contact your course responsible."
        info_tutor = "Error: Directories are not allowed in the validator archive."
        logger.error(info_tutor)
        raise JobException(info_student=info_student, info_tutor=info_tutor)

    if not os.path.exists(job.validator_script_name):
        if did_unpack:
            # The download was an archive, but the validator was not inside.
            # This is an error on the tutor's side.
            info_student = "Internal error with the validator. Please contact your course responsible."
            info_tutor = "Error: Missing validator.py in the validator archive."
            logger.error(info_tutor)
            raise JobException(info_student=info_student,
                               info_tutor=info_tutor)
        else:
            # The download is already the script, but has the wrong name
            logger.warning("Renaming {0} to {1}.".format(
                validator_path, job.validator_script_name))
            shutil.move(validator_path, job.validator_script_name)
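# Example usage (a minimal sketch). The 'job' object, with its working_dir,
# student_files and validator_script_name attributes, is provided by the
# calling executor code and is not defined in this module; the two download
# file names below are hypothetical:
#
#   prepare_working_directory(job,
#                             submission_path=job.working_dir + 'submission.zip',
#                             validator_path=job.working_dir + 'validator.zip')
#   logger.debug(job.student_files)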


def has_file(dir, fname):
    return os.path.exists(dir + os.sep + fname)
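# Example (a sketch; 'validator.py' is just an illustrative file name):
#
#   if has_file(job.working_dir, 'validator.py'):
#       ...
#
# Note that only the given directory itself is checked, not its
# subdirectories.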