Passed
Push — master ( 03c658...45831a )
by Peter
01:14
created

remove_working_directory()   A

Complexity

Conditions 2

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 3
rs 10
cc 2
1
'''
2
    Functions on filesystem level.
3
'''
4
5
import zipfile
6
import tarfile
7
import os
8
import tempfile
9
import shutil
10
11
from .exceptions import JobException
12
13
import logging
14
# Logger used by the functions of this module, registered under the
# name 'opensubmitexec'.
logger = logging.getLogger('opensubmitexec')
15
16
17
def unpack_if_needed(destination_path, fpath):
    '''
    Move a single file, or its content if it is an archive, to the
    directory given by destination_path.

    fpath is the fully qualified path to a single file that
    might be a ZIP / TAR (e.g. TGZ) archive.

    The function returns two values.

    The first one is a directory name, exactly as it is stored in
    the archive (ZIP directory entries end with '/'), if:

    - fpath is an archive.
    - All files of the archive live below this one single directory.

    Otherwise, it is None.

    This is helpful in catching the typical "right-click to compress"
    cases for single ZIP files in Explorer / Finder.

    The second return value is a boolean indicating if
    fpath was an archive.
    '''
    single_dir = None
    did_unpack = False

    logger.debug("Content of %s before unarchiving: %s",
                 destination_path, os.listdir(destination_path))

    # Perform un-archiving, in case
    if zipfile.is_zipfile(fpath):
        logger.debug("Detected ZIP file at %s, unpacking it.", fpath)
        did_unpack = True
        with zipfile.ZipFile(fpath, "r") as archive:
            names = [entry.filename for entry in archive.infolist()]
            directories = [name for name in names if name.endswith('/')]
            logger.debug("List of directory entries: " + str(directories))

            # Consider this case: ['subdir1/', 'subdir1/subdir2/']
            if len(directories) > 1:
                # An entry is redundant when it is a path prefix of
                # every directory entry, i.e. a pure wrapper directory.
                redundant = [
                    current for current in directories
                    if all(el.startswith(current) for el in directories)]
                logger.debug("Redundant directory entries: " + str(redundant))
                directories = [
                    entry for entry in directories if entry not in redundant]
                logger.debug(
                    "Updated list of directory entries: " + str(directories))

            files = [name for name in names if not name.endswith('/')]
            logger.debug("List of files: " + str(files))
            if len(directories) == 1:
                d = directories[0]
                # d ends with '/', so a plain prefix test is a real
                # "lives below this directory" test here.
                if all(name.startswith(d) for name in files):
                    logger.debug("ZIP archive contains only one subdirectory")
                    single_dir = d
            archive.extractall(destination_path)
    elif tarfile.is_tarfile(fpath):
        logger.debug("Detected TAR file at %s, unpacking it." , fpath)
        did_unpack = True
        with tarfile.open(fpath) as tar:
            members = tar.getmembers()
            # A TGZ file of one subdirectory with arbitrary files
            # has one member entry per directory and file
            directories = [entry.name for entry in members if entry.isdir()]
            files = [entry.name for entry in members if entry.isfile()]
            logger.debug(directories)
            logger.debug(files)
            if len(directories) == 1:
                d = directories[0]
                # Compare against "<dir>/" instead of the bare name.
                # Otherwise a top-level file such as 'subfile.txt' would
                # be counted as living inside a directory called 'sub'.
                inside = d.rstrip('/') + '/'
                if all(name.startswith(inside) for name in files):
                    logger.debug("TGZ archive contains only one subdirectory")
                    single_dir = d
            # The archive may be an untrusted upload. Where the running
            # Python ships tarfile extraction filters, use the 'data'
            # filter so that members cannot escape destination_path.
            if hasattr(tarfile, 'data_filter'):
                tar.extractall(destination_path, filter='data')
            else:
                tar.extractall(destination_path)
    elif not fpath.startswith(destination_path):
        # Plain file outside of the destination (string prefix check):
        # bring it over. A file already below destination_path stays put.
        logger.debug(
            "File at %s is a single non-archive file, copying it to %s",
            fpath, destination_path)
        shutil.copy(fpath, destination_path)

    logger.debug("Content of %s after unarchiving: %s",
                 destination_path, os.listdir(destination_path))
    return single_dir, did_unpack
112
113
114
def remove_working_directory(directory, config):
    '''
    Delete the given directory tree, but only if the option "cleanup"
    in the section "Execution" of config is switched on.

    Errors during deletion are ignored.
    '''
    cleanup_enabled = config.getboolean("Execution", "cleanup")
    if cleanup_enabled is not True:
        return
    shutil.rmtree(directory, ignore_errors=True)
117
118
119
def create_working_dir(config, prefix):
    '''
        Create a fresh temporary directory whose name starts with the
        given prefix, followed by an underscore. An empty prefix is
        replaced by 'opensubmit'.

        The directory is created below the base directory that is
        configured as option "directory" in the section "Execution".

        Returns the new path, which always ends with a path separator.
    '''
    # Base directory comes from the executor configuration
    parent_dir = config.get("Execution", "directory")
    name_start = (prefix if prefix else 'opensubmit') + '_'

    created = tempfile.mkdtemp(prefix=name_start, dir=parent_dir)
    workdir = created if created.endswith(os.sep) else created + os.sep
    logger.debug("Created fresh working directory at {0}.".format(workdir))

    return workdir
136
137
138
def prepare_working_directory(job, submission_path, validator_path):
    '''
    Based on two downloaded files in the working directory,
    the student submission and the validation package,
    the working directory is prepared.

    We unpack the student submission first, so that teacher files
    overwrite them in case.

    When the student submission is a single directory, we change the
    working directory and go directly into it, before dealing with the
    validator stuff.

    The function reads and updates job.working_dir, fills
    job.student_files with the names of the student files, and reads
    job.validator_script_name as the expected location of the
    validator script.

    If unrecoverable errors happen, such as an empty student archive,
    a validator archive consisting of a single directory, or a
    validator archive without the validator script, a JobException
    is raised.
    '''
    submission_fname = os.path.basename(submission_path)
    validator_fname = os.path.basename(validator_path)

    # Un-archive student submission
    single_dir, did_unpack = unpack_if_needed(job.working_dir, submission_path)
    job.student_files = os.listdir(job.working_dir)
    if did_unpack:
        # The archive file itself is not student content
        job.student_files.remove(submission_fname)

    # Fail automatically on empty student submissions.
    # The downloaded validator package is part of the same directory
    # listing, so it must not be counted as student content here.
    student_content = [
        name for name in job.student_files if name != validator_fname]
    if not student_content:
        info_student = "Your compressed upload is empty - no files in there."
        info_tutor = "Submission archive file has no content."
        logger.error(info_tutor)
        raise JobException(info_student=info_student, info_tutor=info_tutor)

    # Handle student archives containing a single directory with all data
    if single_dir:
        logger.warning(
            "The submission archive contains only one directory. Changing working directory.")
        # Set new working directory
        # NOTE(review): plain string concatenation, so job.working_dir is
        # presumably expected to end with a path separator - confirm.
        job.working_dir = job.working_dir + single_dir + os.sep
        # Move validator package there
        shutil.move(validator_path, job.working_dir)
        validator_path = job.working_dir + validator_fname
        # Re-scan for list of student files
        job.student_files = os.listdir(job.working_dir)

    # The working directory now only contains the student data and the downloaded
    # validator package.
    # Update the file list accordingly.
    job.student_files.remove(validator_fname)
    logger.debug("Student files: {0}".format(job.student_files))

    # Unpack validator package
    single_dir, did_unpack = unpack_if_needed(job.working_dir, validator_path)
    if single_dir:
        info_student = "Internal error with the validator. Please contact your course responsible."
        info_tutor = "Error: Directories are not allowed in the validator archive."
        logger.error(info_tutor)
        raise JobException(info_student=info_student, info_tutor=info_tutor)

    if os.path.exists(job.validator_script_name):
        # Validator script is in place, nothing left to do
        return

    if did_unpack:
        # The download was an archive, but the validator was not inside.
        # This is a failure of the tutor.
        info_student = "Internal error with the validator. Please contact your course responsible."
        info_tutor = "Error: Missing validator.py in the validator archive."
        logger.error(info_tutor)
        raise JobException(info_student=info_student,
                           info_tutor=info_tutor)

    # The download is already the script, but has the wrong name
    logger.warning("Renaming {0} to {1}.".format(
        validator_path, job.validator_script_name))
    shutil.move(validator_path, job.validator_script_name)
210
211
212
def has_file(dir, fname):
    '''
    Tell whether an entry called fname exists directly below the
    directory dir.
    '''
    candidate = os.sep.join((dir, fname))
    return os.path.exists(candidate)
214