unpack_if_needed()   F
last analyzed

Complexity

Conditions 29

Size

Total Lines 95

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 29
dl 0
loc 95
rs 2
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

Complexity

Complex units like unpack_if_needed() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
'''
2
    Functions on filesystem level.
3
'''
4
5
import zipfile
6
import tarfile
7
import os
8
import tempfile
9
import shutil
10
11
from .exceptions import JobException
12
13
import logging
14
logger = logging.getLogger('opensubmitexec')
15
16
17
def _zip_single_directory(names):
    '''
    Determine whether all files of a ZIP listing live in one directory.

    names is the list of entry names of the archive. Directory entries
    are recognized by their trailing '/'.

    Returns the directory entry name (including the trailing '/') if,
    after dropping directory entries that are a prefix of all directory
    entries, exactly one directory entry remains and every file entry
    starts with it. Returns None otherwise, in particular when the
    archive has no explicit directory entries at all.
    '''
    directories = [name for name in names if name.endswith('/')]
    logger.debug("List of directory entries: " + str(directories))

    # Consider this case: ['subdir1/', 'subdir1/subdir2/']
    if len(directories) > 1:
        # An entry is redundant when it is a prefix of every directory
        # entry, including itself.
        redundant = [current for current in directories
                     if all(el.startswith(current) for el in directories)]
        logger.debug("Redundant directory entries: " + str(redundant))
        directories = [
            entry for entry in directories if entry not in redundant]
        logger.debug(
            "Updated list of directory entries: " + str(directories))

    files = [name for name in names if not name.endswith('/')]
    logger.debug("List of files: " + str(files))

    if len(directories) != 1:
        return None
    candidate = directories[0]
    if all(entry.startswith(candidate) for entry in files):
        logger.debug("ZIP archive contains only one subdirectory")
        return candidate
    return None


def _tar_single_directory(members):
    '''
    Determine whether all files of a TAR listing live in one directory.

    members is the list of TarInfo objects of the archive. A TGZ file
    of one subdirectory with arbitrary files has one entry per
    directory and file.

    Returns the name of the directory member if there is exactly one
    directory member and every regular file is located below it.
    Returns None otherwise.
    '''
    directories = [entry.name for entry in members if entry.isdir()]
    files = [entry.name for entry in members if entry.isfile()]
    logger.debug(directories)
    logger.debug(files)

    if len(directories) != 1:
        return None
    candidate = directories[0]
    # Compare against the name plus path separator. A plain prefix test
    # would treat a top-level file 'project.txt' as being inside a
    # directory called 'proj'.
    prefix = candidate.rstrip('/') + '/'
    if all(entry.startswith(prefix) for entry in files):
        logger.debug("TGZ archive contains only one subdirectory")
        return candidate
    return None


def unpack_if_needed(destination_path, fpath):
    '''
    fpath is the fully qualified path to a single file that
    might be a ZIP / TAR archive.

    If fpath is an archive, its content is extracted into the
    directory given by destination_path. If it is not an archive,
    the file is copied to destination_path, unless fpath already
    starts with destination_path.

    The function returns two values. The first one is a
    directory name if:

    - fpath is an archive.
    - The archive contains only this single directory with
      arbitrary content.

    Otherwise, it is None.

    For ZIP archives the directory name is the archive entry name and
    therefore ends with '/', for TAR archives it has no trailing slash.

    This is helpful in catching the typical "right-click to compress"
    cases for single ZIP files in Explorer / Finder.

    The second return value is a boolean indicating if
    fpath was an archive.
    '''
    single_dir = None
    did_unpack = False

    dircontent = os.listdir(destination_path)
    logger.debug("Content of %s before unarchiving: %s",
                 destination_path, dircontent)

    # Perform un-archiving, in case
    if zipfile.is_zipfile(fpath):
        logger.debug("Detected ZIP file at %s, unpacking it.", fpath)
        did_unpack = True
        with zipfile.ZipFile(fpath, "r") as archive:
            single_dir = _zip_single_directory(archive.namelist())
            archive.extractall(destination_path)
    elif tarfile.is_tarfile(fpath):
        logger.debug("Detected TAR file at %s, unpacking it.", fpath)
        did_unpack = True
        with tarfile.open(fpath) as archive:
            single_dir = _tar_single_directory(archive.getmembers())
            # The archive may come from an untrusted upload. Where the
            # running Python offers extraction filters, use the 'data'
            # filter so that members cannot be written outside of
            # destination_path. Older interpreters keep the old call.
            if hasattr(tarfile, 'data_filter'):
                archive.extractall(destination_path, filter='data')
            else:
                archive.extractall(destination_path)
    elif not fpath.startswith(destination_path):
        logger.debug(
            "File at %s is a single non-archive file, copying it to %s",
            fpath, destination_path)
        shutil.copy(fpath, destination_path)

    dircontent = os.listdir(destination_path)
    logger.debug("Content of %s after unarchiving: %s",
                 destination_path, dircontent)
    return single_dir, did_unpack
112
113
114
def remove_working_directory(directory, config):
    '''
    Delete the given directory tree, if cleanup is enabled.

    The directory is only removed when the boolean option "cleanup" in
    the "Execution" section of config is set. Errors during removal
    are ignored.
    '''
    if config.getboolean("Execution", "cleanup"):
        shutil.rmtree(directory, ignore_errors=True)
117
118
119
def create_working_dir(config, prefix):
    '''
        Create a fresh temporary directory, based on the given prefix.

        The directory is created below the path configured as option
        "directory" in the "Execution" section of config. An empty
        prefix is replaced by 'opensubmit'.

        Returns the new path, always ending with the path separator.
    '''
    # Fetch base directory from executor configuration
    basepath = config.get("Execution", "directory")

    prefix = prefix or 'opensubmit'

    finalpath = tempfile.mkdtemp(prefix=prefix + '_', dir=basepath)
    # Other code builds paths by plain string concatenation, so the
    # trailing separator is part of the returned value.
    if not finalpath.endswith(os.sep):
        finalpath += os.sep
    logger.debug("Created fresh working directory at %s.", finalpath)

    return finalpath
136
137
138
def _abort_job(info_student, info_tutor):
    '''
    Log the tutor-facing message as error and raise a JobException
    carrying both the student-facing and the tutor-facing text.
    '''
    logger.error(info_tutor)
    raise JobException(info_student=info_student, info_tutor=info_tutor)


def prepare_working_directory(job, submission_path, validator_path):
    '''
    Based on two downloaded files in the working directory,
    the student submission and the validation package,
    the working directory is prepared.

    We unpack student submission first, so that teacher files overwrite
    them in case.

    When the student submission is a single directory, we change the
    working directory and go directly into it, before dealing with the
    validator stuff.

    The function reads and updates job.working_dir, sets
    job.student_files to the names of the student files found, and
    makes sure a file exists at job.validator_script_name
    (presumably the expected validator script path inside the working
    directory - confirm against the job class).

    If unrecoverable errors happen, such as an empty student archive,
    a JobException is raised.
    '''
    internal_error = "Internal error with the validator. Please contact your course responsible."

    # Safeguard for fail-fast in disk full scenarios on the executor
    dusage = shutil.disk_usage(job.working_dir)
    if dusage.free < 1024 * 1024 * 50:   # 50 MB
        _abort_job(
            internal_error,
            "Error: Execution cancelled, less then 50MB of disk space free on the executor.")

    submission_fname = os.path.basename(submission_path)
    validator_fname = os.path.basename(validator_path)

    # Un-archive student submission
    single_dir, did_unpack = unpack_if_needed(job.working_dir, submission_path)
    job.student_files = os.listdir(job.working_dir)
    # The archive itself is not a student file. It is only listed when
    # it was stored inside the working directory.
    if did_unpack and submission_fname in job.student_files:
        job.student_files.remove(submission_fname)

    # Fail automatically on empty student submissions
    if not job.student_files:
        _abort_job(
            "Your compressed upload is empty - no files in there.",
            "Submission archive file has no content.")

    # Handle student archives containing a single directory with all data
    if single_dir:
        logger.warning(
            "The submission archive contains only one directory. Changing working directory.")
        # Set new working directory. ZIP directory names already end
        # with '/', so strip it to avoid a doubled separator.
        job.working_dir = job.working_dir + single_dir.rstrip('/') + os.sep
        # Move validator package there
        shutil.move(validator_path, job.working_dir)
        validator_path = job.working_dir + validator_fname
        # Re-scan for list of student files
        job.student_files = os.listdir(job.working_dir)

    # The working directory now only contains the student data and the downloaded
    # validator package.
    # Update the file list accordingly.
    if validator_fname in job.student_files:
        job.student_files.remove(validator_fname)
    logger.debug("Student files: {0}".format(job.student_files))

    # Unpack validator package
    single_dir, did_unpack = unpack_if_needed(job.working_dir, validator_path)
    if single_dir:
        _abort_job(
            internal_error,
            "Error: Directories are not allowed in the validator archive.")

    if not os.path.exists(job.validator_script_name):
        if did_unpack:
            # The download was an archive, but the validator was not inside.
            # This is a failure of the tutor.
            _abort_job(
                internal_error,
                "Error: Missing validator.py in the validator archive.")
        else:
            # The download is already the script, but has the wrong name
            logger.warning("Renaming {0} to {1}.".format(
                validator_path, job.validator_script_name))
            shutil.move(validator_path, job.validator_script_name)
219
220
221
def has_file(dir, fname):
    '''
    Return True if an entry named fname exists in the directory dir.

    The path is built with os.path.join, so a trailing separator on
    dir does not lead to a doubled separator and an empty dir refers
    to the current directory instead of the filesystem root.
    '''
    # The parameter name 'dir' shadows the builtin, but is kept so that
    # keyword callers continue to work.
    return os.path.exists(os.path.join(dir, fname))
223