Passed
Push — master ( 4f13ed...1de556 )
by Peter
01:47
created

prepare_working_directory()   C

Complexity

Conditions 7

Size

Total Lines 70

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 70
rs 5.683
cc 7

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method, which moves a coherent part of the method body into a new, well-named method.

1
'''
2
    Functions on filesystem level.
3
'''
4
5
import zipfile
6
import tarfile
7
import os
8
import tempfile
9
import shutil
10
11
from .exceptions import JobException
12
13
import logging
14
logger = logging.getLogger('opensubmitexec')
15
16
17
def _sole_directory(directories, files):
    '''
    Return the only entry of directories if every name in files lives
    below it, otherwise None.

    Archive member names always use '/' as separator. ZIP directory
    entries carry a trailing '/', TAR directory entries do not, so the
    separator is appended for the comparison when it is missing.
    Without it, a top-level file 'subfile.txt' would be mistaken for a
    member of the directory 'sub'.
    '''
    if len(directories) != 1:
        return None
    candidate = directories[0]
    prefix = candidate if candidate.endswith('/') else candidate + '/'
    if all(name.startswith(prefix) for name in files):
        return candidate
    return None


def unpack_if_needed(destination_path, fpath):
    '''
    fpath is the fully qualified path to a single file that
    might be a ZIP / TAR archive.

    The function extracts the archive content to the directory given
    by destination_path. A non-archive file is copied there, unless
    fpath already starts with destination_path.

    The function returns two values. The first one is a
    directory name if:

    - fpath is an archive.
    - The archive contains exactly one remaining directory entry and
      all files of the archive are stored below it.

    Otherwise, it is None. For ZIP archives the name is returned as
    stored in the archive, including the trailing '/'; for TAR
    archives it has no trailing separator.

    This is helpful in catching the typical "right-click to compress"
    cases for single ZIP files in Explorer / Finder.

    The second return value is a boolean indicating if
    fpath was an archive.
    '''
    single_dir = None
    did_unpack = False

    logger.debug("Content of %s before unarchiving: %s",
                 destination_path, os.listdir(destination_path))

    # Perform un-archiving, in case
    if zipfile.is_zipfile(fpath):
        logger.debug("Detected ZIP file at %s, unpacking it.", fpath)
        did_unpack = True
        with zipfile.ZipFile(fpath, "r") as archive:
            names = [entry.filename for entry in archive.infolist()]
            directories = [name for name in names if name.endswith('/')]
            logger.debug("List of directory entries: %s", directories)

            # Consider this case: ['subdir1/', 'subdir1/subdir2/']
            if len(directories) > 1:
                # An entry is redundant when it is a prefix of all
                # directory entries, i.e. a pure parent directory.
                redundant = [current for current in directories
                             if all(el.startswith(current) for el in directories)]
                logger.debug("Redundant directory entries: %s", redundant)
                directories = [entry for entry in directories
                               if entry not in redundant]
                logger.debug("Updated list of directory entries: %s", directories)

            files = [name for name in names if not name.endswith('/')]
            logger.debug("List of files: %s", files)
            single_dir = _sole_directory(directories, files)
            if single_dir:
                logger.debug("ZIP archive contains only one subdirectory")
            archive.extractall(destination_path)
    elif tarfile.is_tarfile(fpath):
        logger.debug("Detected TAR file at %s, unpacking it.", fpath)
        did_unpack = True
        with tarfile.open(fpath) as archive:
            members = archive.getmembers()
            # A TGZ file of one subdirectory with arbitrary files
            # has one infolist entry per directory and file
            directories = [member.name for member in members if member.isdir()]
            files = [member.name for member in members if member.isfile()]
            logger.debug(directories)
            logger.debug(files)
            single_dir = _sole_directory(directories, files)
            if single_dir:
                logger.debug("TGZ archive contains only one subdirectory")
            # The archive is an upload and therefore untrusted. Where
            # the interpreter offers extraction filters, use the 'data'
            # filter so that members cannot escape destination_path.
            if hasattr(tarfile, 'data_filter'):
                archive.extractall(destination_path, filter='data')
            else:
                archive.extractall(destination_path)
    else:
        if not fpath.startswith(destination_path):
            logger.debug(
                "File at %s is a single non-archive file, copying it to %s",
                fpath, destination_path)
            shutil.copy(fpath, destination_path)

    logger.debug("Content of %s after unarchiving: %s",
                 destination_path, os.listdir(destination_path))
    return single_dir, did_unpack
107
108
109
def create_working_dir(config, prefix):
    '''
    Create a fresh temporary directory, based on the given prefix.

    The base directory is read from the option "directory" in the
    section "Execution" of config. An empty prefix is replaced by
    'opensubmit'; the directory name starts with the prefix followed
    by an underscore.

    Returns the new path, always ending with a path separator.
    '''
    # Fetch base directory from executor configuration
    basepath = config.get("Execution", "directory")

    if not prefix:
        prefix = 'opensubmit'

    finalpath = tempfile.mkdtemp(prefix=prefix + '_', dir=basepath)
    # Joining with an empty component appends the separator only
    # when it is not already there.
    finalpath = os.path.join(finalpath, '')
    logger.debug("Created fresh working directory at %s.", finalpath)

    return finalpath
126
127
128
def _raise_job_error(info_student, info_tutor):
    '''
    Log the tutor message as error and abort with a JobException
    carrying both messages.
    '''
    logger.error(info_tutor)
    raise JobException(info_student=info_student, info_tutor=info_tutor)


def prepare_working_directory(job, submission_path, validator_path):
    '''
    Based on two downloaded files in the working directory,
    the student submission and the validation package,
    the working directory is prepared.

    We unpack student submission first, so that teacher files overwrite
    them in case.

    When the student submission is a single directory, we change the
    working directory and go directly into it, before dealing with the
    validator stuff.

    The function updates job.working_dir and job.student_files, and
    makes sure that a file exists at job.validator_script_name
    (presumably the full path of the validator script - to be
    confirmed against the job class).

    If unrecoverable errors happen, such as an empty student archive,
    a JobException is raised.
    '''
    submission_fname = os.path.basename(submission_path)
    validator_fname = os.path.basename(validator_path)
    validator_failure = "Internal error with the validator. Please contact your course responsible."

    # Un-archive student submission
    single_dir, did_unpack = unpack_if_needed(job.working_dir, submission_path)

    # The downloaded validator package is not student data, and neither
    # is the archive file itself once its content got extracted.
    # Leaving the validator in the list would make the emptiness check
    # below pass for every submission.
    not_from_student = {validator_fname}
    if did_unpack:
        not_from_student.add(submission_fname)
    job.student_files = [name for name in os.listdir(job.working_dir)
                         if name not in not_from_student]

    # Fail automatically on empty student submissions
    if not job.student_files:
        _raise_job_error("Your compressed upload is empty - no files in there.",
                         "Submission archive file has no content.")

    # Handle student archives containing a single directory with all data
    if single_dir:
        logger.warning(
            "The submission archive contains only one directory. Changing working directory.")
        # Set new working directory; the empty last component makes the
        # result end with exactly one path separator.
        job.working_dir = os.path.join(job.working_dir, single_dir, '')
        # Move validator package there
        shutil.move(validator_path, job.working_dir)
        validator_path = os.path.join(job.working_dir, validator_fname)
        # Re-scan for list of student files, again without the validator
        job.student_files = [name for name in os.listdir(job.working_dir)
                             if name != validator_fname]

    logger.debug("Student files: %s", job.student_files)

    # Unpack validator package
    single_dir, did_unpack = unpack_if_needed(job.working_dir, validator_path)
    if single_dir:
        _raise_job_error(validator_failure,
                         "Error: Directories are not allowed in the validator archive.")

    if not os.path.exists(job.validator_script_name):
        if did_unpack:
            # The download was an archive, but the validator was not inside.
            # This is a failure of the tutor.
            _raise_job_error(validator_failure,
                             "Error: Missing validator.py in the validator archive.")
        # The download is already the script, but has the wrong name
        logger.warning("Renaming %s to %s.", validator_path, job.validator_script_name)
        shutil.move(validator_path, job.validator_script_name)
198
199
def has_file(dir, fname):
    '''
    Tell whether an entry named fname exists below the directory dir.

    The path is built with os.path.join, so dir may be given with or
    without a trailing separator, and an empty dir refers to the
    current directory instead of the filesystem root.
    '''
    return os.path.exists(os.path.join(dir, fname))
201