1
|
|
|
from django.db import models |
2
|
|
|
from django.utils import timezone |
3
|
|
|
from django.core.urlresolvers import reverse |
4
|
|
|
|
5
|
|
|
from django.conf import settings |
6
|
|
|
|
7
|
|
|
import zipfile |
8
|
|
|
import tarfile |
9
|
|
|
import unicodedata |
10
|
|
|
import os |
11
|
|
|
import hashlib |
12
|
|
|
|
13
|
|
|
import logging |
14
|
|
|
logger = logging.getLogger('OpenSubmit') |
15
|
|
|
|
16
|
|
|
|
17
|
|
|
def upload_path(instance, filename):
    '''
    Build the relative storage path for an uploaded file.

    Spaces in the user-provided file name become underscores, the name is
    NFKD-normalized and lower-cased, and the result is placed below a
    directory named after the current date in ISO format (YYYY-MM-DD).

    The "instance" argument is accepted but not used.
    '''
    without_spaces = filename.replace(" ", "_")
    sanitized = unicodedata.normalize('NFKD', without_spaces).lower()
    date_directory = str(timezone.now().date().isoformat())
    return os.path.join(date_directory, sanitized)
25
|
|
|
|
26
|
|
|
|
27
|
|
|
class ValidSubmissionFileManager(models.Manager):
    '''
    Model manager used by SubmissionFile.

    Its queryset contains only files that have not been replaced, that are
    attached to at least one submission, and that have no submission in
    the WITHDRAWN state.
    '''

    def get_queryset(self):
        # Imported inside the method, as in the original code
        # (presumably to avoid a circular import - TODO confirm).
        from .submission import Submission

        base = super(ValidSubmissionFileManager, self).get_queryset()
        not_replaced = base.filter(replaced_by=None)
        not_withdrawn = not_replaced.exclude(
            submissions__state=Submission.WITHDRAWN)
        return not_withdrawn.exclude(submissions=None)
36
|
|
|
|
37
|
|
|
|
38
|
|
|
class SubmissionFile(models.Model):
    '''
    A file attachment for a student submission.

    File attachments may be replaced by the student, but the original
    version is kept and linked through "replaced_by".

    The "fetched" field holds the time stamp at which the file was fetched
    for checking by some executor. On result retrieval, this time stamp is
    emptied again, which allows finding stuck executor jobs on the server
    side.

    The "md5" field keeps a checksum of the file upload, for duplicate
    detection.
    '''

    # The uploaded file itself; the storage path is computed by upload_path().
    attachment = models.FileField(
        upload_to=upload_path, verbose_name="File upload")
    # File name as provided on upload.
    original_filename = models.CharField(max_length=255, default='student.upload')
    # Set while an executor works on the file, None otherwise.
    fetched = models.DateTimeField(editable=False, null=True)
    # Newer version of this upload, if the student replaced it.
    replaced_by = models.ForeignKey(
        'SubmissionFile', null=True, blank=True, editable=False)
    # Checksum as computed by attachment_md5().
    md5 = models.CharField(max_length=36, null=True,
                           blank=True, editable=False)

    class Meta:
        app_label = 'opensubmit'

    def __str__(self):
        return self.attachment.name

    def attachment_md5(self):
        '''
        Calculate the checksum of the file upload.

        For non-archive files (e.g. PDFs), the MD5 of the file itself is
        used.

        Archives are unpacked and the MD5 is generated from the sanitized
        content of their members:
        - Only members smaller than MAX_MD5_FILE_SIZE bytes are considered.
        - Blanks, newlines and tabs are removed before hashing.
        - Ordering matters for MD5, so the final checksum is computed on
          the sorted list of member hashes.

        If reading the archive fails, the checksum of the raw upload is
        used instead.

        Returns the hex digest as string.
        '''
        MAX_MD5_FILE_SIZE = 10000
        md5_set = []

        def md5_add_text(text):
            # Decode leniently, so that undecodable bytes are dropped
            # instead of aborting the computation.
            text = text.decode('utf-8', 'ignore')
            text = text.replace(' ', '').replace(
                '\n', '').replace('\t', '')
            md5_set.append(hashlib.md5(text.encode('utf-8')).hexdigest())

        def md5_add_file(f):
            try:
                md5 = hashlib.md5()
                for chunk in f.chunks():
                    md5.update(chunk)
                md5_set.append(md5.hexdigest())
            except Exception as e:
                # Best effort: checksum computation must not break the
                # caller, but the failure should at least be visible.
                logger.warning(
                    "Exception on file MD5 computation: " + str(e))

        try:
            path = self.attachment.path
            if zipfile.is_zipfile(path):
                with zipfile.ZipFile(path, 'r') as zf:
                    for zipinfo in zf.infolist():
                        if zipinfo.file_size < MAX_MD5_FILE_SIZE:
                            md5_add_text(zf.read(zipinfo))
            elif tarfile.is_tarfile(path):
                with tarfile.open(path, 'r') as tf:
                    for tarinfo in tf.getmembers():
                        if tarinfo.isfile() and tarinfo.size < MAX_MD5_FILE_SIZE:
                            md5_add_text(tf.extractfile(tarinfo).read())
            else:
                md5_add_file(self.attachment)
        except Exception as e:
            logger.warning(
                "Exception on archive MD5 computation, using file checksum: " + str(e))
            # Drop partial member hashes and really fall back to the
            # checksum of the raw upload, as the log message promises.
            del md5_set[:]
            md5_add_file(self.attachment)

        # NOTE(review): an archive without any member below the size limit
        # leaves md5_set empty, so all such archives share the MD5 of the
        # empty string.
        return hashlib.md5(
            ''.join(sorted(md5_set)).encode('utf-8')).hexdigest()

    def basename(self):
        '''
        Return the attachment name without its directory part.
        '''
        return self.attachment.name[self.attachment.name.rfind('/') + 1:]

    def get_absolute_url(self):
        '''
        Return the download URL for this file.

        To realize access protection for student files, an own download
        view is used instead of direct media serving. This implies that
        the Apache media serving (MEDIA_URL) is disabled.

        The file must belong to at least one submission; the URL refers
        to the first one.
        '''
        submissions = self.submissions.all()
        assert len(submissions) > 0
        return reverse('submission_attachment_file', args=(submissions[0].pk,))

    def get_preview_url(self):
        '''
        Return the preview URL for the first submission using this file,
        or None if the file belongs to no submission.
        '''
        # Evaluate the relation once and reuse the result.
        submissions = self.submissions.all()
        if submissions:
            return reverse('preview', args=(submissions[0].pk,))
        return None

    def absolute_path(self):
        '''
        Return the attachment name prefixed with settings.MEDIA_ROOT.
        '''
        return settings.MEDIA_ROOT + "/" + self.attachment.name

    def is_executed(self):
        '''
        Tell whether the "fetched" time stamp is currently set.
        '''
        return self.fetched is not None

    def is_archive(self):
        '''
        Determines if the attachment is an archive (ZIP or TAR).

        Any error while inspecting the file is treated as "not an archive".
        '''
        try:
            path = self.attachment.path
            return zipfile.is_zipfile(path) or tarfile.is_tarfile(path)
        except Exception:
            # Deliberate best effort, e.g. for a missing file.
            return False

    def previews(self):
        '''
        Return previews of the archive members / the single file content.

        The result is a list of dictionaries with the keys "name",
        "is_code" and "preview". In order to avoid browser and web server
        thrashing by the students, content of MAX_PREVIEW_SIZE bytes or
        more is not shown; a placeholder text is delivered instead. This
        limit applies to archive members and to single-file uploads alike.
        '''
        MAX_PREVIEW_SIZE = 1000000
        CODE_ENDINGS = ('.c', '.cpp', 'Makefile',
                        '.java', '.py', '.rb', '.js')

        def shown(name, raw):
            # Undecodable bytes are dropped, so binary content cannot
            # break the preview.
            return {'name': name,
                    'is_code': name.endswith(CODE_ENDINGS),
                    'preview': raw.decode('utf-8', 'ignore')}

        def too_large(name):
            return {'name': name, 'is_code': False,
                    'preview': '(maximum size exceeded)'}

        path = self.attachment.path
        result = []
        if zipfile.is_zipfile(path):
            with zipfile.ZipFile(path, 'r') as zf:
                for zipinfo in zf.infolist():
                    if zipinfo.file_size < MAX_PREVIEW_SIZE:
                        result.append(
                            shown(zipinfo.filename, zf.read(zipinfo)))
                    else:
                        result.append(too_large(zipinfo.filename))
        elif tarfile.is_tarfile(path):
            with tarfile.open(path, 'r') as tf:
                for tarinfo in tf.getmembers():
                    if not tarinfo.isfile():
                        continue
                    if tarinfo.size < MAX_PREVIEW_SIZE:
                        result.append(
                            shown(tarinfo.name, tf.extractfile(tarinfo).read()))
                    else:
                        result.append(too_large(tarinfo.name))
        else:
            # single file
            fname = os.path.basename(path)
            if os.path.getsize(path) < MAX_PREVIEW_SIZE:
                with open(path, 'rb') as f:
                    result.append(shown(fname, f.read()))
            else:
                result.append(too_large(fname))
        return result

    def test_result_dict(self):
        '''
        Create a compact data structure representation of all result
        types for this file.

        Returns a dictionary where the keys are the result kinds, and
        the values are dicts holding the according "result" entry.
        '''
        return {entry['kind']: {'result': entry['result']}
                for entry in self.test_results.all().values()}

    # "objects" is declared first and therefore stays the default manager.
    objects = models.Manager()
    valid_ones = ValidSubmissionFileManager()
212
|
|
|
|