Passed
Pull Request — master (#56)
by Paolo
05:42
created

SubmissionFormMixin.check_template_file()   B

Complexity

Conditions 6

Size

Total Lines 40
Code Lines 22

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 22
dl 0
loc 40
rs 8.4186
c 0
b 0
f 0
cc 6
nop 1
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Tue Jul 24 15:51:05 2018
5
6
@author: Paolo Cozzi <[email protected]>
7
"""
8
9
import codecs
import tempfile

import magic
from django import forms

from common.forms import RequestFormMixin
from common.constants import CRB_ANIM_TYPE, TEMPLATE_TYPE
from image_app.models import Submission
from crbanim.helpers import CRBAnimReader
from excel.helpers import ExcelTemplateReader
19
20
21
class SubmissionFormMixin():
    """Validation of the uploaded file shared by the submission forms.

    The mixin is meant to be combined with a Django form class: it reads
    ``self.cleaned_data`` and signals every problem by raising
    ``forms.ValidationError``.
    """

    def clean(self):
        """Validate the uploaded file according to the datasource type.

        The parent ``clean()`` (when the class the mixin is combined with
        provides one) is called first, so that validation implemented by
        the other base classes is not bypassed.

        Returns:
            dict: the form ``cleaned_data``.

        Raises:
            forms.ValidationError: propagated from the ``check_*`` methods
                when the uploaded file is not acceptable.
        """

        # keep the validation chain of the other base classes alive; the
        # getattr guard keeps the mixin usable when no parent defines clean()
        parent_clean = getattr(super(), "clean", None)

        if parent_clean is not None:
            cleaned_data = parent_clean()

            if cleaned_data is not None:
                self.cleaned_data = cleaned_data

        # I can call this method without providing a 'uploaded file'
        # (for instance, when omitting uploaded file). A missing, None or
        # cleared value means there is nothing to inspect
        if not self.cleaned_data.get("uploaded_file"):
            return self.cleaned_data

        # without a datasource type I can't decide which check applies
        if "datasource_type" not in self.cleaned_data:
            return self.cleaned_data

        datasource_type = self.cleaned_data["datasource_type"]

        # avoid file type for excel types (is not a text file)
        if datasource_type != TEMPLATE_TYPE:
            self.check_file_encoding()

        # check crbanim files only if provided
        if datasource_type == CRB_ANIM_TYPE:
            self.check_crbanim_columns()

        # check template files only if provided
        if datasource_type == TEMPLATE_TYPE:
            self.check_template_file()

        return self.cleaned_data

    def _read_first_chunk(self):
        """Return the first chunk of the uploaded file (b"" if it is empty)"""

        uploaded_file = self.cleaned_data['uploaded_file']

        # an upload yielding no chunks must not leak a StopIteration
        return next(uploaded_file.chunks(), b"")

    def check_file_encoding(self):
        """Check that the uploaded file is reported as UTF-8 or ASCII text.

        Raises:
            forms.ValidationError: if the type detected by ``magic`` on the
                first chunk mentions neither "UTF-8" nor "ASCII".
        """

        # read one chunk of such file
        chunk = self._read_first_chunk()
        magic_line = magic.from_buffer(chunk)

        # only the part before the first comma is tested and reported
        file_type = magic_line.split(",")[0]

        if "UTF-8" not in file_type and "ASCII" not in file_type:
            # create message and add error
            msg = (
                "Error: file not in UTF-8 nor ASCII format: "
                "format was %s" % file_type)

            # raising an exception:
            raise forms.ValidationError(msg, code='invalid')

    def check_crbanim_columns(self):
        """Check if a CRBanim file has mandatory columns.

        Raises:
            forms.ValidationError: if the first chunk can't be decoded as
                UTF-8 or if ``CRBAnimReader.is_valid`` reports missing
                columns.
        """

        # read one chunk of such file
        chunk = self._read_first_chunk()

        # chunk is in binary format and needs to be converted to a string.
        # A chunk may end in the middle of a multi-byte character: the
        # incremental decoder keeps such a trailing partial sequence aside
        # instead of failing on it
        decoder = codecs.getincrementaldecoder("utf-8")()

        try:
            text = decoder.decode(chunk)

        except UnicodeDecodeError:
            # bytes which are not UTF-8 at all: report them as a form error
            msg = "Error: file not in UTF-8 nor ASCII format"
            raise forms.ValidationError(msg, code='invalid')

        # now determine if CRBanim file is valid
        check, not_found = CRBAnimReader.is_valid(text)

        if check is False:
            msg = "Error: file lacks of CRBanim mandatory columns: %s" % (
                not_found)

            # raising an exception:
            raise forms.ValidationError(msg, code='invalid')

    def check_template_file(self):
        """Check if template file has columns and sheets.

        Raises:
            forms.ValidationError: if ``magic`` doesn't report a Microsoft
                file, or if the reader reports missing sheets or columns.
        """

        uploaded_file = self.cleaned_data['uploaded_file']

        magic_line = magic.from_buffer(self._read_first_chunk())

        if 'Microsoft' not in magic_line:
            msg = "The file you provided is not a Template file"
            raise forms.ValidationError(msg, code='invalid')

        # xlrd can manage only files. Write a temporary file
        with tempfile.NamedTemporaryFile(delete=True) as tmpfile:
            for chunk in uploaded_file.chunks():
                tmpfile.write(chunk)

            # the reader opens the file by name: push buffered data to disk
            # first, otherwise it could see a truncated (or empty) file
            tmpfile.flush()

            # open the file with proper model
            reader = ExcelTemplateReader()
            reader.read_file(tmpfile.name)

            # check that template has the mandatory sheets
            check, not_found = reader.check_sheets()

            if check is False:
                msg = "Error: file lacks of Template mandatory sheets: %s" % (
                    not_found)

                # raising an exception:
                raise forms.ValidationError(msg, code='invalid')

            # check that template sheets have the mandatory columns
            check, not_found = reader.check_columns()

            if check is False:
                msg = "Error: file lacks of Template mandatory columns: %s" % (
                    not_found)

                # raising an exception:
                raise forms.ValidationError(msg, code='invalid')
118
119
120
class SubmissionForm(SubmissionFormMixin, RequestFormMixin, forms.ModelForm):
    """Model form for ``Submission`` with metadata and the uploaded file.

    File validation comes from ``SubmissionFormMixin``.
    """

    class Meta:
        # model the form is generated from
        model = Submission

        # hint displayed next to the file input
        help_texts = {
            'uploaded_file': 'Need to be in UTF-8 or ASCII format',
        }

        # model fields exposed by the form
        fields = (
            'title',
            'description',
            'gene_bank_name',
            'gene_bank_country',
            'organization',
            'datasource_type',
            'datasource_version',
            'uploaded_file',
        )
137
138
139
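# NOTE(review): an earlier version of this comment said that forms.Form was
# used in order to pass the primary key as a field; the class below is
# declared as a forms.ModelForm, so that remark no longer matches the code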
141
class ReloadForm(SubmissionFormMixin, RequestFormMixin, forms.ModelForm):
    """Model form for ``Submission`` limited to datasource and file fields.

    It adds the ``agree_reload`` checkbox, which is not a model field.
    File validation comes from ``SubmissionFormMixin``.
    """

    # custom attributes
    agree_reload = forms.BooleanField(
        help_text="You have to check this box to reload your data",
        label="That's fine. Replace my submission data with this file",
    )

    class Meta:
        # model the form is generated from
        model = Submission

        # hint displayed next to the file input
        help_texts = {
            'uploaded_file': 'Need to be in UTF-8 or ASCII format',
        }

        # model fields exposed by the form
        fields = (
            'datasource_type',
            'datasource_version',
            'uploaded_file',
        )
158