Completed
Push — master ( eecf08...313cfe )
by Paolo
15s queued 12s
created

excel.helpers.fill_uid.upload_template()   A

Complexity

Conditions 3

Size

Total Lines 62
Code Lines 30

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 30
dl 0
loc 62
rs 9.16
c 0
b 0
f 0
cc 3
nop 1

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

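Commonly applied refactorings include Extract Method and, when many parameters or temporary variables get in the way, Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.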

1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Fri Jul  5 16:37:48 2019
5
6
@author: Paolo Cozzi <[email protected]>
7
"""
8
9
import logging
10
11
from common.constants import (
12
    ERROR, LOADED, ACCURACIES, SAMPLE_STORAGE, SAMPLE_STORAGE_PROCESSING)
13
from common.helpers import image_timedelta, parse_image_timedelta
14
from image_app.helpers import get_or_create_obj, update_or_create_obj
15
from image_app.models import (
16
    DictBreed, DictCountry, DictSpecie, DictSex, DictUberon, Name, Animal,
17
    Sample, DictDevelStage, DictPhysioStage)
18
from submissions.helpers import send_message
19
from validation.helpers import construct_validation_message
20
from validation.models import ValidationSummary
21
22
from .exceptions import ExcelImportError
23
from .exceltemplate import ExcelTemplateReader
24
25
# Get an instance of a logger
26
logger = logging.getLogger(__name__)
27
28
29
def fill_uid_breeds(submission_obj, template):
    """Ensure a DictBreed entry exists for each breed record of the template.

    Args:
        submission_obj: submission whose ``gene_bank_country.label`` is
            passed as ``language`` when resolving species.
        template: reader object exposing ``get_breed_records()``; every
            record must provide ``species``, ``efabis_breed_country`` and
            ``supplied_breed``.
    """

    logger.info("fill_uid_breeds() started")

    # the label of the gene bank country is what the species lookup
    # receives as its language
    translation_language = submission_obj.gene_bank_country.label

    for breed_record in template.get_breed_records():
        # the species lookup is delegated to DictSpecie, which (as its name
        # says) also checks synonyms for the given language
        matched_specie = DictSpecie.get_specie_check_synonyms(
            species_label=breed_record.species,
            language=translation_language)

        # the breed country comes from the record itself, so it is not
        # necessarily the country of the submission
        breed_country = DictCountry.objects.get(
            label=breed_record.efabis_breed_country)

        breed_lookup = {
            'supplied_breed': breed_record.supplied_breed,
            'specie': matched_specie,
            'country': breed_country,
        }

        get_or_create_obj(DictBreed, **breed_lookup)

    logger.info("fill_uid_breeds() completed")
56
57
58
def fill_uid_names(submission_obj, template):
    """Ensure a Name entry exists for each animal and sample identifier.

    Animal records are processed first, then sample records. Every Name is
    looked up (or created) with the submission and the submission owner.

    Args:
        submission_obj: submission the names belong to; its ``owner`` is
            stored with every name.
        template: reader object exposing ``get_animal_records()`` and
            ``get_sample_records()``.
    """

    logger.info("called fill_uid_names()")

    def register_name(identifier):
        """Get or create the Name for ``identifier`` in this submission."""
        get_or_create_obj(
            Name,
            name=identifier,
            submission=submission_obj,
            owner=submission_obj.owner)

    # one name for each animal of the template ...
    for animal_record in template.get_animal_records():
        register_name(animal_record.animal_id_in_data_source)

    # ... and one name for each sample
    for sample_record in template.get_sample_records():
        register_name(sample_record.sample_id_in_data_source)

    logger.info("fill_uid_names() completed")
84
85
86
def fill_uid_animals(submission_obj, template):
    """Create or update an Animal for every animal record of the template.

    For each record the sex, specie, breed, names (animal, father, mother)
    and birth location accuracy are resolved, then the Animal bound to the
    record name is created or updated. Finally the "animal"
    ValidationSummary of the submission is fetched (or created) and its
    counts are reset.

    Args:
        submission_obj: submission being imported; provides
            ``gene_bank_country.label`` (language for the species lookup)
            and ``owner``.
        template: reader object exposing ``get_animal_records()`` and
            ``get_breed_records()``.

    Raises:
        ExcelImportError: if the (breed, species) pair of an animal record
            does not match exactly one breed record of the template.
    """

    logger.info("called fill_uid_animals()")

    # language used to resolve species
    language = submission_obj.gene_bank_country.label

    # breed records don't depend on the animal being processed: read them
    # once here instead of asking the template again for every animal
    breed_records = list(template.get_breed_records())

    for record in template.get_animal_records():
        # sex is matched ignoring case
        sex = DictSex.objects.get(label__iexact=record.sex)

        # get specie (mind synonyms)
        specie = DictSpecie.get_specie_check_synonyms(
            species_label=record.species, language=language)

        logger.debug("Found '%s' as specie", specie)

        # breed records matching this animal by breed name and species
        breeds = [breed for breed in breed_records
                  if breed.supplied_breed == record.breed and
                  breed.species == record.species]

        # the same breed name could be declared for two countries: in that
        # case (or when nothing matches) a unique breed can't be derived
        # from user data
        if len(breeds) != 1:
            raise ExcelImportError(
                "Can't determine a unique breed for '%s:%s' from user data" %
                (record.breed, record.species))

        # the country comes from the only matching breed record
        country = DictCountry.objects.get(
            label=breeds[0].efabis_breed_country)

        # now get the real DictBreed object
        breed = DictBreed.objects.get(
            supplied_breed=record.breed,
            specie=specie,
            country=country)

        logger.debug("Selected breed is %s", breed)

        # parents are optional
        father, mother = None, None

        logger.debug(
            "Getting %s as my name", record.animal_id_in_data_source)

        name = Name.objects.get(
            name=record.animal_id_in_data_source,
            submission=submission_obj)

        if record.father_id_in_data_source:
            logger.debug(
                "Getting %s as father", record.father_id_in_data_source)

            father = Name.objects.get(
                name=record.father_id_in_data_source,
                submission=submission_obj)

        if record.mother_id_in_data_source:
            logger.debug(
                "Getting %s as mother", record.mother_id_in_data_source)

            mother = Name.objects.get(
                name=record.mother_id_in_data_source,
                submission=submission_obj)

        # translate the accuracy description into its stored value
        accuracy = ACCURACIES.get_value_by_desc(
            record.birth_location_accuracy)

        # everything but the name goes in defaults, to avoid collisions
        # when updating data
        defaults = {
            'alternative_id': record.alternative_animal_id,
            'description': record.animal_description,
            'breed': breed,
            'sex': sex,
            'father': father,
            'mother': mother,
            'birth_date': record.birth_date,
            'birth_location': record.birth_location,
            'birth_location_latitude': record.birth_location_latitude,
            'birth_location_longitude': record.birth_location_longitude,
            'birth_location_accuracy': accuracy,
            'owner': submission_obj.owner,
        }

        # creating or updating an object
        update_or_create_obj(
            Animal,
            name=name,
            defaults=defaults)

    # get (or create) the validation summary for animals
    validation_summary = get_or_create_obj(
        ValidationSummary,
        submission=submission_obj,
        type="animal")

    # reset counts
    validation_summary.reset_all_count()

    logger.info("fill_uid_animals() completed")
194
195
196
def fill_uid_samples(submission_obj, template):
    """Create or update a Sample for every sample record of the template.

    For each record the sample name, the animal it was taken from, the
    ontology terms and the time intervals are resolved, then the Sample
    bound to the record name is created or updated. Finally the "sample"
    ValidationSummary of the submission is fetched (or created) and its
    counts are reset.

    Args:
        submission_obj: submission being imported; provides ``owner``.
        template: reader object exposing ``get_sample_records()``.
    """

    logger.info("called fill_uid_samples()")

    for sample_record in template.get_sample_records():
        # the name of this sample
        sample_name = Name.objects.get(
            name=sample_record.sample_id_in_data_source,
            submission=submission_obj,
            owner=submission_obj.owner)

        # the animal this sample belongs to, searched in this submission
        parent_animal = Animal.objects.get(
            name__name=sample_record.animal_id_in_data_source,
            name__submission=submission_obj)

        # NOTE(review): the original comment states that organism parts
        # need to be lowercase, but the label is passed as it is - confirm
        uberon_term = get_or_create_obj(
            DictUberon,
            label=sample_record.organism_part)

        # developmental and physiological stages are not mandatory
        devel_term = (
            get_or_create_obj(
                DictDevelStage,
                label=sample_record.developmental_stage)
            if sample_record.developmental_stage else None)

        physio_term = (
            get_or_create_obj(
                DictPhysioStage,
                label=sample_record.physiological_stage)
            if sample_record.physiological_stage else None)

        # the age at collection is parsed when provided, otherwise it is
        # derived from the collection date and the animal birth date
        if not sample_record.animal_age_at_collection:
            age_value, age_units = image_timedelta(
                sample_record.collection_date, parent_animal.birth_date)

        else:
            age_value, age_units = parse_image_timedelta(
                sample_record.animal_age_at_collection)

        # the preparation interval is optional too
        interval_text = sample_record.sampling_to_preparation_interval

        if interval_text:
            interval_value, interval_units = parse_image_timedelta(
                interval_text)

        else:
            interval_value, interval_units = None, None

        # translate descriptions into their stored values
        place_accuracy = ACCURACIES.get_value_by_desc(
            sample_record.collection_place_accuracy)

        # TODO; check those values in excel columns
        storage_value = SAMPLE_STORAGE.get_value_by_desc(
            sample_record.sample_storage)

        processing_value = SAMPLE_STORAGE_PROCESSING.get_value_by_desc(
            sample_record.sample_storage_processing)

        # everything but the name goes in defaults, to avoid collisions
        # when updating data
        sample_defaults = dict(
            alternative_id=sample_record.alternative_sample_id,
            description=sample_record.sample_description,
            animal=parent_animal,
            protocol=sample_record.specimen_collection_protocol,
            collection_date=sample_record.collection_date,
            collection_place_latitude=(
                sample_record.collection_place_latitude),
            collection_place_longitude=(
                sample_record.collection_place_longitude),
            collection_place=sample_record.collection_place,
            collection_place_accuracy=place_accuracy,
            organism_part=uberon_term,
            developmental_stage=devel_term,
            physiological_stage=physio_term,
            animal_age_at_collection=age_value,
            animal_age_at_collection_units=age_units,
            availability=sample_record.availability,
            storage=storage_value,
            storage_processing=processing_value,
            preparation_interval=interval_value,
            preparation_interval_units=interval_units,
            owner=submission_obj.owner,
        )

        update_or_create_obj(
            Sample,
            name=sample_name,
            defaults=sample_defaults)

    # get (or create) the validation summary for samples
    summary = get_or_create_obj(
        ValidationSummary,
        submission=submission_obj,
        type="sample")

    # reset counts
    summary.reset_all_count()

    logger.info("fill_uid_samples() completed")
305
306
307
def check_UID(submission_obj, reader):
    """Check that the terms used in the template are usable for the import.

    The checks run in this order and stop at the first failure: sex terms,
    species, species of the animal sheet, countries, accuracies.

    Args:
        submission_obj: submission providing ``gene_bank_country``, which
            is passed to ``reader.check_species()``.
        reader: template reader exposing the ``check_*`` methods; each one
            returns a ``(check, not_found)`` pair.

    Raises:
        ExcelImportError: as soon as one check returns a falsy outcome; the
            message reports the ``not_found`` value of that check.
    """

    # TODO: identical to CRBanim. Move to a mixin

    # closing part shared by all the error messages
    hint = "check for '%s' in your dataset"

    def require(outcome, complaint):
        """Raise ExcelImportError if the (check, not_found) pair failed."""
        passed, missing = outcome

        if passed:
            return

        raise ExcelImportError(complaint % (missing))

    # check sex
    require(
        reader.check_sex(),
        "Not all Sex terms are loaded into database: " + hint)

    # check species and related
    require(
        reader.check_species(submission_obj.gene_bank_country),
        "Some species are not loaded into database: " + hint)

    require(
        reader.check_species_in_animal_sheet(),
        "Some species are not defined in breed sheet: " + hint)

    # check countries
    require(
        reader.check_countries(),
        "Those countries are not loaded in database: " + hint)

    # check accuracies
    require(
        reader.check_accuracies(),
        "Not all accuracy levels are defined in database: " + hint)
353
354
355
def _report_import_failure(submission_obj, exc):
    """Store the import error on the submission, notify and log it.

    Must be called while ``exc`` is being handled, since
    ``logger.exception()`` logs the traceback of the active exception.
    """

    message = "Error in importing data: %s" % (str(exc))

    # save a message in database
    submission_obj.status = ERROR
    submission_obj.message = message
    submission_obj.save()

    # send async message
    send_message(submission_obj)

    logger.error("Error in importing from Template: %s", exc)
    logger.exception(exc)


def _report_import_success(submission_obj):
    """Mark the submission as loaded and notify with a validation message."""

    message = "Template import completed for submission: %s" % (
        submission_obj.id)

    submission_obj.message = message
    submission_obj.status = LOADED
    submission_obj.save()

    # send async message
    send_message(
        submission_obj,
        validation_message=construct_validation_message(submission_obj))


def upload_template(submission_obj):
    """Import the Excel template uploaded with a submission.

    The file is read with ExcelTemplateReader, its content is checked and
    then breeds, names, animals and samples are loaded, in this order. Any
    exception raised by those steps is reported on the submission (ERROR
    status plus message); errors raised while reading the file itself are
    not caught here.

    Args:
        submission_obj: submission providing the uploaded file path and
            receiving the final status and message.

    Returns:
        bool: True if the import completed, False if a step failed.
    """

    logger.info("Importing from Excel template file")

    # this is the full path in docker container
    fullpath = submission_obj.get_uploaded_file_path()

    # read submission data
    reader = ExcelTemplateReader()
    reader.read_file(fullpath)

    # start data loading: every step relies on the previous ones
    try:
        # check UID data like cryoweb does
        check_UID(submission_obj, reader)

        fill_uid_breeds(submission_obj, reader)
        fill_uid_names(submission_obj, reader)
        fill_uid_animals(submission_obj, reader)
        fill_uid_samples(submission_obj, reader)

    except Exception as exc:
        # broad on purpose: whatever goes wrong has to be reported on the
        # submission instead of being raised to the caller
        _report_import_failure(submission_obj, exc)

        return False

    # reached only when no step failed; errors raised from here on are not
    # turned into an ERROR status, as in the former try/else
    _report_import_success(submission_obj)

    logger.info("Import from Template is complete")

    return True
417