Completed
Push — master ( 4f7ee6...646424 )
by Paolo
08:30 queued 06:53
created

excel.helpers.fill_uid.parse_times()   B

Complexity

Conditions 5

Size

Total Lines 41
Code Lines 28

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 28
dl 0
loc 41
rs 8.7413
c 0
b 0
f 0
cc 5
nop 2
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Fri Jul  5 16:37:48 2019
5
6
@author: Paolo Cozzi <[email protected]>
7
"""
8
9
import logging
10
11
from common.constants import (
12
    ERROR, LOADED, ACCURACIES, SAMPLE_STORAGE, SAMPLE_STORAGE_PROCESSING)
13
from common.helpers import image_timedelta, parse_image_timedelta
14
from uid.helpers import get_or_create_obj, update_or_create_obj
15
from uid.models import (
16
    DictBreed, DictCountry, DictSpecie, DictSex, DictUberon, Animal,
17
    Sample, DictDevelStage, DictPhysioStage)
18
from submissions.helpers import send_message
19
from validation.helpers import construct_validation_message
20
from validation.models import ValidationSummary
21
22
from .exceptions import ExcelImportError
23
from .exceltemplate import ExcelTemplateReader
24
25
# Get an instance of a logger
26
logger = logging.getLogger(__name__)
27
28
29
def fill_uid_breeds(submission_obj, template):
    """Register in DictBreed every breed listed in an Excel template.

    For each record returned by ``template.get_breed_records()`` the specie
    is resolved through ``DictSpecie.get_specie_check_synonyms``, the
    country is fetched by label, and ``get_or_create_obj`` is called for the
    resulting (supplied_breed, specie, country) triple.

    Args:
        submission_obj: the submission being loaded; only its
            ``gene_bank_country.label`` is read here.
        template: a reader object exposing ``get_breed_records()``.
    """

    logger.info("fill_uid_breeds() started")

    # the label of the gene bank country is handed over as the language
    # for the species lookup (species may come as translated common names)
    language = submission_obj.gene_bank_country.label

    for breed_row in template.get_breed_records():
        # species are expected as latin names, but synonyms are checked too
        breed_specie = DictSpecie.get_specie_check_synonyms(
            species_label=breed_row.species,
            language=language)

        # the breed country is read from the record itself: it may differ
        # from the country of the submission
        breed_country = DictCountry.objects.get(
            label=breed_row.efabis_breed_country)

        breed_keys = {
            'supplied_breed': breed_row.supplied_breed,
            'specie': breed_specie,
            'country': breed_country,
        }

        get_or_create_obj(DictBreed, **breed_keys)

    logger.info("fill_uid_breeds() completed")
56
57
58
def fill_uid_animals(submission_obj, template):
    """Create or update an Animal for each animal record of the template.

    For every record returned by ``template.get_animal_records()`` this
    resolves sex, specie and breed, fetches the optional father and mother,
    and then calls ``update_or_create_obj`` on ``Animal``. Once all records
    are processed, the "animal" ``ValidationSummary`` of the submission is
    fetched (or created) and its counts are reset.

    Args:
        submission_obj: the submission being loaded; its
            ``gene_bank_country.label`` and ``owner`` are read.
        template: a reader object exposing ``get_animal_records()`` and
            ``get_breed_from_animal()``.
    """

    # debug
    logger.info("called fill_uid_animals()")

    # get language
    language = submission_obj.gene_bank_country.label

    # iterate among excel template
    for record in template.get_animal_records():
        # determine sex (case insensitive match on label)
        sex = DictSex.objects.get(label__iexact=record.sex)

        # get specie (mind synonyms)
        specie = DictSpecie.get_specie_check_synonyms(
            species_label=record.species, language=language)

        # pass arguments to the logger instead of pre-formatting: the
        # message is rendered only when the DEBUG level is enabled
        logger.debug("Found '%s' as specie", specie)

        # the breed is described in the breed sheet: ask the template for
        # the record matching this animal
        breed_record = template.get_breed_from_animal(record)

        # get a country for this breed
        country = DictCountry.objects.get(
            label=breed_record.efabis_breed_country)

        # ok get a real dictbreed object
        breed = DictBreed.objects.get(
            supplied_breed=breed_record.supplied_breed,
            specie=specie,
            country=country)

        logger.debug("Selected breed is %s", breed)

        # parents are optional
        mother, father = None, None

        # a parent is fetched with Animal.objects.get (not created), using
        # the same breed and owner of the current animal
        if record.father_id_in_data_source:
            logger.debug(
                "Getting %s as father", record.father_id_in_data_source)

            father = Animal.objects.get(
                name=record.father_id_in_data_source,
                breed=breed,
                owner=submission_obj.owner)

        if record.mother_id_in_data_source:
            logger.debug(
                "Getting %s as mother", record.mother_id_in_data_source)

            mother = Animal.objects.get(
                name=record.mother_id_in_data_source,
                breed=breed,
                owner=submission_obj.owner)

        # now get accuracy
        accuracy = ACCURACIES.get_value_by_desc(
            record.birth_location_accuracy)

        # create a new object. Using defaults to avoid collisions when
        # updating data
        defaults = {
            'alternative_id': record.alternative_animal_id,
            'description': record.animal_description,
            'sex': sex,
            'father': father,
            'mother': mother,
            'birth_date': record.birth_date,
            'birth_location': record.birth_location,
            'birth_location_latitude': record.birth_location_latitude,
            'birth_location_longitude': record.birth_location_longitude,
            'birth_location_accuracy': accuracy,
        }

        # creating or updating an object
        update_or_create_obj(
            Animal,
            name=record.animal_id_in_data_source,
            breed=breed,
            owner=submission_obj.owner,
            submission=submission_obj,
            defaults=defaults)

    # create a validation summary object and set all_count
    validation_summary = get_or_create_obj(
        ValidationSummary,
        submission=submission_obj,
        type="animal")

    # reset counts
    validation_summary.reset_all_count()

    # debug
    logger.info("fill_uid_animals() completed")
152
153
154
def parse_times(record, animal):
    """Try to deal with times in excel templates.

    Two time columns of a sample record are processed: the animal age at
    collection and the sampling to preparation interval.

    Args:
        record: a sample record; ``animal_age_at_collection``,
            ``collection_date``, ``sampling_to_preparation_interval`` and
            ``sample_id_in_data_source`` are read.
        animal: the animal the sample belongs to; its ``birth_date`` is
            used when the age has to be derived.

    Returns:
        tuple: ``(animal_age_at_collection, time_units,
        preparation_interval, preparation_interval_units)``. The last two
        items are ``None`` when the record has no preparation interval.

    Raises:
        ExcelImportError: if a ``ValueError`` is raised while processing
            one of the two columns. The original error is chained as
            the cause.
    """

    animal_age_at_collection, time_units = None, None

    # animal age could be present or not
    try:
        if record.animal_age_at_collection:
            animal_age_at_collection, time_units = parse_image_timedelta(
                record.animal_age_at_collection)

        else:
            # derive animal age at collection from collection and birth dates
            animal_age_at_collection, time_units = image_timedelta(
                record.collection_date, animal.birth_date)

    except ValueError as exc:
        # note: the same message is used when the derivation from dates fails
        message = (
            "Error for Sample '%s' at animal_age_at_collection column: %s" % (
                record.sample_id_in_data_source, exc))
        logger.error(message)

        # chain explicitly, to keep the ValueError as the documented cause
        raise ExcelImportError(message) from exc

    # another time column
    preparation_interval, preparation_interval_units = None, None

    try:
        if record.sampling_to_preparation_interval:
            preparation_interval, preparation_interval_units = \
                parse_image_timedelta(record.sampling_to_preparation_interval)

    except ValueError as exc:
        message = (
            "Error for Sample '%s' at sampling_to_preparation_interval "
            "column: %s" % (
                record.sample_id_in_data_source, exc))
        logger.error(message)
        raise ExcelImportError(message) from exc

    return (animal_age_at_collection, time_units, preparation_interval,
            preparation_interval_units)
195
196
197
def fill_uid_samples(submission_obj, template):
    """Create or update a Sample for each sample record of the template.

    For every record returned by ``template.get_sample_records()`` this
    fetches the animal the sample belongs to (through its specie, breed and
    owner), resolves the dictionary terms and the time columns, and then
    calls ``update_or_create_obj`` on ``Sample``. Once all records are
    processed, the "sample" ``ValidationSummary`` of the submission is
    fetched (or created) and its counts are reset.

    Args:
        submission_obj: the submission being loaded; its
            ``gene_bank_country.label`` and ``owner`` are read.
        template: a reader object exposing ``get_sample_records()``,
            ``get_animal_from_sample()`` and ``get_breed_from_animal()``.

    Raises:
        ExcelImportError: propagated from ``parse_times`` when a time
            column can't be processed.
    """

    # debug
    logger.info("called fill_uid_samples()")

    # get language
    language = submission_obj.gene_bank_country.label

    # iterate among excel template
    for record in template.get_sample_records():
        # get animal by reading record
        animal_record = template.get_animal_from_sample(record)

        # get specie (mind synonyms)
        specie = DictSpecie.get_specie_check_synonyms(
            species_label=animal_record.species,
            language=language)

        # pass arguments to the logger instead of pre-formatting: the
        # message is rendered only when the DEBUG level is enabled
        logger.debug("Found '%s' as specie", specie)

        # get breed from animal record
        breed_record = template.get_breed_from_animal(animal_record)

        # get a country for this breed
        country = DictCountry.objects.get(
            label=breed_record.efabis_breed_country)

        # ok get a real dictbreed object
        breed = DictBreed.objects.get(
            supplied_breed=breed_record.supplied_breed,
            specie=specie,
            country=country)

        logger.debug("Selected breed is %s", breed)

        # the animal is fetched (not created) by name, breed and owner
        animal = Animal.objects.get(
            name=animal_record.animal_id_in_data_source,
            breed=breed,
            owner=submission_obj.owner)

        logger.debug("Selected animal is %s", animal)

        # get an organism part.
        # NOTE(review): a previous comment stated that organism parts need
        # to be lowercase, but the label is passed here unchanged: confirm
        # that normalization happens elsewhere
        organism_part = get_or_create_obj(
            DictUberon,
            label=record.organism_part
        )

        # get developmental_stage and physiological_stage terms
        # they are not mandatory
        devel_stage, physio_stage = None, None

        if record.developmental_stage:
            devel_stage = get_or_create_obj(
                DictDevelStage,
                label=record.developmental_stage
            )

        if record.physiological_stage:
            physio_stage = get_or_create_obj(
                DictPhysioStage,
                label=record.physiological_stage
            )

        # deal with time columns
        (animal_age_at_collection, time_units, preparation_interval,
         preparation_interval_units) = parse_times(record, animal)

        # now get accuracy
        accuracy = ACCURACIES.get_value_by_desc(
            record.collection_place_accuracy)

        # now get storage and storage processing
        # TODO; check those values in excel columns
        storage = SAMPLE_STORAGE.get_value_by_desc(
            record.sample_storage)

        storage_processing = SAMPLE_STORAGE_PROCESSING.get_value_by_desc(
            record.sample_storage_processing)

        # create a new object. Using defaults to avoid collisions when
        # updating data
        defaults = {
            'alternative_id': record.alternative_sample_id,
            'description': record.sample_description,
            'protocol': record.specimen_collection_protocol,
            'collection_date': record.collection_date,
            'collection_place_latitude': record.collection_place_latitude,
            'collection_place_longitude': record.collection_place_longitude,
            'collection_place': record.collection_place,
            'collection_place_accuracy': accuracy,
            'organism_part': organism_part,
            'developmental_stage': devel_stage,
            'physiological_stage': physio_stage,
            'animal_age_at_collection': animal_age_at_collection,
            'animal_age_at_collection_units': time_units,
            'availability': record.availability,
            'storage': storage,
            'storage_processing': storage_processing,
            'preparation_interval': preparation_interval,
            'preparation_interval_units': preparation_interval_units,
        }

        update_or_create_obj(
            Sample,
            name=record.sample_id_in_data_source,
            animal=animal,
            owner=submission_obj.owner,
            submission=submission_obj,
            defaults=defaults)

    # create a validation summary object and set all_count
    validation_summary = get_or_create_obj(
        ValidationSummary,
        submission=submission_obj,
        type="sample")

    # reset counts
    validation_summary.reset_all_count()

    # debug
    logger.info("fill_uid_samples() completed")
318
319
320
def check_UID(submission_obj, reader):
    """Verify that the terms used in the template are known before loading.

    The reader checks are run in this order: sex, species (against the
    submission gene bank country), species of the animal sheet, countries
    and accuracies. Each check returns a ``(check, not_found)`` pair; the
    first one whose ``check`` is false stops the process.

    Args:
        submission_obj: the submission being loaded; only its
            ``gene_bank_country`` is read.
        reader: the template reader providing the ``check_*`` methods.

    Raises:
        ExcelImportError: at the first failing check, with a message
            reporting the ``not_found`` items.
    """

    # TODO: identical to CRBanim. Move to a mixin

    def ensure(outcome, template):
        """Raise ExcelImportError from template if outcome is a failure"""

        passed, missing = outcome

        if not passed:
            raise ExcelImportError(template % (missing))

    # check for species and sex in a similar way as cryoweb does
    ensure(
        reader.check_sex(),
        "Not all Sex terms are loaded into database: "
        "check for '%s' in your dataset")

    # species and related
    ensure(
        reader.check_species(submission_obj.gene_bank_country),
        "Some species are not loaded into database: "
        "check for '%s' in your dataset")

    ensure(
        reader.check_species_in_animal_sheet(),
        "Some species are not defined in breed sheet: "
        "check for '%s' in your dataset")

    # countries
    ensure(
        reader.check_countries(),
        "Those countries are not loaded in database: "
        "check for '%s' in your dataset")

    # accuracies
    ensure(
        reader.check_accuracies(),
        "Not all accuracy levels are defined in database: "
        "check for '%s' in your dataset")
366
367
368
def upload_template(submission_obj):
    """Load the Excel template attached to a submission into the database.

    The uploaded file is read with ``ExcelTemplateReader``, then the
    template is checked (``check_UID``) and breeds, animals and samples are
    loaded in this order. The submission status and message are updated
    according to the outcome and a message is sent through ``send_message``.

    Args:
        submission_obj: the submission owning the uploaded template.

    Returns:
        bool: ``True`` if data were imported (status set to ``LOADED``),
        ``False`` if any exception was raised while checking or loading
        data (status set to ``ERROR``).
    """

    # debug
    logger.info("Importing from Excel template file")

    # this is the full path in docker container
    fullpath = submission_obj.get_uploaded_file_path()

    # read submission data. Reading is outside the try block below, so
    # errors raised here are propagated to the caller
    reader = ExcelTemplateReader()
    reader.read_file(fullpath)

    # start data loading
    try:
        # check UID data like cryoweb does
        check_UID(submission_obj, reader)

        # BREEDS
        fill_uid_breeds(submission_obj, reader)

        # ANIMALS
        fill_uid_animals(submission_obj, reader)

        # SAMPLES
        fill_uid_samples(submission_obj, reader)

    except Exception as exc:
        # log first: the traceback gets recorded even if saving the
        # submission or sending the message fails
        logger.error("Error in importing from Template: %s", exc)
        logger.exception(exc)

        # set message:
        message = "Error in importing data: %s" % (str(exc))

        # save a message in database
        submission_obj.status = ERROR
        submission_obj.message = message
        submission_obj.save()

        # send async message
        send_message(submission_obj)

        return False

    else:
        message = "Template import completed for submission: %s" % (
            submission_obj.id)

        submission_obj.message = message
        submission_obj.status = LOADED
        submission_obj.save()

        # send async message
        send_message(
            submission_obj,
            validation_message=construct_validation_message(submission_obj))

    logger.info("Import from Template is complete")

    return True
427