excel.helpers.fill_uid.parse_times()   B
last analyzed

Complexity

Conditions 7

Size

Total Lines 42
Code Lines 30

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 30
dl 0
loc 42
rs 7.76
c 0
b 0
f 0
cc 7
nop 2
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Fri Jul  5 16:37:48 2019
5
6
@author: Paolo Cozzi <[email protected]>
7
"""
8
9
import logging
10
11
from common.constants import (
12
    ERROR, LOADED, ACCURACIES, SAMPLE_STORAGE, SAMPLE_STORAGE_PROCESSING)
13
from common.helpers import image_timedelta, parse_image_timedelta
14
from uid.helpers import get_or_create_obj, update_or_create_obj
15
from uid.models import (
16
    DictBreed, DictCountry, DictSpecie, DictSex, DictUberon, Animal,
17
    Sample, DictDevelStage, DictPhysioStage)
18
from submissions.helpers import send_message
19
from validation.helpers import construct_validation_message
20
from validation.models import ValidationSummary
21
22
from .exceptions import ExcelImportError
23
from .exceltemplate import ExcelTemplateReader
24
25
# Get an instance of a logger
26
logger = logging.getLogger(__name__)
27
28
29
def fill_uid_breeds(submission_obj, template):
    """Load the breed records of an excel template as DictBreed objects.

    Every record returned by ``template.get_breed_records()`` is resolved
    to a ``DictSpecie`` (through ``get_specie_check_synonyms``) and to a
    ``DictCountry`` (looked up by label); specie, country and the supplied
    breed name are then handed to ``get_or_create_obj`` for ``DictBreed``.

    Args:
        submission_obj: the submission being imported; the label of its
            ``gene_bank_country`` is used as language for species lookup.
        template: the template reader providing ``get_breed_records()``.
    """

    logger.info("fill_uid_breeds() started")

    # the gene bank country label is the language used for translations
    translation_language = submission_obj.gene_bank_country.label

    for breed_row in template.get_breed_records():
        # species are in latin names, but a common name could also be
        # found in translation tables
        breed_specie = DictSpecie.get_specie_check_synonyms(
            species_label=breed_row.species,
            language=translation_language)

        # ideally the breed country is the submission one, however data
        # from other countries could be stored too
        breed_country = DictCountry.objects.get(
            label=breed_row.efabis_breed_country)

        breed_attributes = {
            'supplied_breed': breed_row.supplied_breed,
            'specie': breed_specie,
            'country': breed_country,
        }

        get_or_create_obj(DictBreed, **breed_attributes)

    logger.info("fill_uid_breeds() completed")
56
57
58
def get_relationship(animal_id_in_data_source, name, breed, owner):
    """Return the Animal referenced as parent by a template record.

    Args:
        animal_id_in_data_source: identifier of the animal declaring the
            parent; only used to build the error message.
        name: name of the parent to search for.
        breed: breed the parent is searched with.
        owner: owner the parent is searched with.

    Returns:
        The ``Animal`` matching ``name``, ``breed`` and ``owner``.

    Raises:
        ExcelImportError: if no such ``Animal`` exists. The original
            ``Animal.DoesNotExist`` is kept as the exception cause.
    """

    try:
        return Animal.objects.get(
            name=name,
            breed=breed,
            owner=owner)

    except Animal.DoesNotExist as exc:
        logger.error(exc)
        message = (
            "Unknown parent '%s': check animal '%s' "
            "in your dataset" % (name, animal_id_in_data_source))
        logger.error(message)

        # chain explicitly, so the failed lookup is reported as the cause
        raise ExcelImportError(message) from exc
74
75
76
def fill_uid_animals(submission_obj, template):
    """Load the animal records of an excel template as Animal objects.

    For every record returned by ``template.get_animal_records()`` sex,
    specie, country and breed are read from the database, the optional
    father and mother are resolved with ``get_relationship`` and the
    animal is passed to ``update_or_create_obj``. When all records are
    processed, the "animal" ``ValidationSummary`` of the submission is
    retrieved with ``get_or_create_obj`` and its counts are reset.

    Args:
        submission_obj: the submission being imported.
        template: the template reader providing ``get_animal_records()``
            and ``get_breed_from_animal()``.
    """

    logger.info("called fill_uid_animals()")

    # the gene bank country label is the language used for translations
    translation_language = submission_obj.gene_bank_country.label

    for animal_row in template.get_animal_records():
        # sex labels are matched ignoring case
        animal_sex = DictSex.objects.get(label__iexact=animal_row.sex)

        # species could be provided as synonyms
        animal_specie = DictSpecie.get_specie_check_synonyms(
            species_label=animal_row.species, language=translation_language)

        logger.debug("Found '%s' as specie" % animal_specie)

        # the breed is described in the breed record tied to this animal
        breed_row = template.get_breed_from_animal(animal_row)

        breed_country = DictCountry.objects.get(
            label=breed_row.efabis_breed_country)

        animal_breed = DictBreed.objects.get(
            supplied_breed=breed_row.supplied_breed,
            specie=animal_specie,
            country=breed_country)

        logger.debug("Selected breed is %s" % animal_breed)

        # parents are optional: resolve them only when declared
        father_obj = None
        mother_obj = None

        if animal_row.father_id_in_data_source:
            logger.debug(
                "Getting %s as father" % animal_row.father_id_in_data_source)

            father_obj = get_relationship(
                animal_row.animal_id_in_data_source,
                animal_row.father_id_in_data_source,
                animal_breed,
                submission_obj.owner)

        if animal_row.mother_id_in_data_source:
            logger.debug(
                "Getting %s as mother" % animal_row.mother_id_in_data_source)

            mother_obj = get_relationship(
                animal_row.animal_id_in_data_source,
                animal_row.mother_id_in_data_source,
                animal_breed,
                submission_obj.owner)

        # translate the accuracy description into its stored value
        location_accuracy = ACCURACIES.get_value_by_desc(
            animal_row.birth_location_accuracy)

        # these values go in defaults to avoid collisions when updating
        # data
        animal_defaults = dict(
            alternative_id=animal_row.alternative_animal_id,
            description=animal_row.animal_description,
            sex=animal_sex,
            father=father_obj,
            mother=mother_obj,
            birth_date=animal_row.birth_date,
            birth_location=animal_row.birth_location,
            birth_location_latitude=animal_row.birth_location_latitude,
            birth_location_longitude=animal_row.birth_location_longitude,
            birth_location_accuracy=location_accuracy,
        )

        update_or_create_obj(
            Animal,
            name=animal_row.animal_id_in_data_source,
            breed=animal_breed,
            owner=submission_obj.owner,
            submission=submission_obj,
            defaults=animal_defaults)

    # get the validation summary for animals and reset its counts
    animal_summary = get_or_create_obj(
        ValidationSummary,
        submission=submission_obj,
        type="animal")

    animal_summary.reset_all_count()

    logger.info("fill_uid_animals() completed")
172
173
174
def _parse_time_column(record, column):
    """Parse the time value stored in the ``column`` attribute of a record.

    Args:
        record: a sample record of the excel template.
        column: name of the record attribute holding the time value.

    Returns:
        The two values unpacked from ``parse_image_timedelta``.

    Raises:
        ExcelImportError: if a ``ValueError`` is raised while parsing the
            value or unpacking the result. The message names the sample
            and the column, and the ``ValueError`` is kept as the cause.
    """

    try:
        value, units = parse_image_timedelta(getattr(record, column))

    except ValueError as exc:
        message = (
            "Error for Sample '%s' at %s "
            "column: %s" % (
                record.sample_id_in_data_source, column, exc))
        logger.error(message)
        raise ExcelImportError(message) from exc

    return value, units


def parse_times(record, animal):
    """Try to deal with times in excel templates.

    Args:
        record: a sample record of the excel template.
        animal: the animal the sample belongs to; its ``birth_date`` is
            used when the age at collection is not provided.

    Returns:
        A tuple ``(animal_age_at_collection, time_units,
        preparation_interval, preparation_interval_units)``. Each pair is
        ``(None, None)`` when it can't be determined from the record.

    Raises:
        ExcelImportError: if a provided time column can't be parsed.
    """

    animal_age_at_collection, time_units = None, None

    # animal age could be present or not
    if record.animal_age_at_collection:
        animal_age_at_collection, time_units = _parse_time_column(
            record, "animal_age_at_collection")

    elif record.collection_date and animal.birth_date:
        # derive animal age at collection if I have recommended values
        animal_age_at_collection, time_units = image_timedelta(
            record.collection_date, animal.birth_date)

    # another time column, parsed only when provided
    preparation_interval, preparation_interval_units = None, None

    if record.sampling_to_preparation_interval:
        preparation_interval, preparation_interval_units = \
            _parse_time_column(record, "sampling_to_preparation_interval")

    return (animal_age_at_collection, time_units, preparation_interval,
            preparation_interval_units)
216
217
218
def fill_uid_samples(submission_obj, template):
    """Load the sample records of an excel template as Sample objects.

    For every record returned by ``template.get_sample_records()`` the
    related animal is read from the database (through its specie, country
    and breed), the ontology terms are retrieved with
    ``get_or_create_obj``, time columns are parsed with ``parse_times``
    and the sample is passed to ``update_or_create_obj``. When all records
    are processed, the "sample" ``ValidationSummary`` of the submission is
    retrieved with ``get_or_create_obj`` and its counts are reset.

    Args:
        submission_obj: the submission being imported.
        template: the template reader providing ``get_sample_records()``,
            ``get_animal_from_sample()`` and ``get_breed_from_animal()``.
    """

    logger.info("called fill_uid_samples()")

    # the gene bank country label is the language used for translations
    translation_language = submission_obj.gene_bank_country.label

    for sample_row in template.get_sample_records():
        # the animal record this sample refers to
        animal_row = template.get_animal_from_sample(sample_row)

        # species could be provided as synonyms
        sample_specie = DictSpecie.get_specie_check_synonyms(
            species_label=animal_row.species,
            language=translation_language)

        logger.debug("Found '%s' as specie" % sample_specie)

        # the breed is described in the breed record tied to the animal
        breed_row = template.get_breed_from_animal(animal_row)

        breed_country = DictCountry.objects.get(
            label=breed_row.efabis_breed_country)

        sample_breed = DictBreed.objects.get(
            supplied_breed=breed_row.supplied_breed,
            specie=sample_specie,
            country=breed_country)

        logger.debug("Selected breed is %s" % sample_breed)

        sample_animal = Animal.objects.get(
            name=animal_row.animal_id_in_data_source,
            breed=sample_breed,
            owner=submission_obj.owner)

        logger.debug("Selected animal is %s" % sample_animal)

        # NOTE(review): the original comment states that organism parts
        # need to be in lowercase, but the label is used as read from the
        # record - confirm it is normalised upstream
        part_term = get_or_create_obj(
            DictUberon,
            label=sample_row.organism_part
        )

        # developmental and physiological stages are not mandatory
        devel_term = None
        physio_term = None

        if sample_row.developmental_stage:
            devel_term = get_or_create_obj(
                DictDevelStage,
                label=sample_row.developmental_stage
            )

        if sample_row.physiological_stage:
            physio_term = get_or_create_obj(
                DictPhysioStage,
                label=sample_row.physiological_stage
            )

        # time columns are parsed all together
        (collection_age, collection_age_units, prep_interval,
         prep_interval_units) = parse_times(sample_row, sample_animal)

        # translate descriptions into their stored values
        place_accuracy = ACCURACIES.get_value_by_desc(
            sample_row.collection_place_accuracy)

        # TODO; check those values in excel columns
        storage_value = SAMPLE_STORAGE.get_value_by_desc(
            sample_row.sample_storage)

        processing_value = SAMPLE_STORAGE_PROCESSING.get_value_by_desc(
            sample_row.sample_storage_processing)

        # these values go in defaults to avoid collisions when updating
        # data
        sample_defaults = dict(
            alternative_id=sample_row.alternative_sample_id,
            description=sample_row.sample_description,
            protocol=sample_row.specimen_collection_protocol,
            collection_date=sample_row.collection_date,
            collection_place_latitude=sample_row.collection_place_latitude,
            collection_place_longitude=sample_row.collection_place_longitude,
            collection_place=sample_row.collection_place,
            collection_place_accuracy=place_accuracy,
            organism_part=part_term,
            developmental_stage=devel_term,
            physiological_stage=physio_term,
            animal_age_at_collection=collection_age,
            animal_age_at_collection_units=collection_age_units,
            availability=sample_row.availability,
            storage=storage_value,
            storage_processing=processing_value,
            preparation_interval=prep_interval,
            preparation_interval_units=prep_interval_units,
        )

        update_or_create_obj(
            Sample,
            name=sample_row.sample_id_in_data_source,
            animal=sample_animal,
            owner=submission_obj.owner,
            submission=submission_obj,
            defaults=sample_defaults)

    # get the validation summary for samples and reset its counts
    sample_summary = get_or_create_obj(
        ValidationSummary,
        submission=submission_obj,
        type="sample")

    sample_summary.reset_all_count()

    logger.info("fill_uid_samples() completed")
339
340
341
def check_UID(submission_obj, reader):
    """Check that the terms used in the template can be imported.

    The reader checks on sex, species, species in animal sheet, countries
    and accuracies are called in this order; each one returns a
    ``(check, not_found)`` pair and the first failing check stops the
    function.

    Args:
        submission_obj: the submission being imported; its
            ``gene_bank_country`` is passed to the species check.
        reader: the template reader providing the ``check_*`` methods.

    Raises:
        ExcelImportError: when a check fails; the message reports the
            ``not_found`` value returned by that check.
    """

    def require(outcome, template_message):
        # raise with the formatted message when the check didn't pass
        passed, missing = outcome

        if not passed:
            raise ExcelImportError(template_message % (missing))

    # check for species and sex in a similar way as cryoweb does
    # TODO: identical to CRBanim. Move to a mixin
    require(
        reader.check_sex(),
        "Not all Sex terms are loaded into database: "
        "check for '%s' in your dataset")

    # species need to be checked against the gene bank country
    require(
        reader.check_species(submission_obj.gene_bank_country),
        "Some species are not loaded into database: "
        "check for '%s' in your dataset")

    require(
        reader.check_species_in_animal_sheet(),
        "Some species are not defined in breed sheet: "
        "check for '%s' in your dataset")

    require(
        reader.check_countries(),
        "Those countries are not loaded in database: "
        "check for '%s' in your dataset")

    require(
        reader.check_accuracies(),
        "Not all accuracy levels are defined in database: "
        "check for '%s' in your dataset")
387
388
389
def upload_template(submission_obj):
    """Import the uploaded excel template of a submission.

    The uploaded file is read with ``ExcelTemplateReader``, checked with
    ``check_UID`` and then loaded as breeds, animals and samples. The
    submission status and message are saved and a message is sent with
    ``send_message`` in both the failing and the successful case.

    Args:
        submission_obj: the submission whose uploaded file is imported.

    Returns:
        ``True`` when the import completes, ``False`` when an exception
        is raised while checking or loading data.
    """

    logger.info("Importing from Excel template file")

    # this is the full path in docker container
    template_path = submission_obj.get_uploaded_file_path()

    # reading the file happens outside the guarded section below
    template_reader = ExcelTemplateReader()
    template_reader.read_file(template_path)

    try:
        # check UID data like cryoweb does
        check_UID(submission_obj, template_reader)

        # animals look up their breed and samples look up their animal,
        # so loaders are called in this order
        for loader in (fill_uid_breeds, fill_uid_animals, fill_uid_samples):
            loader(submission_obj, template_reader)

    except Exception as exc:
        # track the failure in database
        submission_obj.status = ERROR
        submission_obj.message = "Error in importing data: %s" % (str(exc))
        submission_obj.save()

        # send async message
        send_message(submission_obj)

        logger.error("Error in importing from Template: %s" % (exc))
        logger.exception(exc)

        return False

    # no errors: mark the submission as loaded
    submission_obj.message = (
        "Template import completed for submission: %s" % (
            submission_obj.id))
    submission_obj.status = LOADED
    submission_obj.save()

    # send async message
    validation_message = construct_validation_message(submission_obj)
    send_message(submission_obj, validation_message=validation_message)

    logger.info("Import from Template is complete")

    return True
448