Completed
Push — master ( a65260...307dd7 )
by Paolo
07:35
created

excel.helpers.fill_uid.get_relationship()   A

Complexity

Conditions 2

Size

Total Lines 16
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 14
dl 0
loc 16
rs 9.7
c 0
b 0
f 0
cc 2
nop 4
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Fri Jul  5 16:37:48 2019
5
6
@author: Paolo Cozzi <[email protected]>
7
"""
8
9
import logging
10
11
from common.constants import (
12
    ERROR, LOADED, ACCURACIES, SAMPLE_STORAGE, SAMPLE_STORAGE_PROCESSING)
13
from common.helpers import image_timedelta, parse_image_timedelta
14
from uid.helpers import get_or_create_obj, update_or_create_obj
15
from uid.models import (
16
    DictBreed, DictCountry, DictSpecie, DictSex, DictUberon, Animal,
17
    Sample, DictDevelStage, DictPhysioStage)
18
from submissions.helpers import send_message
19
from validation.helpers import construct_validation_message
20
from validation.models import ValidationSummary
21
22
from .exceptions import ExcelImportError
23
from .exceltemplate import ExcelTemplateReader
24
25
# Get an instance of a logger
26
logger = logging.getLogger(__name__)
27
28
29
def fill_uid_breeds(submission_obj, template):
30
    """Fill DictBreed from a excel record"""
31
32
    logger.info("fill_uid_breeds() started")
33
34
    # ok get languages from submission (useful for translation)
35
    language = submission_obj.gene_bank_country.label
36
37
    # iterate among excel template
38
    for record in template.get_breed_records():
39
        # get a DictSpecie object. Species are in latin names, but I can
40
        # find also a common name in translation tables
41
        specie = DictSpecie.get_specie_check_synonyms(
42
            species_label=record.species,
43
            language=language)
44
45
        # get country for breeds. Ideally will be the same of submission,
46
        # however, it could be possible to store data from other contries
47
        country = DictCountry.objects.get(label=record.efabis_breed_country)
48
49
        get_or_create_obj(
50
            DictBreed,
51
            supplied_breed=record.supplied_breed,
52
            specie=specie,
53
            country=country)
54
55
    logger.info("fill_uid_breeds() completed")
56
57
58
def get_relationship(animal_id_in_data_source, name, breed, owner):
59
    try:
60
        parent = Animal.objects.get(
61
            name=name,
62
            breed=breed,
63
            owner=owner)
64
65
    except Animal.DoesNotExist as exc:
66
        logger.error(exc)
67
        message = (
68
            "Unknown parent '%s': check animal '%s' "
69
            "in your dataset" % (name, animal_id_in_data_source))
70
        logger.error(message)
71
        raise ExcelImportError(message)
72
73
    return parent
74
75
76
def fill_uid_animals(submission_obj, template):
77
    # debug
78
    logger.info("called fill_uid_animals()")
79
80
    # get language
81
    language = submission_obj.gene_bank_country.label
82
83
    # iterate among excel template
84
    for record in template.get_animal_records():
85
        # determine sex. Check for values
86
        sex = DictSex.objects.get(label__iexact=record.sex)
87
88
        # get specie (mind synonyms)
89
        specie = DictSpecie.get_specie_check_synonyms(
90
            species_label=record.species, language=language)
91
92
        logger.debug("Found '%s' as specie" % (specie))
93
94
        # how I can get breed from my data?
95
        breed_record = template.get_breed_from_animal(record)
96
97
        # get a country for this breed
98
        country = DictCountry.objects.get(
99
            label=breed_record.efabis_breed_country)
100
101
        # ok get a real dictbreed object
102
        breed = DictBreed.objects.get(
103
            supplied_breed=breed_record.supplied_breed,
104
            specie=specie,
105
            country=country)
106
107
        logger.debug("Selected breed is %s" % (breed))
108
109
        # define mother and father
110
        mother, father = None, None
111
112
        # get name for this animal and for mother and father
113
        if record.father_id_in_data_source:
114
            logger.debug("Getting %s as father" % (
115
                record.father_id_in_data_source))
116
117
            father = get_relationship(
118
                record.animal_id_in_data_source,
119
                record.father_id_in_data_source,
120
                breed,
121
                submission_obj.owner)
122
123
        if record.mother_id_in_data_source:
124
            logger.debug("Getting %s as mother" % (
125
                record.mother_id_in_data_source))
126
127
            mother = get_relationship(
128
                record.animal_id_in_data_source,
129
                record.mother_id_in_data_source,
130
                breed,
131
                submission_obj.owner)
132
133
        # now get accuracy
134
        accuracy = ACCURACIES.get_value_by_desc(
135
            record.birth_location_accuracy)
136
137
        # create a new object. Using defaults to avoid collisions when
138
        # updating data
139
        defaults = {
140
            'alternative_id': record.alternative_animal_id,
141
            'description': record.animal_description,
142
            'sex': sex,
143
            'father': father,
144
            'mother': mother,
145
            'birth_date': record.birth_date,
146
            'birth_location': record.birth_location,
147
            'birth_location_latitude': record.birth_location_latitude,
148
            'birth_location_longitude': record.birth_location_longitude,
149
            'birth_location_accuracy': accuracy,
150
        }
151
152
        # creating or updating an object
153
        update_or_create_obj(
154
            Animal,
155
            name=record.animal_id_in_data_source,
156
            breed=breed,
157
            owner=submission_obj.owner,
158
            submission=submission_obj,
159
            defaults=defaults)
160
161
    # create a validation summary object and set all_count
162
    validation_summary = get_or_create_obj(
163
        ValidationSummary,
164
        submission=submission_obj,
165
        type="animal")
166
167
    # reset counts
168
    validation_summary.reset_all_count()
169
170
    # debug
171
    logger.info("fill_uid_animals() completed")
172
173
174
def parse_times(record, animal):
175
    """Try to deal with times in excel templates"""
176
177
    animal_age_at_collection, time_units = None, None
178
179
    # animal age could be present or not
180
    try:
181
        if record.animal_age_at_collection:
182
            animal_age_at_collection, time_units = parse_image_timedelta(
183
                record.animal_age_at_collection)
184
185
        else:
186
            # derive animal age at collection
187
            animal_age_at_collection, time_units = image_timedelta(
188
                record.collection_date, animal.birth_date)
189
190
    except ValueError as exc:
191
        message = (
192
            "Error for Sample '%s' at animal_age_at_collection column: %s" % (
193
                    record.sample_id_in_data_source, exc))
194
        logger.error(message)
195
        raise ExcelImportError(message)
196
197
    # another time column
198
    preparation_interval, preparation_interval_units = None, None
199
200
    try:
201
        if record.sampling_to_preparation_interval:
202
            preparation_interval, preparation_interval_units = \
203
                parse_image_timedelta(record.sampling_to_preparation_interval)
204
205
    except ValueError as exc:
206
        message = (
207
            "Error for Sample '%s' at sampling_to_preparation_interval "
208
            "column: %s" % (
209
                record.sample_id_in_data_source, exc))
210
        logger.error(message)
211
        raise ExcelImportError(message)
212
213
    return (animal_age_at_collection, time_units, preparation_interval,
214
            preparation_interval_units)
215
216
217
def fill_uid_samples(submission_obj, template):
218
    # debug
219
    logger.info("called fill_uid_samples()")
220
221
    # get language
222
    language = submission_obj.gene_bank_country.label
223
224
    # iterate among excel template
225
    for record in template.get_sample_records():
226
        # get animal by reading record
227
        animal_record = template.get_animal_from_sample(record)
228
229
        # get specie (mind synonyms)
230
        specie = DictSpecie.get_specie_check_synonyms(
231
            species_label=animal_record.species,
232
            language=language)
233
234
        logger.debug("Found '%s' as specie" % (specie))
235
236
        # get breed from animal record
237
        breed_record = template.get_breed_from_animal(animal_record)
238
239
        # get a country for this breed
240
        country = DictCountry.objects.get(
241
            label=breed_record.efabis_breed_country)
242
243
        # ok get a real dictbreed object
244
        breed = DictBreed.objects.get(
245
            supplied_breed=breed_record.supplied_breed,
246
            specie=specie,
247
            country=country)
248
249
        logger.debug("Selected breed is %s" % (breed))
250
251
        animal = Animal.objects.get(
252
            name=animal_record.animal_id_in_data_source,
253
            breed=breed,
254
            owner=submission_obj.owner)
255
256
        logger.debug("Selected animal is %s" % (animal))
257
258
        # get a organism part. Organism parts need to be in lowercases
259
        organism_part = get_or_create_obj(
260
            DictUberon,
261
            label=record.organism_part
262
        )
263
264
        # get developmental_stage and physiological_stage terms
265
        # they are not mandatory
266
        devel_stage, physio_stage = None, None
267
268
        if record.developmental_stage:
269
            devel_stage = get_or_create_obj(
270
                DictDevelStage,
271
                label=record.developmental_stage
272
            )
273
274
        if record.physiological_stage:
275
            physio_stage = get_or_create_obj(
276
                DictPhysioStage,
277
                label=record.physiological_stage
278
            )
279
280
        # deal with time columns
281
        (animal_age_at_collection, time_units, preparation_interval,
282
         preparation_interval_units) = parse_times(record, animal)
283
284
        # now get accuracy
285
        accuracy = ACCURACIES.get_value_by_desc(
286
            record.collection_place_accuracy)
287
288
        # now get storage and storage processing
289
        # TODO; check those values in excel columns
290
        storage = SAMPLE_STORAGE.get_value_by_desc(
291
            record.sample_storage)
292
293
        storage_processing = SAMPLE_STORAGE_PROCESSING.get_value_by_desc(
294
            record.sample_storage_processing)
295
296
        # create a new object. Using defaults to avoid collisions when
297
        # updating data
298
        defaults = {
299
            'alternative_id': record.alternative_sample_id,
300
            'description': record.sample_description,
301
            'protocol': record.specimen_collection_protocol,
302
            'collection_date': record.collection_date,
303
            'collection_place_latitude': record.collection_place_latitude,
304
            'collection_place_longitude': record.collection_place_longitude,
305
            'collection_place': record.collection_place,
306
            'collection_place_accuracy': accuracy,
307
            'organism_part': organism_part,
308
            'developmental_stage': devel_stage,
309
            'physiological_stage': physio_stage,
310
            'animal_age_at_collection': animal_age_at_collection,
311
            'animal_age_at_collection_units': time_units,
312
            'availability': record.availability,
313
            'storage': storage,
314
            'storage_processing': storage_processing,
315
            'preparation_interval': preparation_interval,
316
            'preparation_interval_units': preparation_interval_units,
317
        }
318
319
        update_or_create_obj(
320
            Sample,
321
            name=record.sample_id_in_data_source,
322
            animal=animal,
323
            owner=submission_obj.owner,
324
            submission=submission_obj,
325
            defaults=defaults)
326
327
    # create a validation summary object and set all_count
328
    validation_summary = get_or_create_obj(
329
        ValidationSummary,
330
        submission=submission_obj,
331
        type="sample")
332
333
    # reset counts
334
    validation_summary.reset_all_count()
335
336
    # debug
337
    logger.info("fill_uid_samples() completed")
338
339
340
def check_UID(submission_obj, reader):
341
    # check for species and sex in a similar way as cryoweb does
342
    # TODO: identical to CRBanim. Move to a mixin
343
    check, not_found = reader.check_sex()
344
345
    # check sex
346
    if not check:
347
        message = (
348
            "Not all Sex terms are loaded into database: "
349
            "check for '%s' in your dataset" % (not_found))
350
351
        raise ExcelImportError(message)
352
353
    check, not_found = reader.check_species(
354
        submission_obj.gene_bank_country)
355
356
    # check species and related
357
    if not check:
358
        raise ExcelImportError(
359
            "Some species are not loaded into database: "
360
            "check for '%s' in your dataset" % (not_found))
361
362
    check, not_found = reader.check_species_in_animal_sheet()
363
364
    if not check:
365
        raise ExcelImportError(
366
            "Some species are not defined in breed sheet: "
367
            "check for '%s' in your dataset" % (not_found))
368
369
    # check countries
370
    check, not_found = reader.check_countries()
371
372
    if not check:
373
        raise ExcelImportError(
374
            "Those countries are not loaded in database: "
375
            "check for '%s' in your dataset" % (not_found))
376
377
    # check accuracies
378
    check, not_found = reader.check_accuracies()
379
380
    if not check:
381
        message = (
382
            "Not all accuracy levels are defined in database: "
383
            "check for '%s' in your dataset" % (not_found))
384
385
        raise ExcelImportError(message)
386
387
388
def upload_template(submission_obj):
389
    # debug
390
    logger.info("Importing from Excel template file")
391
392
    # this is the full path in docker container
393
    fullpath = submission_obj.get_uploaded_file_path()
394
395
    # read submission data
396
    reader = ExcelTemplateReader()
397
    reader.read_file(fullpath)
398
399
    # start data loading
400
    try:
401
        # check UID data like cryoweb does
402
        check_UID(submission_obj, reader)
403
404
        # BREEDS
405
        fill_uid_breeds(submission_obj, reader)
406
407
        # ANIMALS
408
        fill_uid_animals(submission_obj, reader)
409
410
        # SAMPLES
411
        fill_uid_samples(submission_obj, reader)
412
413
    except Exception as exc:
414
        # set message:
415
        message = "Error in importing data: %s" % (str(exc))
416
417
        # save a message in database
418
        submission_obj.status = ERROR
419
        submission_obj.message = message
420
        submission_obj.save()
421
422
        # send async message
423
        send_message(submission_obj)
424
425
        # debug
426
        logger.error("Error in importing from Template: %s" % (exc))
427
        logger.exception(exc)
428
429
        return False
430
431
    else:
432
        message = "Template import completed for submission: %s" % (
433
            submission_obj.id)
434
435
        submission_obj.message = message
436
        submission_obj.status = LOADED
437
        submission_obj.save()
438
439
        # send async message
440
        send_message(
441
            submission_obj,
442
            validation_message=construct_validation_message(submission_obj))
443
444
    logger.info("Import from Template is complete")
445
446
    return True
447