Completed
Push — master ( eecf08...313cfe )
by Paolo
15s queued 12s
created

cryoweb.helpers.check_countries()   A

Complexity

Conditions 3

Size

Total Lines 13
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 7
dl 0
loc 13
rs 10
c 0
b 0
f 0
cc 3
nop 0
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Mon May 14 10:28:39 2018
5
6
@author: Paolo Cozzi <[email protected]>
7
"""
8
9
# --- import
10
11
import logging
12
import os
13
import shlex
14
import subprocess
15
16
from decouple import AutoConfig
17
18
from django.conf import settings
19
20
from common.constants import LOADED, ERROR, MISSING, UNKNOWN
21
from common.helpers import image_timedelta
22
from image_app.helpers import get_or_create_obj, update_or_create_obj
23
from image_app.models import (
24
    Animal, DictBreed, DictCountry, DictSex, DictSpecie, Name, Sample,
25
    Submission, DictUberon)
26
from language.helpers import check_species_synonyms
27
from submissions.helpers import send_message
28
from validation.helpers import construct_validation_message
29
from validation.models import ValidationSummary
30
31
from .models import db_has_data as cryoweb_has_data
32
from .models import VAnimal, VBreedsSpecies, VTransfer, VVessels
33
34
# Get an instance of a logger
35
logger = logging.getLogger(__name__)
36
37
38
# --- check functions
39
40
41
# a function to detect if cryoweb species have synonyms or not
42
def check_species(country):
    """Check that every cryoweb species has a synonym for ``country``.

    Species are read through ``VBreedsSpecies.get_all_species()`` and handed
    to ``check_species_synonyms`` with ``create=True``.

    :country: forwarded untouched to ``check_species_synonyms``. The
        original notes describe it as an image_app.models.DictCountry.label,
        while check_UID passes ``submission.gene_bank_country`` -- confirm
        which one the helper expects
    :returns: the result of ``check_species_synonyms``
    :raises CryoWebImportError: when the view returns no species at all
    """

    # every species known to cryoweb, as exposed by the view
    species = VBreedsSpecies.get_all_species()

    # database name is needed only to build messages
    cryoweb_db = settings.DATABASES['cryoweb']['NAME']

    if len(species) == 0:
        raise CryoWebImportError(
            "You have no species in %s database" % cryoweb_db)

    logger.debug("Got %s species from %s", species, cryoweb_db)

    # each species needs a synonym; create=True asks the helper to add the
    # missing ones
    return check_species_synonyms(species, country, create=True)
63
64
65
# a function to test if I have all countries or not
66
def check_countries():
    """Return the efabis countries that lack a ``DictCountry`` object.

    :returns: a list holding every country returned by
        ``VBreedsSpecies.get_all_countries()`` for which no ``DictCountry``
        with that label exists; the list is empty when all are known
    """

    # keep only the countries without a dictionary entry, in the same order
    # in which the view returned them
    return [
        efabis_country
        for efabis_country in VBreedsSpecies.get_all_countries()
        if not DictCountry.objects.filter(label=efabis_country).exists()
    ]
79
80
81
# a function specific for cryoweb import path to ensure that all required
82
# fields in UID are present. There could be a function like this in others
83
# import paths
84
def check_UID(submission):
    """Ensure that UID is ready before loading data from cryoweb.

    Three conditions are verified, in this order: the ``DictSex`` table
    holds at least one record, ``check_species`` succeeds for the
    submission's gene bank country, and ``check_countries`` reports no
    missing country.

    :submission: a submission instance; only ``gene_bank_country`` is read
    :returns: True when every check passes
    :raises CryoWebImportError: as soon as one of the checks fails
    """

    logger.debug("Checking UID")

    # exists() asks the database whether a row is there, instead of loading
    # the whole table only to measure its length
    if not DictSex.objects.exists():
        raise CryoWebImportError("You have to upload DictSex data")

    # test for specie synonyms in submission language or default one
    # otherwise, fill synonym table with new terms then throw exception
    if not check_species(submission.gene_bank_country):
        raise CryoWebImportError("Some species haven't a synonym!")

    # test for countries in UID
    countries_not_found = check_countries()

    if countries_not_found:
        raise CryoWebImportError(
            "Not all countries are loaded into database: "
            "check for '%s' in your dataset" % (countries_not_found)
        )

    # return a status
    return True
110
111
112
# A class to deal with cryoweb import errors
113
class CryoWebImportError(Exception):
    """Raised when something goes wrong during a cryoweb import."""
115
116
117
# --- Upload data into cryoweb database
118
def upload_cryoweb(submission_id):
    """Imports backup into the cryoweb db

    This function uses the container's installation of psql to import a backup
    file into the "cryoweb" database. The imported backup file is the one
    uploaded with the submission identified by ``submission_id``.

    :submission_id: the submission primary key
    :returns: True when psql completed, False when running it failed (the
        submission is then marked as ERROR and a message is sent)
    :raises CryoWebImportError: when the cryoweb database already has data
    """

    # define some useful variables
    database_name = settings.DATABASES['cryoweb']['NAME']

    # define a decouple config object
    config_dir = os.path.join(settings.BASE_DIR, 'image')
    config = AutoConfig(search_path=config_dir)

    # get a submission object
    submission = Submission.objects.get(pk=submission_id)

    # debug
    logger.info("Importing data into cryoweb staging area")
    logger.debug("Got Submission %s", submission)

    # If cryoweb has data, update submission message and return exception:
    # maybe another process is running or there is another type of problem
    if cryoweb_has_data():
        logger.error("Cryoweb has data!")

        # update submission status
        submission.status = ERROR
        submission.message = "Error in importing data: Cryoweb has data"
        submission.save()

        # send async message
        send_message(submission)

        raise CryoWebImportError("Cryoweb has data!")

    # this is the full path in docker container
    fullpath = submission.get_uploaded_file_path()

    # define command line
    cmd_line = "/usr/bin/psql -U {user} -h db {database}".format(
        database=database_name, user='cryoweb_insert_only')

    cmds = shlex.split(cmd_line)

    logger.debug("Executing: %s", " ".join(cmds))

    try:
        # the context manager closes the dump once psql has finished; the
        # open() stays inside the try block so that an unreadable file is
        # reported on the submission like any other failure
        with open(fullpath) as dump_file:
            result = subprocess.run(
                cmds,
                stdin=dump_file,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
                # env replaces the whole environment of the child process:
                # psql receives nothing but the password
                env={'PGPASSWORD': config('CRYOWEB_INSERT_ONLY_PW')},
                encoding='utf8'
            )

    except Exception as exc:
        # save a message in database
        submission.status = ERROR
        submission.message = "Error in importing data: %s" % (str(exc))
        submission.save()

        # send async message
        send_message(submission)

        # debug
        logger.error("error in calling upload_cryoweb: %s", exc)

        return False

    # one line of psql output is counted as one executed statement
    n_of_statements = len(result.stdout.split("\n"))
    logger.debug("%s statement executed", n_of_statements)

    # check=True only catches a non-zero exit status: whatever psql wrote on
    # stderr is still worth logging
    if result.stderr:
        for line in result.stderr.split("\n"):
            logger.error(line)

    logger.info("{filename} uploaded into {database}".format(
        filename=submission.uploaded_file.name, database=database_name))

    return True
204
205
206
# --- Upload data from cryoweb to UID
207
208
209
def fill_uid_breeds(submission):
    """Fill the UID DictBreed model from the cryoweb breeds view.

    :submission: a submission instance; ``gene_bank_country.label`` is used
        as the language when resolving species synonyms
    """

    logger.info("fill_uid_breeds() started")

    # species synonyms are looked up in the submission language
    language = submission.gene_bank_country.label

    for row in VBreedsSpecies.objects.all():
        # dictionary tables are required here: DictSpecie has to be filled
        # already
        breed_specie = DictSpecie.get_by_synonym(
            synonym=row.ext_species, language=language)

        # a breed carries its own country: ideally the same as the
        # submission one, but a cryoweb instance may store breeds coming
        # from other countries
        breed_country = DictCountry.objects.get(label=row.efabis_country)

        # add the breed unless it is already there
        get_or_create_obj(
            DictBreed,
            supplied_breed=row.efabis_mcname,
            specie=breed_specie,
            country=breed_country)

    logger.info("fill_uid_breeds() completed")
237
238
239
def fill_uid_names(submission):
    """Create a Name entry for each record of the VTransfer view.

    :submission: a submission instance; each name is bound to it and to its
        owner
    """

    logger.info("called fill_uid_names()")

    for v_transfer in VTransfer.objects.all():
        # names are stored as they are: identical records map to the same
        # entry, so no duplicates are generated.
        # HINT: The ramon example will be an issue in validation step
        full_name = v_transfer.get_fullname()

        get_or_create_obj(
            Name,
            name=full_name,
            submission=submission,
            owner=submission.owner)

    logger.info("fill_uid_names() completed")
257
258
259
def fill_uid_animals(submission):
    """Load animals from the cryoweb VAnimal view into the UID.

    For each record of the view the species, country, breed and the Name
    entries of the animal and of both parents are fetched, then an Animal
    is created or updated. At the end the "animal" ValidationSummary of the
    submission gets its counts reset.

    :submission: a submission instance
    :raises CryoWebImportError: when a record has a sex other than 'm' or 'f'
    """

    logger.info("called fill_uid_animals()")

    # species synonyms are looked up in the submission language
    language = submission.gene_bank_country.label

    # cryoweb sex codes mapped to the DictSex objects stored in the UID
    sex_by_code = {
        'm': DictSex.objects.get(label="male"),
        'f': DictSex.objects.get(label="female"),
    }

    for record in VAnimal.objects.all():
        # species translated through the dictionary
        specie = DictSpecie.get_by_synonym(
            synonym=record.ext_species, language=language)

        # a breed could be specie/country specific, so the country object
        # is needed to select it
        country = DictCountry.objects.get(label=record.efabis_country)

        breed = DictBreed.objects.get(
            supplied_breed=record.efabis_mcname,
            specie=specie,
            country=country)

        logger.debug("Selected breed is %s", breed)

        # names of the animal itself, then of its father and mother
        logger.debug("Getting %s as my name", record.ext_animal)
        name = Name.objects.get(
            name=record.ext_animal, submission=submission)

        logger.debug("Getting %s as father", record.ext_sire)
        father = Name.objects.get(
            name=record.ext_sire, submission=submission)

        logger.debug("Getting %s as mother", record.ext_dam)
        mother = Name.objects.get(
            name=record.ext_dam, submission=submission)

        # refuse any sex code which is not known
        if record.ext_sex not in sex_by_code:
            raise CryoWebImportError(
                "Unknown sex '%s' for '%s'" % (record.ext_sex, record))

        sex = sex_by_code[record.ext_sex]

        # coordinates with no stated precision get UNKNOWN accuracy, absent
        # coordinates get MISSING.
        # NOTE(review): this is a truthiness test, so a coordinate equal to
        # 0 counts as absent -- confirm this is intended
        has_coordinates = record.latitude and record.longitude
        accuracy = UNKNOWN if has_coordinates else MISSING

        # everything but the name goes through defaults, to avoid
        # collisions when updating data
        update_or_create_obj(
            Animal,
            name=name,
            defaults={
                'alternative_id': record.db_animal,
                'breed': breed,
                'sex': sex,
                'father': father,
                'mother': mother,
                'birth_date': record.birth_dt,
                'birth_location_latitude': record.latitude,
                'birth_location_longitude': record.longitude,
                'birth_location_accuracy': accuracy,
                'description': record.comment,
                'owner': submission.owner
            })

    # get (or create) the validation summary for animals and reset counts
    summary = get_or_create_obj(
        ValidationSummary,
        submission=submission,
        type="animal")

    summary.reset_all_count()

    logger.info("fill_uid_animals() completed")
357
358
359
def fill_uid_samples(submission):
    """Load samples from the cryoweb VVessels view into the UID.

    For each vessel a Name is created if needed, the Animal it belongs to
    is fetched, the organism part is stored in DictUberon and a Sample is
    created or updated. At the end the "sample" ValidationSummary of the
    submission gets its counts reset.

    :submission: a submission instance
    """

    logger.info("called fill_uid_samples()")

    for vessel in VVessels.objects.all():
        # sample names are not in the UID yet: insert them now
        sample_name = get_or_create_obj(
            Name,
            name=vessel.ext_vessel,
            submission=submission,
            owner=submission.owner)

        # the animal is found through its name within this submission
        animal = Animal.objects.get(
            name__name=vessel.ext_animal,
            name__submission=submission)

        # organism parts need to be in lowercase
        part_label = vessel.get_organism_part().lower()
        organism_part = get_or_create_obj(DictUberon, label=part_label)

        # the cryoweb animal record gives access to the birth date
        cryoweb_animal = VAnimal.objects.get(db_animal=vessel.db_animal)

        # animal age at collection; image_timedelta is expected to deal
        # with NULL values
        age, age_units = image_timedelta(
            vessel.production_dt, cryoweb_animal.birth_dt)

        # everything but the name goes through defaults, to avoid
        # collisions when updating data
        update_or_create_obj(
            Sample,
            name=sample_name,
            defaults={
                'alternative_id': vessel.db_vessel,
                'collection_date': vessel.production_dt,
                # 'protocol': vessel.get_protocol_name(),
                'organism_part': organism_part,
                'animal': animal,
                'description': vessel.comment,
                'owner': submission.owner,
                'animal_age_at_collection': age,
                'animal_age_at_collection_units': age_units,
                # 'storage': vessel.ext_vessel_type,
            })

    # get (or create) the validation summary for samples and reset counts
    summary = get_or_create_obj(
        ValidationSummary,
        submission=submission,
        type="sample")

    summary.reset_all_count()

    logger.info("fill_uid_samples() completed")
422
423
424
def cryoweb_import(submission):
    """Import data from cryoweb stage database into UID

    Runs the UID check and then the breed, name, animal and sample loaders.
    Any exception is recorded on the submission (status ERROR) and
    reported with a message; on success the submission becomes LOADED and a
    message carrying the validation summary is sent.

    :submission: a submission instance
    :returns: True when the import completed, False when a step failed
    """

    logger.info("Importing from cryoweb staging area")

    # check_UID comes first: it raises if the database is not initialized.
    # The loaders follow in dependency order: breeds, names, animals, samples
    import_steps = (
        check_UID,
        fill_uid_breeds,
        fill_uid_names,
        fill_uid_animals,
        fill_uid_samples,
    )

    try:
        for step in import_steps:
            step(submission)

    except Exception as exc:
        # keep track of the failure in database
        submission.status = ERROR
        submission.message = "Error in importing data: %s" % (str(exc))
        submission.save()

        # send async message
        send_message(submission)

        logger.error("error in importing from cryoweb: %s", exc)
        logger.exception(exc)

        return False

    # reached only when every step succeeded
    submission.message = "Cryoweb import completed for submission: %s" % (
        submission.id)
    submission.status = LOADED
    submission.save()

    send_message(
        submission,
        validation_message=construct_validation_message(submission))

    logger.info("Import from staging area is complete")

    return True
479