Completed
Push — master ( 4f7ee6...646424 )
by Paolo
08:30 queued 06:53
created

uid.management.commands.initializedb   A

Complexity

Total Complexity 32

Size/Duplication

Total Lines 283
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 32
eloc 159
dl 0
loc 283
rs 9.84
c 0
b 0
f 0

1 Method

Rating   Name   Duplication   Size   Complexity  
A Command.handle() 0 18 1

8 Functions

Rating   Name   Duplication   Size   Complexity  
B standardize_institute_name() 0 34 6
A fill_ontology() 0 30 3
A fill_Organization() 0 32 4
A fill_DictRoles() 0 9 2
B fill_Species() 0 49 6
A fill_DictSex() 0 19 4
A fill_Countries() 0 23 3
A fill_OrganismParts() 0 14 3
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Thu Jan 25 15:28:05 2018
5
6
@author: Paolo Cozzi <[email protected]>
7
8
This Django command script needs to be called after database initialization.
9
The aim is to fill tables like ontology tables (roles, sex) in order to upload
10
data from cryoweb database (or xls template files, or ...)
11
"""
12
13
import collections
14
import csv
15
import io
16
import logging
17
import os
18
19
from image_validation.use_ontology import get_general_breed_by_species
20
21
from django.core.management import BaseCommand
22
23
from common.constants import OBO_URL, CURATED
24
from uid.models import (
25
    DictCountry, DictRole, DictSex, DictSpecie, Ontology, Organization,
26
    DictUberon)
27
from language.models import SpecieSynonym
28
29
# Get an instance of a logger
30
# module-level logger, named after this module (__name__); used by every
# fill_* function below to report the objects it creates
logger = logging.getLogger(__name__)
31
32
33
def fill_ontology():
    """Register the ontology libraries described by the embedded table.

    The table is semicolon separated; its header row is turned into field
    names (lower case, spaces replaced by underscores). Every following row
    is stored through ``Ontology.objects.update_or_create``, keyed on
    ``library_name`` and using the whole row as defaults. Newly created
    objects are logged.
    """

    table = """Library name;Library URI;Comment
PATO;{obo_url};Phenotype And Trait Ontology
LBO;{obo_url};Livestock Breed Ontology
EFO;http://www.ebi.ac.uk/efo;Experimental Factor Ontology
OBI;{obo_url};Ontology for Biomedical Investigations
NCBITaxon;{obo_url};NCBI Taxonomy
UBERON;{obo_url};cross-species ontology covering anatomical structures in animals
GAZ;{obo_url};A gazetteer constructed on ontological principles
NCIT;{obo_url};NCI Thesaurus OBO Edition
""".format(obo_url=OBO_URL)

    rows = csv.reader(io.StringIO(table), delimiter=";")

    # the first row holds the column titles: derive the field names from it
    fields = [title.lower().replace(" ", "_") for title in next(rows)]
    Record = collections.namedtuple('Data', fields)

    for values in rows:
        record = Record._make(values)

        # create the library, or refresh the stored one
        ontology, created = Ontology.objects.update_or_create(
            library_name=record.library_name,
            defaults=record._asdict())

        if created:
            logger.info("Created: %s" % (ontology))
63
64
65
def fill_DictSex():
    """Ensure the three ``DictSex`` entries (male, female, unknown) exist.

    Each entry is fetched or created by its label/term pair, in the order
    listed below; an entry is logged only when it had to be created.
    """

    sexes = (
        ('male', 'PATO_0000384'),
        ('female', 'PATO_0000383'),
        ('record of unknown sex', 'OBI_0000858'),
    )

    for label, term in sexes:
        sex, created = DictSex.objects.get_or_create(label=label, term=term)

        if created:
            logger.info("Created: %s" % (sex))
84
85
86
# a function to fill up DictRoles
87
# TODO: do I need to fill tables with descendant terms?
88
def fill_DictRoles():
    """Fetch or create the 'submitter' role and return it.

    Returns:
        The ``DictRole`` object with label 'submitter' and term
        'EFO_0001741'. It is logged when it had to be created.
    """

    submitter, is_new = DictRole.objects.get_or_create(
        term='EFO_0001741', label='submitter')

    if is_new:
        logger.info("Created: %s" % (submitter))

    return submitter
97
98
99
# a function to fill up dictspecie and speciesynonym
100
def fill_Species():
    """Populate species and their cryoweb synonyms.

    For every cryoweb word / scientific name pair listed below:

    * fetch or create the ``DictSpecie`` having that scientific name as
      label;
    * ask ``get_general_breed_by_species`` for the general breed and store
      its label and term on the specie when either differs from the saved
      value;
    * fetch or create the ``SpecieSynonym`` tying the cryoweb word to the
      specie for the default language.

    Created objects and general breed updates are logged.
    """

    # insert countries and get the default one, used as synonym language
    language = fill_Countries()

    # these are the cryoweb DE species and their synonyms
    cryoweb = {
        'Cattle': 'Bos taurus',
        'Chicken': 'Gallus gallus',
        'Deer': 'Cervidae',
        'Duck (domestic)': 'Anas platyrhynchos',
        'Goat': 'Capra hircus',
        'Goose (domestic)': 'Anser anser',
        'Horse': 'Equus caballus',
        'Pig': 'Sus scrofa',
        'Rabbit': 'Oryctolagus cuniculus',
        'Sheep': 'Ovis aries',
        'Turkey': 'Meleagris gallopavo'
    }

    for word, specie in cryoweb.items():
        dictspecie, created = DictSpecie.objects.get_or_create(
            label=specie)

        if created:
            logger.info("Created: %s", specie)

        # update with the general breed
        result = get_general_breed_by_species(specie)

        # an empty (or missing) result means there is nothing to update
        if result:
            general_breed_label = result['text']
            # keep only the last "/"-separated piece of the ontology term
            general_breed_term = result['ontologyTerms'].split("/")[-1]

            # refresh when either value is stale, so that label and term
            # cannot get out of sync
            if (dictspecie.general_breed_label != general_breed_label or
                    dictspecie.general_breed_term != general_breed_term):
                logger.info("Added general breed: %s", general_breed_label)
                dictspecie.general_breed_label = general_breed_label
                dictspecie.general_breed_term = general_breed_term
                dictspecie.save()

        synonym, created = SpecieSynonym.objects.get_or_create(
            dictspecie=dictspecie,
            language=language,
            word=word)

        if created:
            logger.info("Created: %s", synonym)
149
150
151
def fill_Countries():
    """Ensure the curated countries exist and return the default one.

    Returns:
        The 'United Kingdom' ``DictCountry`` object, returned as the
        default language for translations.
    """

    def curated_country(label, term):
        """Fetch or create a curated country, logging it when created."""
        country, is_new = DictCountry.objects.get_or_create(
            label=label,
            term=term,
            confidence=CURATED)

        if is_new:
            logger.info("Created: %s" % (country))

        return country

    # the default country for the default language
    default_country = curated_country('United Kingdom', 'NCIT_C17233')

    # a country difficult to annotate with zooma
    curated_country('Colombia', 'NCIT_C16449')

    return default_country
174
175
176
def fill_OrganismParts():
    """Ensure the manually curated organism parts exist in ``DictUberon``.

    Every label/term pair listed below is fetched or created with
    ``CURATED`` confidence; created objects are logged.
    """

    curated_parts = [('strand of hair', "UBERON_0001037")]

    for part_label, part_term in curated_parts:
        organism_part, is_new = DictUberon.objects.get_or_create(
            confidence=CURATED, label=part_label, term=part_term)

        if is_new:
            logger.info("Created: %s" % (organism_part))
190
191
192
def standardize_institute_name(original):
    """Return a nicely cased version of an all-uppercase institute name.

    Only names that contain at least one space and are written entirely in
    upper case are rewritten; any other name (for instance ``EBI``, or a
    name which is already mixed case) is returned as it is.

    When rewriting, the name is split on single spaces and every word is
    title-cased, except that:

    * connecting words (``de``, ``la``, ``of``, ...) are lower-cased,
      wherever they appear in the name;
    * known acronyms (``INRA``, ``FAO``) are kept upper-cased.

    Args:
        original (str): the institute name to standardize.

    Returns:
        str: the standardized name.
    """

    lowercase_words = {'de', 'la', 'of', 'and', 'y', 'fuer', 'del', 'l'}
    acronyms = {'INRA', 'FAO'}

    # names without spaces (like EBI or IBBA) and names which are not fully
    # upper case are treated as they are
    if " " not in original or original.upper() != original:
        return original

    def recase(word):
        """Pick the casing of a single word of the name."""
        lowered = word.lower()
        if lowered in lowercase_words:
            return lowered

        uppered = word.upper()
        if uppered in acronyms:
            return uppered

        return word.title()

    return ' '.join(recase(word) for word in original.split(' '))
226
227
228
def fill_Organization():
    """Fill the organization table from ``organization_list.csv``.

    The file is looked up in the directory of this module and is read as a
    semicolon-separated table with three columns (id, name, country).
    For each row the country is fetched or created by label, then the
    organization is fetched or created with its standardized name, the
    role returned by ``fill_DictRoles`` and that country. Created objects
    are logged.
    """

    base_dir = os.path.dirname(os.path.abspath(__file__))
    filename = os.path.join(base_dir, "organization_list.csv")

    Data = collections.namedtuple('Data', "id name country")

    # the context manager closes the file even if importing a row fails;
    # newline="" is what the csv module asks for when reading files
    with open(filename, newline="") as handle:
        reader = csv.reader(handle, delimiter=";")

        # get a role
        role = fill_DictRoles()

        for fields in reader:
            # csv.reader yields an empty list for blank lines: skip them
            # instead of failing while building the record
            if not fields:
                continue

            row = Data._make(fields)

            # get a country object
            country, created = DictCountry.objects.get_or_create(
                label=row.country)

            if created:
                logger.info("Created: %s", country)

            # HINT: could be better to fix organization names in
            # organization_list?
            organization, created = Organization.objects.get_or_create(
                name=standardize_institute_name(row.name),
                role=role,
                country=country)

            if created:
                logger.info("Created: %s", organization)
260
261
262
class Command(BaseCommand):
    """Management command which fills the dictionary tables."""

    help = 'Fill database tables like roles, sex, etc'

    def handle(self, *args, **options):
        """Run every fill step, in the order listed below."""

        steps = (
            fill_ontology,        # ontology libraries
            fill_DictSex,         # sex table
            fill_DictRoles,       # roles table
            fill_Species,         # species and their synonyms
            fill_Organization,    # organizations
            fill_OrganismParts,   # organism parts
        )

        for step in steps:
            step()
283