Completed
Push — master ( a65260...307dd7 )
by Paolo
07:35
created

fill_Species()   A

Complexity

Conditions 2

Size

Total Lines 15
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 12
dl 0
loc 15
rs 9.8
c 0
b 0
f 0
cc 2
nop 0
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Thu Jan 25 15:28:05 2018
5
6
@author: Paolo Cozzi <[email protected]>
7
8
This django command script needs to be called after database initialization.
9
The aim is to fill tables like ontology tables (roles, sex) in order to upload
10
data from cryoweb database (or xls template files, or ...)
11
"""
12
13
import collections
14
import csv
15
import io
16
import logging
17
import os
18
19
from image_validation.use_ontology import get_general_breed_by_species
20
21
from django.core.management import BaseCommand
22
23
from common.constants import OBO_URL, CURATED
24
from uid.helpers import get_or_create_obj, update_or_create_obj
25
from uid.models import (
26
    DictCountry, DictRole, DictSex, DictSpecie, Ontology, Organization,
27
    DictUberon)
28
from language.models import SpecieSynonym
29
30
# Get an instance of a logger
31
logger = logging.getLogger(__name__)
32
33
34
def fill_ontology():
    """Create or update one Ontology record per line of an embedded table.

    The table is semicolon-separated text whose first line is a header.
    Header names are lower-cased and their spaces replaced with underscores
    so they can be used as field names (``library_name``, ``library_uri``,
    ``comment``). Each row is handed to ``update_or_create_obj`` with
    ``library_name`` as the lookup and the full row as ``defaults``.
    """

    table = """Library name;Library URI;Comment
PATO;{obo_url};Phenotype And Trait Ontology
LBO;{obo_url};Livestock Breed Ontology
EFO;http://www.ebi.ac.uk/efo;Experimental Factor Ontology
OBI;{obo_url};Ontology for Biomedical Investigations
NCBITaxon;{obo_url};NCBI Taxonomy
UBERON;{obo_url};cross-species ontology covering anatomical structures in animals
GAZ;{obo_url};A gazetteer constructed on ontological principles
NCIT;{obo_url};NCI Thesaurus OBO Edition
""".format(obo_url=OBO_URL)

    rows = csv.reader(io.StringIO(table), delimiter=";")

    # the first line holds the column titles: turn them into identifiers
    fields = [title.lower().replace(" ", "_") for title in next(rows)]
    Record = collections.namedtuple('Record', fields)

    for values in rows:
        record = Record._make(values)

        # update objects
        update_or_create_obj(
            Ontology,
            library_name=record.library_name,
            defaults=record._asdict())
62
63
64
def fill_DictSex():
    """Ensure the three DictSex entries (male, female, unknown sex) exist."""

    # (label, ontology term) pairs, processed in this order
    sexes = (
        ('male', 'PATO_0000384'),
        ('female', 'PATO_0000383'),
        ('record of unknown sex', 'OBI_0000858'),
    )

    for label, term in sexes:
        get_or_create_obj(DictSex, label=label, term=term)
80
81
82
# a function to fill up DictRoles
83
# TODO: do I need to fill tables with descendant terms?
84
def fill_DictRoles():
    """Ensure the 'submitter' DictRole exists.

    Returns:
        whatever ``get_or_create_obj`` returns for the submitter role
        (``fill_Organization`` passes it on as the organization role).
    """

    submitter = {'label': 'submitter', 'term': 'EFO_0001741'}

    return get_or_create_obj(DictRole, **submitter)
92
93
94
# a function to fill up only species
95
def fill_Species():
    """Populate the species table with manually curated entries.

    Every entry is created (if missing) through ``get_or_create_obj`` with
    ``CURATED`` confidence.
    """

    # (label, NCBITaxon term) pairs
    curated_species = (
        ('Crassostrea gigas', 'NCBITaxon_29159'),
        ('Equus asinus', 'NCBITaxon_9793'),
        ('Oncorhynchus mykiss', 'NCBITaxon_8022'),
        ('Canis lupus familiaris', 'NCBITaxon_9615'),
    )

    for label, term in curated_species:
        get_or_create_obj(
            DictSpecie,
            confidence=CURATED,
            label=label,
            term=term)
110
111
112
# a function to fill up dictspecie and speciesynonym
113
def fill_SpeciesAndSynonyms():
    """Populate cryoweb dictionary tables (species and their synonyms).

    For every cryoweb word the matching DictSpecie is fetched or created,
    its general breed is refreshed from ``get_general_breed_by_species``
    when the label differs, and a SpecieSynonym linking the word to the
    specie is fetched or created. Synonyms are bound to the object returned
    by ``fill_Countries``.
    """

    # insert country and get the default language
    language = fill_Countries()

    # those are cryoweb DE species and synonyms
    cryoweb = {
        'Cattle': 'Bos taurus',
        'Chicken': 'Gallus gallus',
        'Deer': 'Cervidae',
        'Duck (domestic)': 'Anas platyrhynchos',
        'Goat': 'Capra hircus',
        'Goose (domestic)': 'Anser anser',
        'Horse': 'Equus caballus',
        'Pig': 'Sus scrofa',
        'Rabbit': 'Oryctolagus cuniculus',
        'Sheep': 'Ovis aries',
        'Turkey': 'Meleagris gallopavo',
        'Rainbow trout': 'Oncorhynchus mykiss',
        'Goose': 'Anser anser',
        'Dog': 'Canis lupus familiaris',
    }

    for word, specie in cryoweb.items():
        dictspecie = get_or_create_obj(
            DictSpecie,
            label=specie)

        # update with general specie
        result = get_general_breed_by_species(specie)

        # an empty result is treated as "no general breed available"
        if result:
            general_breed_label = result['text']
            # split the full part and get the last piece
            general_breed_term = result['ontologyTerms'].split("/")[-1]

            # save only when the stored label differs from the fetched one
            if dictspecie.general_breed_label != general_breed_label:
                dictspecie.general_breed_label = general_breed_label
                dictspecie.general_breed_term = general_breed_term
                dictspecie.save()
                # pass the argument lazily: formatting happens only if the
                # record is actually emitted
                logger.info("Added general breed: %s", general_breed_label)

        get_or_create_obj(
            SpecieSynonym,
            dictspecie=dictspecie,
            language=language,
            word=word)
161
162
163
def fill_Countries():
    """Fill countries and return the default country (for languages)"""

    def curated_country(label, term):
        # fetch or create a DictCountry flagged as manually curated
        return get_or_create_obj(
            DictCountry,
            label=label,
            term=term,
            confidence=CURATED)

    # the default country, used for the default language
    default_country = curated_country('United Kingdom', 'NCIT_C17233')

    # add a country difficult to annotate with zooma
    curated_country('Colombia', 'NCIT_C16449')

    # callers use the returned object as default language for translations
    return default_country
182
183
184
def fill_OrganismParts():
    """Fill organism parts with manually curated terms"""

    # (label, UBERON term) pairs
    curated_parts = (
        ('strand of hair', "UBERON_0001037"),
    )

    for label, term in curated_parts:
        get_or_create_obj(
            DictUberon,
            label=label,
            term=term,
            confidence=CURATED)
196
197
198
def standardize_institute_name(original):
    """Return a readable version of an all-upper-case institute name.

    Only names that contain at least one space AND are entirely upper case
    are rewritten; any other name (single words such as ``EBI`` or ``IBBA``,
    or names already in mixed case) is returned unchanged.

    When rewriting, the name is split on single spaces and each component is
    handled independently:

    * connective words (``de``, ``la``, ``of``, ...) are lower-cased;
    * known acronyms (``INRA``, ``FAO``) are kept upper case;
    * everything else is converted with ``str.title``.

    Args:
        original (str): the institute name as found in the data source.

    Returns:
        str: the standardized name, or ``original`` itself when no rewrite
        applies.
    """

    # words that must stay lower case inside a name
    lowercase_words = {'de', 'la', 'of', 'and', 'y', 'fuer', 'del', 'l'}

    # acronyms that must stay upper case
    acronyms = {'INRA', 'FAO'}

    # no space: likely an acronym like EBI or IBBA, keep it as it is.
    # not fully upper case: assume the name is already well formatted
    if " " not in original or original.upper() != original:
        return original

    def standardize(component):
        lowered = component.lower()

        if lowered in lowercase_words:
            return lowered

        uppered = component.upper()

        if uppered in acronyms:
            return uppered

        return component.title()

    # splitting and joining on the same separator preserves repeated spaces
    return ' '.join(
        standardize(component) for component in original.split(' '))
232
233
234
def fill_Organization():
    """Fill organization table from ``organization_list.csv``.

    The CSV file is expected to sit next to this module and to hold three
    semicolon-separated columns per row (id, name, country) with no header
    line. For every row the country is fetched or created by label, then an
    Organization is fetched or created with the standardized name, the role
    returned by ``fill_DictRoles`` and that country.
    """

    base_dir = os.path.dirname(os.path.abspath(__file__))
    filename = os.path.join(base_dir, "organization_list.csv")

    Data = collections.namedtuple('Data', "id name country")

    # the context manager closes the file even if a row fails to be
    # processed; newline='' is what the csv module recommends for its input
    with open(filename, newline='') as handle:
        reader = csv.reader(handle, delimiter=";")

        # get a role
        role = fill_DictRoles()

        for row in map(Data._make, reader):
            # get a country object
            country = get_or_create_obj(
                DictCountry,
                label=row.country)

            # HINT: could be better to fix organization names in
            # organization_list?
            get_or_create_obj(
                Organization,
                name=standardize_institute_name(row.name),
                role=role,
                country=country)
262
263
264
class Command(BaseCommand):
    """Management command that runs every table-filling step in sequence."""

    help = 'Fill database tables like roles, sex, etc'

    def handle(self, *args, **options):
        """Call each fill function, in the order listed below."""

        steps = (
            fill_ontology,            # ontology libraries
            fill_DictSex,             # sex table
            fill_DictRoles,           # DictRoles table
            fill_Species,             # custom species
            fill_SpeciesAndSynonyms,  # synonyms
            fill_Organization,        # organizations
            fill_OrganismParts,       # organism parts
        )

        for step in steps:
            step()
288