uid.management.commands.initializedb.standardize_institute_name() - Code Metrics - Inspection of ":twisted_rightwards_arrows: Merge pull request #68..." - cnr-ibba/IMAGE-InjectTool - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 4f7ee6...646424 )

by Paolo

created 2019-11-07 14:18 UTC

standardize_institute_name() B

↳ Parent: uid.management.commands.initializedb

Complexity

Conditions

Size

Total Lines	34
Code Lines	26

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	26
dl	0
loc	34
rs	8.3226
c	0
b	0
f	0
cc	6
nop	1

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 25 15:28:05 2018

@author: Paolo Cozzi <[email protected]>

This django command script needs to be called after database initialization.
The aim is to fill tables like ontology tables (roles, sex) in order to upload
data from cryoweb database (or xls template files, or ...)
"""

import collections
import csv
import io
import logging
import os

from image_validation.use_ontology import get_general_breed_by_species

from django.core.management import BaseCommand

from common.constants import OBO_URL, CURATED
from uid.models import (
    DictCountry, DictRole, DictSex, DictSpecie, Ontology, Organization,
    DictUberon)
from language.models import SpecieSynonym

# Module-level logger named after this module; the fill_* functions below
# report each newly created record through it at INFO level
logger = logging.getLogger(__name__)


def fill_ontology():
    """Create or refresh one Ontology record per row of an embedded table.

    The table is a semicolon separated text whose header labels are turned
    into field names (lower case, spaces replaced by underscores). Each row
    is matched on ``library_name`` and all of its columns are passed as
    ``defaults`` to ``update_or_create``, so existing records are updated.
    Only newly created records are logged.
    """

    table = """Library name;Library URI;Comment
PATO;{obo_url};Phenotype And Trait Ontology
LBO;{obo_url};Livestock Breed Ontology
EFO;http://www.ebi.ac.uk/efo;Experimental Factor Ontology
OBI;{obo_url};Ontology for Biomedical Investigations
NCBITaxon;{obo_url};NCBI Taxonomy
UBERON;{obo_url};cross-species ontology covering anatomical structures in animals
GAZ;{obo_url};A gazetteer constructed on ontological principles
NCIT;{obo_url};NCI Thesaurus OBO Edition
""".format(obo_url=OBO_URL)

    rows = csv.reader(io.StringIO(table), delimiter=";")

    # the first line holds the column labels: make them valid attribute names
    fields = [label.lower().replace(" ", "_") for label in next(rows)]
    Record = collections.namedtuple('Data', fields)

    for values in rows:
        record = Record._make(values)

        # match on library name, overwrite every column with the table values
        ontology, created = Ontology.objects.update_or_create(
            library_name=record.library_name,
            defaults=record._asdict())

        if created:
            logger.info("Created: %s", ontology)


def fill_DictSex():
    """Ensure the three DictSex records (male, female, unknown) exist.

    Records are looked up by label and term and created when missing;
    only newly created records are logged.
    """

    # (label, ontology term) pairs, processed in this order
    sexes = (
        ('male', 'PATO_0000384'),
        ('female', 'PATO_0000383'),
        ('record of unknown sex', 'OBI_0000858'),
    )

    for label, term in sexes:
        sex, created = DictSex.objects.get_or_create(label=label, term=term)

        if created:
            logger.info("Created: %s", sex)


# a function to fill up DictRoles
# TODO: need I fill tables with descendants terms?
def fill_DictRoles():
    """Ensure the 'submitter' DictRole exists and return it.

    The role is looked up by label and term and created when missing;
    creation is logged.
    """

    submitter, was_created = DictRole.objects.get_or_create(
        label='submitter',
        term='EFO_0001741')

    if was_created:
        logger.info("Created: %s", submitter)

    return submitter


# a function to fill up dictspecie and speciesynonym
def fill_Species():
    """Fill DictSpecie and SpecieSynonym with the cryoweb species.

    For every (common name, scientific name) pair a DictSpecie is fetched or
    created by its scientific name, its general breed label/term are updated
    when ``get_general_breed_by_species`` returns a non empty result with a
    different label, and the common name is stored as a SpecieSynonym bound
    to the object returned by ``fill_Countries``.
    """

    # countries are inserted first: the returned one tags every synonym
    language = fill_Countries()

    # cryoweb DE species: common name (synonym) -> scientific name
    common_to_scientific = {
        'Cattle': 'Bos taurus',
        'Chicken': 'Gallus gallus',
        'Deer': 'Cervidae',
        'Duck (domestic)': 'Anas platyrhynchos',
        'Goat': 'Capra hircus',
        'Goose (domestic)': 'Anser anser',
        'Horse': 'Equus caballus',
        'Pig': 'Sus scrofa',
        'Rabbit': 'Oryctolagus cuniculus',
        'Sheep': 'Ovis aries',
        'Turkey': 'Meleagris gallopavo'
    }

    for common_name, scientific_name in common_to_scientific.items():
        dictspecie, created = DictSpecie.objects.get_or_create(
            label=scientific_name)

        if created:
            logger.info("Created: %s", scientific_name)

        # ask the ontology helper for the general breed of this specie
        breed = get_general_breed_by_species(scientific_name)

        if breed != {}:
            breed_label = breed['text']
            # the term is the last piece of the ontology path
            breed_term = breed['ontologyTerms'].rsplit("/", 1)[-1]

            # save only when the stored label differs from the received one
            if dictspecie.general_breed_label != breed_label:
                logger.info("Added general breed: %s", breed_label)
                dictspecie.general_breed_label = breed_label
                dictspecie.general_breed_term = breed_term
                dictspecie.save()

        synonym, created = SpecieSynonym.objects.get_or_create(
            dictspecie=dictspecie,
            language=language,
            word=common_name)

        if created:
            logger.info("Created: %s", synonym)


def fill_Countries():
    """Ensure the curated countries exist and return the United Kingdom.

    Two DictCountry records are fetched or created with CURATED confidence:
    'United Kingdom' (the default country used for languages) and
    'Colombia' (a country difficult to annotate with zooma). Newly created
    records are logged.
    """

    # (label, ontology term) pairs; the first one is the default country
    curated = (
        ('United Kingdom', 'NCIT_C17233'),
        ('Colombia', 'NCIT_C16449'),
    )

    countries = []

    for label, term in curated:
        country, created = DictCountry.objects.get_or_create(
            label=label,
            term=term,
            confidence=CURATED)

        if created:
            logger.info("Created: %s", country)

        countries.append(country)

    # the default language for translations is the first curated country
    return countries[0]


def fill_OrganismParts():
    """Ensure the manually curated organism parts exist in DictUberon.

    Each (label, term) pair is fetched or created with CURATED confidence;
    newly created records are logged.
    """

    curated_parts = (
        ('strand of hair', "UBERON_0001037"),
    )

    for label, term in curated_parts:
        part, created = DictUberon.objects.get_or_create(
            label=label,
            term=term,
            confidence=CURATED
        )

        if created:
            logger.info("Created: %s", part)


def standardize_institute_name(original):
    """Return a nicely cased version of an all upper case institute name.

    Only names that contain at least one space and are entirely upper case
    are rewritten; anything else (single words such as acronyms, or names
    already in mixed case) is returned unchanged. A rewritten name is split
    on single spaces and every word is title-cased, except the words listed
    in ``special`` which keep their fixed lower or upper case spelling.
    """

    # words with a fixed spelling: articles/prepositions stay lower case,
    # acronyms stay upper case
    special = frozenset((
        'de', 'la', 'of', 'and', 'y', 'fuer', 'del', 'l',
        'INRA', 'FAO',
    ))

    # a name without spaces, or one that is not fully upper case, is kept
    # as it is
    if " " not in original or original != original.upper():
        return original

    def restyle(word):
        """Apply the fixed spelling when there is one, else title-case."""
        lowered = word.lower()
        if lowered in special:
            return lowered

        uppered = word.upper()
        if uppered in special:
            return uppered

        return word.title()

    return ' '.join(restyle(word) for word in original.split(' '))


def fill_Organization():
    """Fill the Organization table from ``organization_list.csv``.

    The file is read from the directory of this module and is expected to
    hold three semicolon separated columns (id, name, country) without a
    header line. For each row the country is fetched or created by label,
    then the organization is fetched or created with its standardized name,
    the submitter role and that country. Newly created records are logged.

    The file is opened in a ``with`` block, so it is closed even when a row
    or a database call raises.
    """

    base_dir = os.path.dirname(os.path.abspath(__file__))
    filename = os.path.join(base_dir, "organization_list.csv")

    Data = collections.namedtuple('Data', "id name country")

    # newline='' lets the csv module handle line endings itself
    with open(filename, newline='') as handle:
        reader = csv.reader(handle, delimiter=";")

        # every organization gets the same (submitter) role
        role = fill_DictRoles()

        for row in map(Data._make, reader):
            # get a country object
            country, created = DictCountry.objects.get_or_create(
                label=row.country)

            if created:
                logger.info("Created: %s", country)

            # HINT: could be better to fix organization names in
            # organization_list?
            organization, created = Organization.objects.get_or_create(
                name=standardize_institute_name(row.name),
                role=role,
                country=country)

            if created:
                logger.info("Created: %s", organization)


class Command(BaseCommand):
    """Management command that runs every fill_* step of this module."""

    help = 'Fill database tables like roles, sex, etc'

    def handle(self, *args, **options):
        """Run the fill steps one after the other, in a fixed order."""

        steps = (
            fill_ontology,       # ontology libraries
            fill_DictSex,        # sex table
            fill_DictRoles,      # roles table
            fill_Species,        # species and their synonyms
            fill_Organization,   # organizations
            fill_OrganismParts,  # organism parts
        )

        for step in steps:
            step()


1			#!/usr/bin/env python3
2			# -*- coding: utf-8 -*-
3			"""
4			Created on Thu Jan 25 15:28:05 2018
5
6			@author: Paolo Cozzi <[email protected]>
7
8			This django command script needs to be called after database initialization.
9			The aim is to fill tables like ontology tables (roles, sex) in order to upload
10			data from cryoweb database (or xls template files, or ...)
11			"""
12
13			import collections
14			import csv
15			import io
16			import logging
17			import os
18
19			from image_validation.use_ontology import get_general_breed_by_species
20
21			from django.core.management import BaseCommand
22
23			from common.constants import OBO_URL, CURATED
24			from uid.models import (
25			DictCountry, DictRole, DictSex, DictSpecie, Ontology, Organization,
26			DictUberon)
27			from language.models import SpecieSynonym
28
29			# Get an instance of a logger
30			logger = logging.getLogger(__name__)
31
32
33			def fill_ontology():
34			data = """Library name;Library URI;Comment
35			PATO;{obo_url};Phenotype And Trait Ontology
36			LBO;{obo_url};Livestock Breed Ontology
37			EFO;http://www.ebi.ac.uk/efo;Experimental Factor Ontology
38			OBI;{obo_url};Ontology for Biomedical Investigations
39			NCBITaxon;{obo_url};NCBI Taxonomy
40			UBERON;{obo_url};cross-species ontology covering anatomical structures in animals
41			GAZ;{obo_url};A gazetteer constructed on ontological principles
42			NCIT;{obo_url};NCI Thesaurus OBO Edition
43			""".format(obo_url=OBO_URL)
44
45			handle = io.StringIO(data)
46			reader = csv.reader(handle, delimiter=";")
47
48			header = next(reader)
49
50			# fix header
51			header = [col.lower().replace(" ", "_") for col in header]
52
53			Data = collections.namedtuple('Data', header)
54
55			for row in map(Data._make, reader):
56			# update objects
57			ontology, created = Ontology.objects.update_or_create(
58			library_name=row.library_name,
59			defaults=row._asdict())
60
61			if created is True:
62			logger.info("Created: %s" % (ontology))
63
64
65			def fill_DictSex():
66			# define three DictSex objects
67			male, created = DictSex.objects.get_or_create(
68			label='male', term='PATO_0000384')
69
70			if created is True:
71			logger.info("Created: %s" % (male))
72
73			female, created = DictSex.objects.get_or_create(
74			label='female', term='PATO_0000383')
75
76			if created is True:
77			logger.info("Created: %s" % (female))
78
79			unknown, created = DictSex.objects.get_or_create(
80			label='record of unknown sex', term='OBI_0000858')
81
82			if created is True:
83			logger.info("Created: %s" % (unknown))
84
85
86			# a function to fill up DictRoles
87			# TODO: need I fill tables with descendants terms?
88			def fill_DictRoles():
89			# define a submitter role
90			role, created = DictRole.objects.get_or_create(
91			label='submitter', term='EFO_0001741')
92
93			if created is True:
94			logger.info("Created: %s" % (role))
95
96			return role
97
98
99			# a function to fill up dictspecie and speciesynonym
100			def fill_Species():
101			"""Populate cryoweb dictionary tables"""
102
103			# insert country and get the default language
104			language = fill_Countries()
105
106			# those are cryoweb DE species an synonyms
107			cryoweb = {
108			'Cattle': 'Bos taurus',
109			'Chicken': 'Gallus gallus',
110			'Deer': 'Cervidae',
111			'Duck (domestic)': 'Anas platyrhynchos',
112			'Goat': 'Capra hircus',
113			'Goose (domestic)': 'Anser anser',
114			'Horse': 'Equus caballus',
115			'Pig': 'Sus scrofa',
116			'Rabbit': 'Oryctolagus cuniculus',
117			'Sheep': 'Ovis aries',
118			'Turkey': 'Meleagris gallopavo'
119			}
120
121			for word, specie in cryoweb.items():
122			dictspecie, created = DictSpecie.objects.get_or_create(
123			label=specie)
124
125			if created is True:
126			logger.info("Created: %s" % (specie))
127
128			# update with general specie
129			result = get_general_breed_by_species(specie)
130
131			if result != {}:
132			general_breed_label = result['text']
133			# split the full part and get the last piece
134			general_breed_term = result['ontologyTerms'].split("/")[-1]
135
136			if dictspecie.general_breed_label != general_breed_label:
137			logger.info("Added general breed: %s" % (general_breed_label))
138			dictspecie.general_breed_label = general_breed_label
139			dictspecie.general_breed_term = general_breed_term
140			dictspecie.save()
141
142			synonym, created = SpecieSynonym.objects.get_or_create(
143			dictspecie=dictspecie,
144			language=language,
145			word=word)
146
147			if created is True:
148			logger.info("Created: %s" % (synonym))
149
150
151			def fill_Countries():
152			"""Fill countries and return the default country (for languages)"""
153
154			# define the default country for the default language
155			united_kingdom, created = DictCountry.objects.get_or_create(
156			label='United Kingdom',
157			term='NCIT_C17233',
158			confidence=CURATED)
159
160			if created is True:
161			logger.info("Created: %s" % (united_kingdom))
162
163			# add a country difficult to annotate with zooma
164			country, created = DictCountry.objects.get_or_create(
165			label='Colombia',
166			term='NCIT_C16449',
167			confidence=CURATED)
168
169			if created is True:
170			logger.info("Created: %s" % (country))
171
172			# I will return default language for translations
173			return united_kingdom
174
175
176			def fill_OrganismParts():
177			"""Fill organism parts with manually curated terms"""
178
179			data = {'strand of hair': "UBERON_0001037"}
180
181			for label, term in data.items():
182			dictorganism, created = DictUberon.objects.get_or_create(
183			label=label,
184			term=term,
185			confidence=CURATED
186			)
187
188			if created is True:
189			logger.info("Created: %s" % (dictorganism))
190
191
192			def standardize_institute_name(original):
193			special = {
194			'de': 1,
195			'la': 1,
196			'of': 1,
197			'and': 1,
198			'y': 1,
199			'fuer': 1,
200			'del': 1,
201			'l': 1,
202			'INRA': 1,
203			'FAO': 1
204			}
205
206			# search space in original (instutute name) if no space is found
207			# it is like that institute name will be EBI or IBBA, and will be
208			# treated as it is
209			if original.find(" ") > -1:
210			if original.upper() == original:
211			components = original.split(' ')
212			# We capitalize the first letter of each component except the first
213			# one with the 'title' method and join them together.
214			result = ''
215			for component in components:
216			result = result + ' '
217			if component.lower() in special:
218			result = result + component.lower()
219			elif component.upper() in special:
220			result = result + component.upper()
221			else:
222			result = result + component.title()
223			result = result[1:]
224			return result
225			return original
226
227
228			def fill_Organization():
229			"""Fill organization table"""
230
231			base_dir = os.path.dirname(os.path.abspath(__file__))
232			filename = os.path.join(base_dir, "organization_list.csv")
233
234			# open data file
235			handle = open(filename)
236			reader = csv.reader(handle, delimiter=";")
237			Data = collections.namedtuple('Data', "id name country")
238
239			# get a role
240			role = fill_DictRoles()
241
242			for row in map(Data._make, reader):
243			# get a country object
244			country, created = DictCountry.objects.get_or_create(
245			label=row.country)
246
247			if created is True:
248			logger.info("Created: %s" % (country))
249
250			# HINT: could be better to fix organization names in organization_list?
251			organization, created = Organization.objects.get_or_create(
252			name=standardize_institute_name(row.name),
253			role=role,
254			country=country)
255
256			if created is True:
257			logger.info("Created: %s" % (organization))
258
259			handle.close()
260
261
262			class Command(BaseCommand):
263			help = 'Fill database tables like roles, sex, etc'
264
265			def handle(self, *args, **options):
266			# call commands and fill tables.
267			fill_ontology()
268
269			# Fill sex tables
270			fill_DictSex()
271
272			# fill DictRoles table
273			fill_DictRoles()
274
275			# import synonyms
276			fill_Species()
277
278			# import organizations
279			fill_Organization()
280
281			# import organisms
282			fill_OrganismParts()
283

cnr-ibba / IMAGE-InjectTool

Push — master ( 4f7ee6...646424 )

standardize_institute_name() B

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like