Passed
Pull Request — master (#496)
by
unknown
02:00
created

annif.util   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 98
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 60
dl 0
loc 98
rs 10
c 0
b 0
f 0
wmc 17

7 Functions

Rating   Name   Duplication   Size   Complexity  
A cleanup_uri() 0 5 3
A merge_hits() 0 9 1
A atomic_save() 0 19 3
A parse_sources() 0 16 3
A boolean() 0 6 1
A parse_args() 0 16 5
A identity() 0 3 1
1
"""Utility functions for Annif"""
2
3
import glob
4
import os
5
import os.path
6
import tempfile
7
import numpy as np
8
from annif import logger
9
from annif.suggestion import VectorSuggestionResult
10
11
12
def atomic_save(obj, dirname, filename, method=None):
    """Save the given object into the given directory with the given
    filename, using a temporary file and renaming the temporary file to the
    final name.

    The object must have a .save(path) method, unless the method parameter
    is given; in that case method(obj, path) is called instead. Every file
    whose name starts with the temporary file name is renamed, keeping
    whatever extra suffix it has after that name.

    If saving raises, the temporary file(s) are removed and the exception
    is re-raised, so that failed saves leave nothing behind in dirname."""

    prefix, suffix = os.path.splitext(filename)
    tempfd, tempfilename = tempfile.mkstemp(
        prefix=prefix, suffix=suffix, dir=dirname)
    os.close(tempfd)
    # Escape the temporary path so that glob metacharacters occurring in the
    # directory or file name are matched literally.
    temp_pattern = glob.escape(tempfilename) + '*'
    target = os.path.join(dirname, filename)
    logger.debug('saving %s to temporary file %s', str(obj)[:90], tempfilename)
    try:
        if method is not None:
            method(obj, tempfilename)
        else:
            obj.save(tempfilename)
    except BaseException:
        # Saving failed: remove whatever was written under the temporary
        # name before propagating the error.
        for leftover in glob.glob(temp_pattern):
            try:
                os.remove(leftover)
            except OSError:
                # Best-effort cleanup; the original error matters more.
                pass
        raise
    for fn in glob.glob(temp_pattern):
        # Keep any extra suffix that follows the temporary name.
        newname = target + fn[len(tempfilename):]
        logger.debug('renaming temporary file %s to %s', fn, newname)
        # os.replace overwrites an existing target on every platform,
        # whereas os.rename fails on Windows if the target exists.
        os.replace(fn, newname)
31
32
33
def cleanup_uri(uri):
    """Strip one pair of enclosing angle brackets from a URI. The URI is
    returned unchanged unless it both starts with '<' and ends with '>'."""
    is_bracketed = uri.startswith('<') and uri.endswith('>')
    return uri[1:-1] if is_bracketed else uri
38
39
40
def merge_hits(weighted_hits, subject_index):
    """Combine hits from several sources into a single result.

    weighted_hits is a sequence of WeightedSuggestion objects; each one
    provides a .weight and a .hits attribute. The subject_index is passed to
    hits.as_vector() to turn every set of hits into a score vector. The
    vectors are averaged using the weights, and the average is returned
    wrapped in a VectorSuggestionResult."""

    source_weights = []
    for entry in weighted_hits:
        source_weights.append(entry.weight)
    score_vectors = []
    for entry in weighted_hits:
        score_vectors.append(entry.hits.as_vector(subject_index))
    merged = np.average(score_vectors, axis=0, weights=source_weights)
    return VectorSuggestionResult(merged)
49
50
51
def parse_sources(sourcedef):
    """Turn a source definition such as 'src1:1.0,src2' into a list of
    (src_id, weight) tuples. A source without an explicit weight gets the
    weight 1.0. The returned weights are normalized by dividing each one by
    the sum of all weights."""

    parsed = []
    weight_sum = 0.0
    for item in sourcedef.strip().split(','):
        name, colon, rest = item.strip().partition(':')
        if colon:
            # only the text up to the next colon (if any) is the weight
            value = float(rest.split(':')[0])
        else:
            value = 1.0
        weight_sum += value
        parsed.append((name, value))
    return [(name, value / weight_sum) for name, value in parsed]
67
68
69
def parse_args(param_string):
    """Parse a string of comma separated arguments such as '42,43,key=abc'
    into a list of positional args ['42', '43'] and a dict of keyword args
    {'key': 'abc'}.

    All values are returned as strings; no type conversion or whitespace
    stripping is performed. An item is split at its first '=' only, so the
    value of a keyword argument may itself contain '=' characters. An empty
    (or None) parameter string gives an empty list and an empty dict."""

    if not param_string:
        return [], {}
    posargs = []
    kwargs = {}
    for p_string in param_string.split(','):
        # Split at the first '=' only: an item like 'expr=a=b' becomes the
        # keyword argument expr -> 'a=b' instead of being silently dropped.
        key, sep, value = p_string.partition('=')
        if sep:
            kwargs[key] = value
        else:
            posargs.append(p_string)
    return posargs, kwargs
85
86
87
def boolean(val):
    """Interpret the given value as a boolean. The value is converted to a
    string and lowercased; the result is True if that string is one of '1',
    'yes', 'true' or 'on', and False for anything else."""

    normalized = str(val).lower()
    return normalized in ('1', 'yes', 'true', 'on')
93
94
95
def identity(x):
    """No-op function: hand back the argument exactly as it was received"""
    return x
98