Passed
Push — master ( 7dd908...6601a1 )
by Osma
02:57
created

annif.util.localname()   A

Complexity

Conditions 1

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 2
dl 0
loc 5
rs 10
c 0
b 0
f 0
cc 1
nop 1
1
"""Utility functions for Annif"""
2
3
import glob
4
import os
5
import tempfile
6
import numpy as np
7
from annif import logger
8
from annif.hit import VectorAnalysisResult
9
10
11
def atomic_save(obj, dirname, filename, method=None):
    """Save an object into a directory by way of a temporary file.

    The object is first written to a temporary file created inside
    ``dirname`` (using ``filename`` as the prefix of its name). Afterwards
    every file whose path starts with the temporary path is renamed so that
    the temporary path is replaced by the final path; this also moves any
    companion files that the save routine wrote next to the temporary file.

    Args:
        obj: the object to save; it must have a ``.save(path)`` method
            unless ``method`` is given.
        dirname: directory in which the file is created.
        filename: final name of the file inside ``dirname``.
        method: optional callable invoked as ``method(obj, path)`` in place
            of ``obj.save(path)``.

    Raises:
        Any exception raised by the save routine or by the file system
        calls. If saving fails, the temporary files are removed (best
        effort) before the exception propagates.
    """
    tempfd, tempfilename = tempfile.mkstemp(prefix=filename, dir=dirname)
    # only the reserved name is needed; the save routine reopens the file
    os.close(tempfd)
    # escape the path so glob metacharacters in it are matched literally
    temp_pattern = glob.escape(tempfilename) + '*'
    logger.debug('saving %s to temporary file %s', str(obj), tempfilename)
    try:
        if method is not None:
            method(obj, tempfilename)
        else:
            obj.save(tempfilename)
    except BaseException:
        # do not leave half-written temporary files in the target directory
        for leftover in glob.glob(temp_pattern):
            try:
                os.remove(leftover)
            except OSError:
                # cleanup is best effort; the original error matters more
                pass
        raise

    final_path = os.path.join(dirname, filename)
    for fn in glob.glob(temp_pattern):
        newname = fn.replace(tempfilename, final_path)
        logger.debug('renaming temporary file %s to %s', fn, newname)
        # os.replace overwrites an existing target on every platform
        os.replace(fn, newname)
28
29
30
def cleanup_uri(uri):
    """Remove the enclosing angle brackets from a URI, if any.

    Args:
        uri: a URI string, possibly written in the form ``<...>``.

    Returns:
        The URI without its leading ``<`` and trailing ``>`` when both are
        present; otherwise the string is returned unchanged.
    """
    if uri.startswith('<') and uri.endswith('>'):
        return uri[1:-1]
    return uri
35
36
37
def merge_hits(weighted_hits, subject_index):
    """Merge hits from multiple sources into a single result.

    Each item of ``weighted_hits`` contributes its ``hits.vector`` to a
    weighted average (computed with ``numpy.average`` along axis 0), using
    the item's ``weight`` as the weight.

    Args:
        weighted_hits: an iterable of WeightedHits objects; each must
            provide ``.weight`` and ``.hits.vector``.
        subject_index: a SubjectIndex, needed to convert between subject
            IDs and URIs; it is passed on to the result object.

    Returns:
        A VectorAnalysisResult wrapping the averaged score vector.
    """
    # materialise once: the items are traversed twice below, which would
    # silently yield no scores if a one-shot iterator were passed in
    hits = list(weighted_hits)
    weights = [whit.weight for whit in hits]
    scores = [whit.hits.vector for whit in hits]
    result = np.average(scores, axis=0, weights=weights)
    return VectorAnalysisResult(result, subject_index)
46
47
48
def parse_sources(sourcedef):
    """Parse a source definition into a list of (src_id, weight) tuples.

    The definition is a comma-separated list of entries, each of the form
    ``src_id`` or ``src_id:weight``, for example ``'src1:1.0,src2'``.
    Whitespace around entries, IDs and weights is ignored.

    Args:
        sourcedef: the source definition string.

    Returns:
        A list of ``(src_id, weight)`` tuples in the order given; the
        weight is a float and defaults to 1.0 when it is not specified.

    Raises:
        ValueError: if a weight cannot be converted to a float.
    """
    sources = []
    for srcdef in sourcedef.strip().split(','):
        srcval = srcdef.strip().split(':')
        # strip the ID too, so 'src1 : 2' does not yield the ID 'src1 '
        src_id = srcval[0].strip()
        weight = float(srcval[1]) if len(srcval) > 1 else 1.0
        sources.append((src_id, weight))
    return sources
62