Passed
Push — master ( c17a7d...ea11a0 )
by Osma
02:27 queued 13s
created

annif.lexical.util.make_collection_matrix()   A

Complexity

Conditions 4

Size

Total Lines 16
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 11
nop 2
dl 0
loc 16
rs 9.85
c 0
b 0
f 0
1
"""Utility methods for lexical algorithms"""
2
3
import collections
4
from rdflib import URIRef
5
from rdflib.namespace import SKOS
6
import numpy as np
7
from scipy.sparse import lil_matrix, csc_matrix
8
9
10
def get_subject_labels(graph, uri, properties, language):
    """Collect the labels of a subject that are tagged with a given language.

    For each property in `properties`, the objects of (URIRef(uri), property)
    are read from `graph`; those whose `language` attribute equals `language`
    are returned as plain strings, in the order they were encountered.
    """
    labels = []
    for prop in properties:
        for value in graph.objects(URIRef(uri), prop):
            if value.language == language:
                labels.append(str(value))
    return labels
15
16
17
def make_relation_matrix(graph, vocab, property):
    """Build a sparse boolean subject-to-subject matrix for one property.

    The result is a square CSC matrix with one row and one column per entry
    in `vocab.subjects`. Cell [s, o] is True when `graph` contains a
    (subject, property, object) statement whose subject and object both
    resolve to subject IDs through `vocab.subjects.by_uri`. Statements where
    either end does not resolve (by_uri returns None) are skipped.
    """
    n_subj = len(vocab.subjects)
    # Use the builtin bool: the np.bool alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    matrix = lil_matrix((n_subj, n_subj), dtype=bool)

    for subj, obj in graph.subject_objects(property):
        subj_id = vocab.subjects.by_uri(str(subj), warnings=False)
        obj_id = vocab.subjects.by_uri(str(obj), warnings=False)
        if subj_id is not None and obj_id is not None:
            matrix[subj_id, obj_id] = True

    # LIL is efficient for incremental construction; convert once at the end.
    return csc_matrix(matrix)
28
29
30
def make_collection_matrix(graph, vocab):
    """Build a sparse boolean collection-to-subject membership matrix.

    Every (collection, member) pair linked by SKOS.member in `graph` is
    considered; members that do not resolve to a subject ID through
    `vocab.subjects.by_uri` (None result) are ignored, and a collection with
    no resolvable members gets no row. The result is a CSC matrix with one
    row per remaining collection (in first-seen order) and one column per
    entry in `vocab.subjects`; cell [c, s] is True when subject s is a
    member of collection c.
    """
    # make an index with all collection members
    c_members = collections.defaultdict(list)
    for coll, member in graph.subject_objects(SKOS.member):
        member_id = vocab.subjects.by_uri(str(member), warnings=False)
        if member_id is not None:
            c_members[str(coll)].append(member_id)

    # Use the builtin bool: the np.bool alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    c_matrix = lil_matrix((len(c_members), len(vocab.subjects)),
                          dtype=bool)

    # populate the matrix for collection -> subject_id
    for c_id, members in enumerate(c_members.values()):
        c_matrix[c_id, members] = True

    return csc_matrix(c_matrix)
46