annif.vocab.rules   A
last analyzed

Complexity

Total Complexity 25

Size/Duplication

Total Lines 107
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 74
dl 0
loc 107
rs 10
c 0
b 0
f 0
wmc 25

7 Functions

Rating   Name   Duplication   Size   Complexity  
D kwargs_to_exclude_uris() 0 45 12
A uris_by_type() 0 6 2
A remove_uris() 0 6 3
A uris_by_scheme() 0 6 2
A resolve_uri_or_curie() 0 7 2
A uris_by_collection() 0 6 2
A add_uris() 0 5 2
1
"""Support for exclude/include rules for subject vocabularies"""
2
3
from rdflib import RDF, Graph, URIRef
4
from rdflib.namespace import SKOS
5
6
import annif
7
from annif.exception import ConfigurationException
8
9
from .vocab import AnnifVocabulary
10
11
# Module-level alias for the logger object exposed by the annif package;
# the lookup helpers below use it to warn when a rule matches nothing.
logger = annif.logger
12
13
14
def resolve_uri_or_curie(graph: Graph, value: str) -> URIRef:
    """Turn *value* into a URI reference, expanding it as a CURIE if possible.

    The graph's namespace manager is asked to expand *value* first. When it
    rejects the value with ``ValueError`` (not a CURIE, or the prefix is not
    defined in the graph), *value* is wrapped as a full URI unchanged.
    """
    namespaces = graph.namespace_manager
    try:
        resolved = namespaces.expand_curie(value)
    except ValueError:
        # Expansion failed, so take the value at face value as a full URI.
        resolved = URIRef(value)
    return resolved
21
22
23
def uris_by_type(graph: Graph, type_: str, action: str) -> list[str]:
    """List the URIs of every subject in *graph* that has the given RDF type.

    *type_* may be a CURIE or a full URI. The URIs are returned as plain
    strings. If nothing matches, a warning prefixed with *action* is logged
    and the empty list is returned.
    """
    type_uri = resolve_uri_or_curie(graph, type_)
    matches = list(map(str, graph.subjects(RDF.type, type_uri)))
    if matches:
        return matches
    # An empty result usually points at a misconfigured rule, so say so.
    logger.warning(f"{action}: no concepts found with type {type_uri}")
    return matches
29
30
31
def uris_by_scheme(graph: Graph, scheme: str, action: str) -> list[str]:
    """List the URIs of every subject in *graph* that is in the given scheme.

    *scheme* may be a CURIE or a full URI; membership is determined by the
    ``skos:inScheme`` property. The URIs are returned as plain strings. If
    nothing matches, a warning prefixed with *action* is logged and the empty
    list is returned.
    """
    scheme_uri = resolve_uri_or_curie(graph, scheme)
    matches = list(map(str, graph.subjects(SKOS.inScheme, scheme_uri)))
    if matches:
        return matches
    # An empty result usually points at a misconfigured rule, so say so.
    logger.warning(f"{action}: no concepts found in scheme {scheme_uri}")
    return matches
37
38
39
def uris_by_collection(graph: Graph, collection: str, action: str) -> list[str]:
    """List the URIs of every member of the given collection in *graph*.

    *collection* may be a CURIE or a full URI; members are the objects of its
    ``skos:member`` statements. The URIs are returned as plain strings. If
    there are none, a warning prefixed with *action* is logged and the empty
    list is returned.
    """
    collection_uri = resolve_uri_or_curie(graph, collection)
    members = list(map(str, graph.objects(collection_uri, SKOS.member)))
    if members:
        return members
    # An empty result usually points at a misconfigured rule, so say so.
    logger.warning(f"{action}: no concepts found in collection {collection_uri}")
    return members
45
46
47
def add_uris(
    graph: Graph, uris_func: callable, uris_set: set[str], vals: list[str], action: str
) -> None:
    """Add to *uris_set* every URI that *uris_func* finds for each of *vals*.

    *uris_func* is called as ``uris_func(graph, val, action)`` once per value,
    in order, and its result is merged into *uris_set* in place.
    """
    for selector in vals:
        found = uris_func(graph, selector, action)
        uris_set.update(found)
52
53
54
def remove_uris(
    graph: Graph, uris_func: callable, uris_set: set[str], vals: list[str], action: str
) -> None:
    """Drop from *uris_set* every URI that *uris_func* finds for each of *vals*.

    *uris_func* is called as ``uris_func(graph, val, action)`` once per value,
    in order. URIs that are not currently in *uris_set* are ignored.
    """
    for selector in vals:
        found = uris_func(graph, selector, action)
        # difference_update silently skips elements that are absent.
        uris_set.difference_update(found)
60
61
62
def kwargs_to_exclude_uris(vocab: AnnifVocabulary, kwargs: dict[str, str]) -> set[str]:
    """Translate exclude/include rules into the set of subject URIs to exclude.

    Rules are applied one after another, in the iteration order of *kwargs*,
    to a single set that starts out empty. A later ``include*`` rule can
    therefore take back URIs that an earlier ``exclude*`` rule added. Every
    value is a ``|``-separated list of items.

    Recognized keys:

    * ``exclude``: add the listed items verbatim; if ``*`` is one of the
      items, add every resource typed ``skos:Concept`` instead.
    * ``include``: remove the listed items verbatim.
    * ``exclude_type`` / ``include_type``: add / remove the URIs that
      ``uris_by_type`` returns for each item.
    * ``exclude_scheme`` / ``include_scheme``: the same, using
      ``uris_by_scheme``.
    * ``exclude_collection`` / ``include_collection``: the same, using
      ``uris_by_collection``.

    Args:
        vocab: vocabulary whose ``as_graph()`` supplies the graph that the
            graph-based rules are evaluated against.
        kwargs: mapping from rule name to its ``|``-separated value.

    Returns:
        The set of URIs (as strings) that remain excluded after all rules
        have been applied.

    Raises:
        ConfigurationException: if a key is not one of the recognized rules.
    """
    exclude_uris: set[str] = set()
    graph = None

    def get_graph():
        # Fetch the graph only when a rule actually needs it, and at most
        # once per call instead of once per rule.
        # NOTE(review): assumes repeated as_graph() calls yield equivalent
        # graphs and may be costly -- confirm against AnnifVocabulary.
        nonlocal graph
        # Compare with None explicitly: an empty graph may well be falsy.
        if graph is None:
            graph = vocab.as_graph()
        return graph

    # Rule name -> (how to change the set, how to look URIs up in the graph).
    graph_rules = {
        "exclude_type": (add_uris, uris_by_type),
        "exclude_scheme": (add_uris, uris_by_scheme),
        "exclude_collection": (add_uris, uris_by_collection),
        "include_type": (remove_uris, uris_by_type),
        "include_scheme": (remove_uris, uris_by_scheme),
        "include_collection": (remove_uris, uris_by_collection),
    }

    for key, value in kwargs.items():
        vals = value.split("|")
        if key == "exclude":
            if "*" in vals:
                # The wildcard stands for every concept in the vocabulary.
                vals = uris_by_type(get_graph(), "skos:Concept", "exclude")
            exclude_uris.update(vals)
        elif key == "include":
            exclude_uris.difference_update(vals)
        elif key in graph_rules:
            change_set, uris_func = graph_rules[key]
            # The rule name doubles as the label used in warning messages.
            change_set(get_graph(), uris_func, exclude_uris, vals, key)
        else:
            raise ConfigurationException(f"unknown vocab keyword argument {key}")

    return exclude_uris
107