Completed
Pull Request — master (#344)
by Osma
06:44
created

annif.corpus.skos.serialize_subjects_to_skos()   A

Complexity

Conditions 2

Size

Total Lines 12
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 9
dl 0
loc 12
rs 9.95
c 0
b 0
f 0
cc 2
nop 3
1
"""Support for subjects loaded from a SKOS/RDF file"""
2
3
import shutil
4
import rdflib
5
import rdflib.util
6
from rdflib.namespace import SKOS, RDF, OWL
7
from .types import Subject, SubjectCorpus
8
9
10
def serialize_subjects_to_skos(subjects, language, path):
    """Build an RDF graph describing the given subjects as SKOS concepts
    and write it out as a Turtle file.

    Each subject contributes two triples: an rdf:type skos:Concept
    statement for its URI, and a skos:prefLabel literal holding the
    subject's label tagged with the given language. The finished graph is
    serialized in Turtle syntax to the given path."""

    skos_graph = rdflib.Graph()
    skos_graph.namespace_manager.bind('skos', SKOS)
    for subj in subjects:
        concept = rdflib.URIRef(subj.uri)
        pref_label = rdflib.Literal(subj.label, language)
        skos_graph.add((concept, RDF.type, SKOS.Concept))
        skos_graph.add((concept, SKOS.prefLabel, pref_label))
    skos_graph.serialize(destination=path, format='turtle')
22
23
24
class SubjectFileSKOS(SubjectCorpus):
    """A subject corpus that uses SKOS files"""

    def __init__(self, path, language):
        """Parse the RDF file at the given path into an in-memory graph.

        The serialization format is guessed from the file name using
        rdflib.util.guess_format(). The language is stored and later used
        to select which labels are reported by the subjects property."""
        self.path = path
        self.language = language
        self.graph = rdflib.Graph()
        # Graph.parse() is called directly instead of the Graph.load()
        # wrapper, which is deprecated in rdflib 5 and removed in rdflib 6
        self.graph.parse(self.path,
                         format=rdflib.util.guess_format(self.path))

    @property
    def subjects(self):
        """Generate a Subject for each usable skos:Concept in the graph.

        Concepts flagged with owl:deprecated true are skipped, as are
        concepts for which the graph reports no preferred label in the
        corpus language. The yielded subjects carry the concept URI and the
        first matching label; their text is always None."""
        for concept in self.graph.subjects(RDF.type, SKOS.Concept):
            if (concept, OWL.deprecated, rdflib.Literal(True)) in self.graph:
                continue
            labels = self.graph.preferredLabel(concept, lang=self.language)
            if not labels:
                continue
            # preferredLabel() returns (property, label) pairs; use the
            # label of the first pair
            label = str(labels[0][1])
            yield Subject(uri=str(concept), label=label, text=None)

    @staticmethod
    def is_rdf_file(path):
        """return True if the path looks like an RDF file that can be loaded
        as SKOS"""

        fmt = rdflib.util.guess_format(path)
        return fmt is not None

    def save_skos(self, path, language):
        """Save the contents of the subject vocabulary into a SKOS/Turtle
        file with the given path name.

        The language parameter is accepted but not used by this method."""

        if self.path.endswith('.ttl'):
            # input is already in Turtle syntax, no need to reserialize
            try:
                shutil.copyfile(self.path, path)
            except shutil.SameFileError:
                # source and destination are the same file, so the Turtle
                # data is already where it needs to be
                pass
        else:
            # need to serialize into Turtle
            self.graph.serialize(destination=path, format='turtle')
62