|
1
|
|
|
"""Support for subjects loaded from a SKOS/RDF file""" |
|
2
|
|
|
|
|
3
|
|
|
import shutil |
|
4
|
|
|
import rdflib |
|
5
|
|
|
import rdflib.util |
|
6
|
|
|
from rdflib.namespace import SKOS, RDF, OWL |
|
7
|
|
|
from .types import Subject, SubjectCorpus |
|
8
|
|
|
|
|
9
|
|
|
|
|
10
|
|
|
def serialize_subjects_to_skos(subjects, language, path):
    """Create a SKOS representation of the given subjects and serialize it
    into a SKOS/Turtle file with the given path name.

    Each subject is written as a skos:Concept identified by its ``uri``,
    with its ``label`` as a skos:prefLabel tagged with ``language``. A
    skos:notation is written only for subjects whose ``notation`` is not
    None.

    :param subjects: iterable of objects with ``uri``, ``label`` and
        ``notation`` attributes
    :param language: language tag attached to every skos:prefLabel
    :param path: destination of the Turtle file
    """

    graph = rdflib.Graph()
    graph.namespace_manager.bind('skos', SKOS)
    for subject in subjects:
        concept = rdflib.URIRef(subject.uri)
        graph.add((concept, RDF.type, SKOS.Concept))
        graph.add((concept,
                   SKOS.prefLabel,
                   rdflib.Literal(subject.label, language)))
        # A missing notation must not be written at all: wrapping None in a
        # Literal would store the text "None" as if it were a real notation.
        if subject.notation is not None:
            graph.add((concept,
                       SKOS.notation,
                       rdflib.Literal(subject.notation)))
    graph.serialize(destination=path, format='turtle')
|
25
|
|
|
|
|
26
|
|
|
|
|
27
|
|
|
class SubjectFileSKOS(SubjectCorpus):
    """A subject corpus that uses SKOS files"""

    def __init__(self, path, language):
        """Load the RDF file at ``path`` into an in-memory graph.

        :param path: path of the SKOS/RDF file; the serialization format is
            guessed from the path by ``rdflib.util.guess_format``
        :param language: language tag used when selecting subject labels
        """
        self.path = path
        self.language = language
        self.graph = rdflib.Graph()
        # Graph.parse() is the supported entry point; the older Graph.load()
        # was only a deprecated wrapper around it and is not available in
        # newer rdflib releases.
        self.graph.parse(self.path,
                         format=rdflib.util.guess_format(self.path))

    def _preferred_labels(self, concept):
        """Return the preferred labels of ``concept`` in the corpus language.

        skos:prefLabel is tried first and rdfs:label second; the labels of
        the first property that has any match are returned, otherwise an
        empty list. This mirrors the deprecated ``Graph.preferredLabel()``
        so that the class does not depend on that method being present.
        """
        lang = self.language

        def matches(label):
            # None: accept any label; "": only labels without a language
            # tag; otherwise the tag must match exactly.
            if lang is None:
                return True
            if lang == "":
                return label.language is None
            return label.language == lang

        for label_type in (SKOS.prefLabel, rdflib.namespace.RDFS.label):
            labels = [label
                      for label in self.graph.objects(concept, label_type)
                      if matches(label)]
            if labels:
                return labels
        return []

    @property
    def subjects(self):
        """Yield a Subject for every non-deprecated concept that has a
        label in the corpus language.

        Concepts without a matching label are skipped. ``notation`` is the
        concept's skos:notation as a string, or None when it has none;
        ``text`` is always None.
        """
        for concept in self.concepts:
            labels = self._preferred_labels(concept)
            notation = self.graph.value(concept, SKOS.notation, None, any=True)
            if not labels:
                continue
            label = str(labels[0])
            if notation is not None:
                notation = str(notation)
            yield Subject(uri=str(concept), label=label, notation=notation,
                          text=None)

    @property
    def concepts(self):
        """Yield every skos:Concept in the graph that is not marked with
        ``owl:deprecated true``."""
        for concept in self.graph.subjects(RDF.type, SKOS.Concept):
            if (concept, OWL.deprecated, rdflib.Literal(True)) in self.graph:
                continue
            yield concept

    def get_concept_labels(self, concept, label_types, language):
        """Return, as strings, the values of the given label properties of
        ``concept`` whose language tag equals ``language``.

        :param concept: the concept node to look up
        :param label_types: iterable of label properties to read, in order
        :param language: language tag the labels must carry
        """
        return [str(label)
                for label_type in label_types
                for label in self.graph.objects(concept, label_type)
                if label.language == language]

    @staticmethod
    def is_rdf_file(path):
        """return True if the path looks like an RDF file that can be loaded
        as SKOS"""

        # guess_format() returns None when it does not recognize the path
        fmt = rdflib.util.guess_format(path)
        return fmt is not None

    def save_skos(self, path, language):
        """Save the contents of the subject vocabulary into a SKOS/Turtle
        file with the given path name.

        :param path: destination of the Turtle file
        :param language: not used by this implementation
        """

        if self.path.endswith('.ttl'):
            # input is already in Turtle syntax, no need to reserialize
            shutil.copyfile(self.path, path)
        else:
            # need to serialize into Turtle
            self.graph.serialize(destination=path, format='turtle')
|
80
|
|
|
|