Completed
Push — master ( 0e2ea9...d8c485 )
by Koen
01:09
created

_add_in_dataset()   A

Complexity

Conditions 2

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
c 0
b 0
f 0
dl 0
loc 4
rs 10
1
# -*- coding: utf-8 -*-
2
'''
3
This module contains utility functions for dealing with skos providers.
4
'''
5
from __future__ import unicode_literals
6
import logging
7
import warnings
8
import sys
9
10
log = logging.getLogger(__name__)
11
12
from rdflib import Graph, Literal, Namespace
13
from rdflib.term import URIRef, BNode
14
from rdflib.namespace import RDF, SKOS, DCTERMS, VOID
15
SKOS_THES = Namespace('http://purl.org/iso25964/skos-thes#')
16
from skosprovider.skos import (
17
    Concept,
18
    Collection
19
)
20
21
from xml.dom.minidom import Node, Element
22
import html5lib
23
24
# True when running under a Python 3 interpreter.
PY3 = sys.version_info[0] == 3

# ``binary_type`` is the interpreter's native byte-string type.
if PY3:  # pragma: no cover
    binary_type = bytes
else:  # pragma: no cover
    binary_type = str
30
31
32
def rdf_dumper(provider):
    '''
    Dump a provider to a format that can be passed to a
    :class:`skosprovider.providers.RDFProvider`.

    :param skosprovider.providers.VocabularyProvider provider: The provider
        that will be turned into an :class:`rdflib.graph.Graph`.

    :rtype: :class:`rdflib.graph.Graph`
    '''
    # Passing no id list makes _rdf_dumper dump everything in the provider.
    return _rdf_dumper(provider, None)
43
44
45
def rdf_c_dumper(provider, c):
    '''
    Dump one concept or collection from a provider to a format that can be
    passed to a :class:`skosprovider.providers.RDFProvider`.

    :param skosprovider.providers.VocabularyProvider provider: The provider
        that will be turned into an :class:`rdflib.graph.Graph`.

    :param String c: The identifier of the concept or collection to dump.

    :rtype: :class:`rdflib.graph.Graph`
    '''
    # The shared dumper works on a list of ids, so wrap the single id.
    return _rdf_dumper(provider, [c])
58
59
60
def _add_relations(graph, provider, subject, predicate, ids):
    '''
    Link a subject to the concepts or collections identified by `ids`.

    Every id is looked up with ``provider.get_by_id``. A truthy result adds a
    ``(subject, predicate, URIRef(result.uri))`` triple to the graph; a falsy
    result emits a :class:`UserWarning` and adds nothing.

    :param rdflib.graph.Graph graph: The graph to add the triples to.
    :param provider: The provider used to resolve the ids.
    :param subject: The RDF subject of the triples.
    :param predicate: The RDF predicate of the triples.
    :param ids: Iterable of ids to resolve.
    '''
    for rel_id in ids:
        target = provider.get_by_id(rel_id)
        if target:
            graph.add((subject, predicate, URIRef(target.uri)))
        else:
            # stacklevel=2 attributes the warning to the calling line in
            # _rdf_dumper rather than to this helper.
            warnings.warn(_warning(rel_id), UserWarning, stacklevel=2)


def _rdf_dumper(provider, id_list=None):
    '''
    Dump a provider to a format that can be passed to a
    :class:`skosprovider.providers.RDFProvider`.

    :param skosprovider.providers.VocabularyProvider provider: The provider
        that will be turned into an :class:`rdflib.graph.Graph`.

    :param List id_list: List of id's of the data to dump. When this is
        `None` or empty, everything returned by ``provider.get_all()`` is
        dumped and the top concepts of the conceptscheme are added as well.

    :rtype: :class:`rdflib.graph.Graph`
    '''
    graph = Graph()
    graph.namespace_manager.bind("skos", SKOS)
    graph.namespace_manager.bind("dcterms", DCTERMS)
    graph.namespace_manager.bind("skos-thes", SKOS_THES)

    # Describe the conceptscheme itself.
    conceptscheme = URIRef(provider.concept_scheme.uri)
    _add_in_dataset(graph, conceptscheme, provider)
    graph.add((conceptscheme, RDF.type, SKOS.ConceptScheme))
    graph.add((conceptscheme, DCTERMS.identifier, Literal(provider.metadata['id'])))
    _add_labels(graph, provider.concept_scheme, conceptscheme)
    _add_notes(graph, provider.concept_scheme, conceptscheme)
    _add_sources(graph, provider.concept_scheme, conceptscheme)
    _add_languages(graph, provider.concept_scheme, conceptscheme)

    if not id_list:
        # Full dump: take every id and also record the top concepts.
        id_list = [x['id'] for x in provider.get_all()]
        for top in provider.get_top_concepts():
            graph.add((conceptscheme, SKOS.hasTopConcept, URIRef(top['uri'])))

    for c_id in id_list:
        c = provider.get_by_id(c_id)
        subject = URIRef(c.uri)
        _add_in_dataset(graph, subject, provider)
        graph.add((subject, DCTERMS.identifier, Literal(c.id)))
        graph.add((subject, SKOS.inScheme, conceptscheme))
        _add_labels(graph, c, subject)
        _add_notes(graph, c, subject)
        _add_sources(graph, c, subject)
        if isinstance(c, Concept):
            graph.add((subject, RDF.type, SKOS.Concept))
            _add_relations(graph, provider, subject, SKOS.broader, c.broader)
            _add_relations(graph, provider, subject, SKOS.narrower, c.narrower)
            _add_relations(graph, provider, subject, SKOS.related, c.related)
            _add_relations(
                graph, provider, subject,
                SKOS_THES.subordinateArray, c.subordinate_arrays
            )
            # Match predicates are built from the match type,
            # e.g. 'close' becomes skos:closeMatch.
            for match_type, uris in c.matches.items():
                for uri in uris:
                    graph.add((subject, URIRef(SKOS + match_type + 'Match'), URIRef(uri)))
        elif isinstance(c, Collection):
            graph.add((subject, RDF.type, SKOS.Collection))
            _add_relations(graph, provider, subject, SKOS.member, c.members)
            _add_relations(
                graph, provider, subject,
                SKOS_THES.superOrdinate, c.superordinates
            )

    return graph
143
144
145
def rdf_conceptscheme_dumper(provider):
    '''
    Dump all information of the conceptscheme of a provider to a format that
    can be passed to a :class:`skosprovider.providers.RDFProvider`.

    Only the conceptscheme itself (type, identifier, labels, notes, sources,
    languages and top concepts) is dumped, not its concepts or collections.

    :param skosprovider.providers.VocabularyProvider provider: The provider
        that will be turned into an :class:`rdflib.graph.Graph`.

    :rtype: :class:`rdflib.graph.Graph`
    '''
    graph = Graph()
    graph.namespace_manager.bind("skos", SKOS)
    graph.namespace_manager.bind("dcterms", DCTERMS)
    graph.namespace_manager.bind("skos-thes", SKOS_THES)
    conceptscheme = URIRef(provider.concept_scheme.uri)
    # NOTE(review): unlike _rdf_dumper, no void:inDataset triple is added
    # here -- confirm whether that is intentional.
    graph.add((conceptscheme, RDF.type, SKOS.ConceptScheme))
    graph.add((conceptscheme, DCTERMS.identifier, Literal(provider.metadata['id'])))
    _add_labels(graph, provider.concept_scheme, conceptscheme)
    _add_notes(graph, provider.concept_scheme, conceptscheme)
    _add_sources(graph, provider.concept_scheme, conceptscheme)
    _add_languages(graph, provider.concept_scheme, conceptscheme)
    for top in provider.get_top_concepts():
        graph.add((conceptscheme, SKOS.hasTopConcept, URIRef(top['uri'])))

    return graph
170
171
172
def _warning(id):
    '''
    Build the warning message for an id that could not be resolved.

    :param id: The id that could not be resolved.
    :rtype: String
    '''
    # Wrap the id in a 1-tuple so that a tuple id is formatted as a single
    # value instead of being unpacked as multiple format arguments.
    return 'id %s could not be resolved' % (id,)
174
175
176
def _add_in_dataset(graph, subject, provider):
    '''
    Add a ``void:inDataset`` triple for the subject.

    The dataset URI is read from the ``dataset`` -> ``uri`` entry of the
    provider's metadata. Nothing is added when that entry is missing or
    falsy.

    :param rdflib.graph.Graph graph: An RDF Graph.
    :param subject: The RDF subject to add the triple to.
    :param provider: The provider whose metadata is consulted.
    '''
    duri = provider.get_metadata().get('dataset', {}).get('uri', None)
    if duri:
        graph.add((subject, VOID.inDataset, URIRef(duri)))
180
181
182
def _add_labels(graph, c, subject):
    '''
    Add labels to the RDF graph.

    :param rdflib.graph.Graph graph: An RDF Graph.
    :param c: An object with a `labels` attribute; each label provides
        `type`, `language` and `label`.
    :param subject: The RDF subject to add the labels to.
    '''
    for label in c.labels:
        # The predicate is the SKOS namespace followed by the label type.
        predicate = URIRef(SKOS + label.type)
        lang = extract_language(label.language)
        graph.add((subject, predicate, Literal(label.label, lang=lang)))
187
188
189
def _add_notes(graph, c, subject):
    '''
    Add notes to the RDF graph.

    Notes without markup are added as language-tagged literals. Notes with
    markup are added as ``rdf:HTML`` literals, with the language passed to
    :func:`_add_lang_to_html` so it can be recorded in the HTML itself.

    :param rdflib.graph.Graph graph: An RDF Graph.
    :param c: An object with a `notes` attribute; each note provides
        `type`, `language`, `note` and `markup`.
    :param subject: The RDF subject to add the notes to.
    '''
    for n in c.notes:
        predicate = URIRef(SKOS + n.type)
        lang = extract_language(n.language)
        if n.markup is None:
            graph.add((subject, predicate, Literal(n.note, lang=lang)))
        else:
            html = _add_lang_to_html(n.note, lang)
            graph.add((subject, predicate, Literal(html, datatype=RDF.HTML)))
198
199
def _add_lang_to_html(htmltext, lang):
    '''
    Take a piece of HTML and add an xml:lang attribute to it.

    When the fragment consists of exactly one element, the attribute is set on
    that element. In every other non-empty case the content is wrapped in a
    single ``div`` that carries the attribute. An empty fragment yields an
    empty ``div`` with the attribute.

    :param htmltext: The HTML fragment.
    :param lang: The language tag. For ``'und'`` the HTML is returned
        unchanged.
    :returns: The serialised HTML fragment.
    '''
    if lang == 'und':
        return htmltext
    parser = html5lib.HTMLParser(
        tree=html5lib.treebuilders.getTreeBuilder("dom")
    )
    html = parser.parseFragment(htmltext)
    html.normalize()
    children = html.childNodes
    if len(children) == 0:
        return '<div xml:lang="%s"></div>' % lang
    only_child = children[0] if len(children) == 1 else None
    if only_child is not None and only_child.nodeType == Node.ELEMENT_NODE:
        only_child.setAttribute('xml:lang', lang)
    else:
        # Text, several nodes, or a single node that cannot carry attributes
        # (e.g. a comment): add a single encompassing div.
        div = Element('div')
        div.ownerDocument = html
        div.setAttribute('xml:lang', lang)
        div.childNodes = children
        html.childNodes = [div]
    # Serialising the fragment relies on the ``writexml`` that this module
    # installs on xml.dom.minidom.DocumentFragment.
    return html.toxml()
230
231
def _add_sources(graph, c, subject):
    '''
    Add sources to the RDF graph.

    Each source becomes a blank node typed as
    ``dcterms:BibliographicResource`` that carries the citation and is linked
    to the subject with ``dcterms:source``. Citations with markup are added as
    ``rdf:HTML`` literals, others as plain literals.

    :param rdflib.graph.Graph graph: An RDF Graph.
    :param c: A :class:`skosprovider.skos.ConceptScheme`,
        :class:`skosprovider.skos.Concept` or :class:`skosprovider.skos.Collection`
    :param subject: The RDF subject to add the sources to.
    '''
    for s in c.sources:
        source = BNode()
        graph.add((source, RDF.type, DCTERMS.BibliographicResource))
        if s.markup is None:
            citation = Literal(s.citation)
        else:
            citation = Literal(s.citation, datatype=RDF.HTML)
        graph.add((source, DCTERMS.bibliographicCitation, citation))
        graph.add((subject, DCTERMS.source, source))
248
249
def _add_languages(graph, c, subject):
    '''
    Add languages to the RDF graph.

    :param rdflib.graph.Graph graph: An RDF Graph.
    :param c: A :class:`skosprovider.skos.ConceptScheme`.
    :param subject: The RDF subject to add the languages to.
    '''
    for language in c.languages:
        # The language value is emitted exactly as the conceptscheme provides
        # it.
        graph.add((subject, DCTERMS.language, Literal(language)))
260
261
def extract_language(lang):
    '''
    Turn a language in our domain model into a IANA tag.

    :param lang: A language tag or `None`.
    :returns: ``'und'`` when no language was given, otherwise the tag, decoded
        as UTF-8 when it was passed as a byte string.
    '''
    if lang is None:
        return 'und'  # return undefined code when no language
    return text_(lang, encoding="UTF-8")
270
271
272
def text_(s, encoding='latin-1', errors='strict'):
    """ If ``s`` is a byte string, return ``s.decode(encoding, errors)``,
    otherwise return ``s`` unchanged.

    :param s: The value to convert.
    :param encoding: The encoding used to decode a byte string.
    :param errors: The error handling scheme passed to ``decode``.
    """
    # ``bytes`` is the native byte-string type on Python 3 and an alias of
    # ``str`` on Python 2.6+, so this check works on both.
    if isinstance(s, bytes):
        return s.decode(encoding, errors)
    return s
278
279
def _df_writexml(self, writer, indent="", addindent="", newl=""):
    '''
    Monkeypatch method that provides the missing `writexml` for
    :class:`xml.dom.minidom.DocumentFragment`.

    :param writer: Object the serialised XML is written to.
    :param indent: The current indentation.
    :param addindent: The indentation to add to higher levels.
    :param newl: The newline string.
    '''
    if not self.childNodes:
        return
    if (len(self.childNodes) == 1 and
            self.childNodes[0].nodeType == Node.TEXT_NODE):
        # A lone text node is written without indentation or newlines.
        self.childNodes[0].writexml(writer, '', '', '')
    else:
        for node in self.childNodes:
            node.writexml(writer, indent + addindent, addindent, newl)


from xml.dom.minidom import DocumentFragment

# Install the method so that document fragments can be serialised.
DocumentFragment.writexml = _df_writexml
297