_rdf_dumper()   F
last analyzed

Complexity

Conditions 21

Size

Total Lines 84

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 1 Features 0
Metric Value
cc 21
c 3
b 1
f 0
dl 0
loc 84
rs 2.0817

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

Complexity

Complex classes like _rdf_dumper() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# -*- coding: utf-8 -*-
2
'''
3
This module contains utility functions for dealing with skos providers.
4
'''
5
from __future__ import unicode_literals
6
import logging
7
import warnings
8
import sys
9
10
log = logging.getLogger(__name__)
11
12
from rdflib import Graph, Literal, Namespace
13
from rdflib.term import URIRef, BNode
14
from rdflib.namespace import RDF, SKOS, DCTERMS, VOID
15
SKOS_THES = Namespace('http://purl.org/iso25964/skos-thes#')
16
from skosprovider.skos import (
17
    Concept,
18
    Collection
19
)
20
21
from xml.dom.minidom import Node, Element
22
import html5lib
23
24
# True when the interpreter's major version is 3.
PY3 = sys.version_info[0] == 3

# Type used for raw byte strings: ``bytes`` on Python 3, ``str`` otherwise.
binary_type = bytes if PY3 else str  # pragma: no cover
30
31
32
def rdf_dumper(provider):
    '''
    Dump a complete provider to a format that can be passed to a
    :class:`skosprovider.providers.RDFProvider`.

    This delegates to :func:`_rdf_dumper` without an id list.

    :param skosprovider.providers.VocabularyProvider provider: The provider
        that will be turned into an :class:`rdflib.graph.Graph`.

    :rtype: :class:`rdflib.graph.Graph`
    '''
    return _rdf_dumper(provider, id_list=None)
43
44
45
def rdf_c_dumper(provider, c):
    '''
    Dump a single concept or collection from a provider to a format that can
    be passed to a :class:`skosprovider.providers.RDFProvider`.

    This delegates to :func:`_rdf_dumper` with a one-element id list.

    :param skosprovider.providers.VocabularyProvider provider: The provider
        that will be turned into an :class:`rdflib.graph.Graph`.

    :param String c: Identifier of the concept or collection to dump.

    :rtype: :class:`rdflib.graph.Graph`
    '''
    single_id = [c]
    return _rdf_dumper(provider, single_id)
58
59
60
def _rdf_dumper(provider, id_list=None):
    '''
    Dump a provider to a format that can be passed to a
    :class:`skosprovider.providers.RDFProvider`.

    The concept scheme itself is always described. When `id_list` is empty
    or `None`, every concept and collection returned by
    ``provider.get_all()`` is dumped and the scheme's top concepts are
    linked with ``skos:hasTopConcept``. Otherwise only the listed ids are
    dumped and no top concept statements are added.

    Ids that the provider cannot resolve (both the ids in `id_list` and the
    ids referenced as relations) are reported with a :class:`UserWarning`
    and skipped.

    :param skosprovider.providers.VocabularyProvider provider: The provider
        that will be turned into an :class:`rdflib.graph.Graph`.

    :param List id_list: List of id's of the data to dump.

    :rtype: :class:`rdflib.graph.Graph`
    '''
    graph = Graph()
    graph.namespace_manager.bind("skos", SKOS)
    graph.namespace_manager.bind("dcterms", DCTERMS)
    graph.namespace_manager.bind("skos-thes", SKOS_THES)
    graph.namespace_manager.bind("void", VOID)

    # Describe the concept scheme.
    conceptscheme = URIRef(provider.concept_scheme.uri)
    _add_in_dataset(graph, conceptscheme, provider)
    graph.add((conceptscheme, RDF.type, SKOS.ConceptScheme))
    graph.add((conceptscheme, DCTERMS.identifier, Literal(provider.metadata['id'])))
    _add_labels(graph, provider.concept_scheme, conceptscheme)
    _add_notes(graph, provider.concept_scheme, conceptscheme)
    _add_sources(graph, provider.concept_scheme, conceptscheme)
    _add_languages(graph, provider.concept_scheme, conceptscheme)

    if not id_list:
        # Full dump: take every id and record the top concepts of the scheme.
        id_list = [x['id'] for x in provider.get_all()]
        for top in provider.get_top_concepts():
            graph.add((conceptscheme, SKOS.hasTopConcept, URIRef(top['uri'])))

    for c_id in id_list:
        c = provider.get_by_id(c_id)
        if not c:
            # The provider signals an unknown id with a falsy value; treat it
            # the same way as an unresolvable relation instead of crashing
            # on ``c.uri``.
            warnings.warn(_warning(c_id), UserWarning)
            continue
        subject = URIRef(c.uri)
        _add_in_dataset(graph, subject, provider)
        graph.add((subject, DCTERMS.identifier, Literal(c.id)))
        graph.add((subject, SKOS.inScheme, conceptscheme))
        _add_labels(graph, c, subject)
        _add_notes(graph, c, subject)
        _add_sources(graph, c, subject)
        if isinstance(c, Concept):
            graph.add((subject, RDF.type, SKOS.Concept))
            _add_resolved_links(graph, provider, subject, SKOS.broader, c.broader)
            _add_resolved_links(graph, provider, subject, SKOS.narrower, c.narrower)
            _add_resolved_links(graph, provider, subject, SKOS.related, c.related)
            _add_resolved_links(
                graph, provider, subject,
                SKOS_THES.subordinateArray, c.subordinate_arrays
            )
            # Matches already hold URIs, so they need no lookup.
            for match_type, uris in c.matches.items():
                for uri in uris:
                    graph.add((subject, URIRef(SKOS + match_type + 'Match'), URIRef(uri)))
        elif isinstance(c, Collection):
            graph.add((subject, RDF.type, SKOS.Collection))
            _add_resolved_links(graph, provider, subject, SKOS.member, c.members)
            _add_resolved_links(
                graph, provider, subject,
                SKOS_THES.superOrdinate, c.superordinates
            )

    return graph


def _add_resolved_links(graph, provider, subject, predicate, ids):
    '''
    Link `subject` to every resolvable id in `ids` through `predicate`.

    Each id is looked up with ``provider.get_by_id``. Ids that resolve add a
    ``(subject, predicate, <uri of the resolved object>)`` statement; ids
    that do not resolve trigger a :class:`UserWarning`.

    :param rdflib.graph.Graph graph: The graph to add statements to.
    :param skosprovider.providers.VocabularyProvider provider: The provider
        used to resolve the ids.
    :param rdflib.term.URIRef subject: The subject of the statements.
    :param rdflib.term.URIRef predicate: The predicate of the statements.
    :param ids: Iterable of ids to resolve.
    '''
    for linked_id in ids:
        linked = provider.get_by_id(linked_id)
        if linked:
            graph.add((subject, predicate, URIRef(linked.uri)))
        else:
            warnings.warn(_warning(linked_id), UserWarning)
144
145
146
def rdf_conceptscheme_dumper(provider):
    '''
    Dump only the conceptscheme information of a provider to a format that
    can be passed to a :class:`skosprovider.providers.RDFProvider`.

    The resulting graph describes the concept scheme (type, identifier,
    labels, notes, sources, languages and optional dataset) and links it to
    its top concepts. No individual concepts or collections are dumped.

    :param skosprovider.providers.VocabularyProvider provider: The provider
        that will be turned into an :class:`rdflib.graph.Graph`.

    :rtype: :class:`rdflib.graph.Graph`
    '''
    graph = Graph()
    prefixes = (
        ("skos", SKOS),
        ("dcterms", DCTERMS),
        ("skos-thes", SKOS_THES),
        ("void", VOID),
    )
    for prefix, namespace in prefixes:
        graph.namespace_manager.bind(prefix, namespace)

    scheme_ref = URIRef(provider.concept_scheme.uri)
    _add_in_dataset(graph, scheme_ref, provider)
    graph.add((scheme_ref, RDF.type, SKOS.ConceptScheme))
    scheme_id = Literal(provider.metadata['id'])
    graph.add((scheme_ref, DCTERMS.identifier, scheme_id))

    # Same order as the statements would be written out one by one.
    for add_part in (_add_labels, _add_notes, _add_sources, _add_languages):
        add_part(graph, provider.concept_scheme, scheme_ref)

    for top in provider.get_top_concepts():
        top_ref = URIRef(top['uri'])
        graph.add((scheme_ref, SKOS.hasTopConcept, top_ref))

    return graph
173
174
175
def _warning(id):
176
    return 'id %s could not be resolved' % id
177
178
179
def _add_in_dataset(graph, subject, provider):
    '''
    Add a void:inDataset statement when the provider knows its dataset.

    The dataset URI is read from the ``dataset`` entry (key ``uri``) of the
    provider's metadata. Nothing is added when it is missing or empty.

    :param rdflib.graph.Graph graph: The graph to add statements to.
    :param rdflib.term.URIRef subject: The subject to add an inDataset statement to.
    :param skosprovider.providers.VocabularyProvider provider:
    '''
    metadata = provider.get_metadata()
    dataset = metadata.get('dataset', {})
    dataset_uri = dataset.get('uri', None)
    if not dataset_uri:
        return
    graph.add((subject, VOID.inDataset, URIRef(dataset_uri)))
192
193
194
def _add_labels(graph, c, subject):
    '''
    Add the labels of `c` to the RDF graph.

    Every label becomes a language tagged literal. The predicate is the
    SKOS namespace joined with the label's type.

    :param rdflib.graph.Graph graph: An RDF Graph.
    :param c: An object with a `labels` attribute.
    :param subject: The RDF subject to add the labels to.
    '''
    for label in c.labels:
        graph.add((
            subject,
            URIRef(SKOS + label.type),
            Literal(label.label, lang=extract_language(label.language)),
        ))
199
200
201
def _add_notes(graph, c, subject):
    '''
    Add the notes of `c` to the RDF graph.

    Notes without markup become language tagged literals. Notes with markup
    become ``rdf:HTML`` literals; since those can not carry a language tag,
    the language is embedded in the HTML itself.

    :param rdflib.graph.Graph graph: An RDF Graph.
    :param c: An object with a `notes` attribute.
    :param subject: The RDF subject to add the notes to.
    '''
    for note in c.notes:
        note_predicate = URIRef(SKOS + note.type)
        note_lang = extract_language(note.language)
        if note.markup is not None:
            tagged_html = _add_lang_to_html(note.note, note_lang)
            value = Literal(tagged_html, datatype=RDF.HTML)
        else:
            value = Literal(note.note, lang=note_lang)
        graph.add((subject, note_predicate, value))
210
211
def _add_lang_to_html(htmltext, lang):
    '''
    Take a piece of HTML and add an xml:lang attribute to it.

    * If `lang` is ``'und'`` the HTML is returned untouched.
    * If the fragment is empty, an empty ``div`` carrying the attribute is
      returned.
    * If the fragment consists of exactly one element, the attribute is set
      on that element.
    * In every other case (a lone text or comment node, or several nodes)
      the content is wrapped in a single ``div`` carrying the attribute.

    :param htmltext: The HTML fragment.
    :param lang: The language tag to add.
    :returns: The serialised HTML fragment.
    '''
    if lang == 'und':
        return htmltext
    parser = html5lib.HTMLParser(
        tree=html5lib.treebuilders.getTreeBuilder("dom")
    )
    html = parser.parseFragment(htmltext)
    html.normalize()
    children = html.childNodes
    if len(children) == 0:
        return '<div xml:lang="%s"></div>' % lang
    if len(children) == 1 and children[0].nodeType == Node.ELEMENT_NODE:
        # A single element can carry the attribute itself.
        children[0].setAttribute('xml:lang', lang)
    else:
        # Only elements support attributes, so anything else (a lone text or
        # comment node, or several siblings) gets one encompassing div.
        div = Element('div')
        div.ownerDocument = html
        div.setAttribute('xml:lang', lang)
        div.childNodes = children
        html.childNodes = [div]
    return html.toxml()
242
243
def _add_sources(graph, c, subject):
    '''
    Add sources to the RDF graph.

    Every source becomes a blank node typed as a
    ``dcterms:BibliographicResource`` that holds the citation and is linked
    to `subject` with ``dcterms:source``. Citations with markup are typed as
    ``rdf:HTML``; the others are plain literals.

    :param rdflib.graph.Graph graph: An RDF Graph.
    :param c: A :class:`skosprovider.skos.ConceptScheme`,
        :class:`skosprovider.skos.Concept` or :class:`skosprovider.skos.Collection`
    :param subject: The RDF subject to add the sources to.
    '''
    for src in c.sources:
        resource = BNode()
        graph.add((resource, RDF.type, DCTERMS.BibliographicResource))
        literal_options = {} if src.markup is None else {'datatype': RDF.HTML}
        citation = Literal(src.citation, **literal_options)
        graph.add((resource, DCTERMS.bibliographicCitation, citation))
        graph.add((subject, DCTERMS.source, resource))
260
261
def _add_languages(graph, c, subject):
    '''
    Add languages to the RDF graph.

    Every language is normalised with :func:`extract_language` and added as
    a ``dcterms:language`` literal.

    :param rdflib.graph.Graph graph: An RDF Graph.
    :param c: A :class:`skosprovider.skos.ConceptScheme`.
    :param subject: The RDF subject to add the languages to.
    '''
    for language in c.languages:
        # Emit the normalised tag, the same treatment labels and notes get.
        lang = extract_language(language)
        graph.add((subject, DCTERMS.language, Literal(lang)))
272
273
def extract_language(lang):
    '''
    Turn a language in our domain model into a IANA tag.

    A missing language (`None`) maps to ``'und'``, the code for an
    undefined language. Anything else is passed through :func:`text_`, which
    decodes byte strings as UTF-8.
    '''
    if lang is None:
        return 'und'
    return text_(lang, encoding="UTF-8")
282
283
284
def text_(s, encoding='latin-1', errors='strict'):
    """ Return ``s`` as text.

    Instances of ``binary_type`` are decoded with ``encoding`` and
    ``errors``; any other value is returned unchanged."""
    if not isinstance(s, binary_type):
        return s
    return s.decode(encoding, errors)
290
291
def _df_writexml(self, writer, indent="", addindent="", newl=""):
    '''
    Monkeypatch method for unexisting `writexml` in
    :class:`xml.dom.minidom.DocumentFragment`.

    A fragment has no markup of its own, so only its children are written.

    :param writer: Object with a `write` method that receives the output.
    :param indent: Current indentation.
    :param addindent: Indentation to add to higher levels.
    :param newl: Newline string.
    '''
    children = self.childNodes
    if not children:
        return
    if len(children) == 1 and children[0].nodeType == Node.TEXT_NODE:
        # A lone text node is written verbatim, without any indentation.
        children[0].writexml(writer, '', '', '')
        return
    child_indent = indent + addindent
    for child in children:
        child.writexml(writer, child_indent, addindent, newl)


from xml.dom.minidom import DocumentFragment

# Install the method so that ``toxml()`` works on document fragments.
DocumentFragment.writexml = _df_writexml
309