Passed
Push — master ( c88b19...443080 )
by Koen
02:19
created

atramhasis.scripts.import_file.validate_connection_string()   C

Complexity

Conditions 10

Size

Total Lines 18
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 13
dl 0
loc 18
rs 5.9999
c 0
b 0
f 0
cc 10
nop 1

How to fix   Complexity   

Complexity

Complex functions like atramhasis.scripts.import_file.validate_connection_string() often do a lot of different things. To break such a function down, we need to identify a cohesive component within it. A common approach to find such a component is to look for variables or branches that share the same prefixes, suffixes, or purpose.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
import argparse
2
import csv
3
import json
4
import os
5
import sys
6
7
from rdflib import Graph
8
from rdflib.util import SUFFIX_FORMAT_MAP
9
from rdflib.util import guess_format
10
from skosprovider.providers import DictionaryProvider
11
from skosprovider.providers import SimpleCsvProvider
12
from skosprovider.skos import ConceptScheme
13
from skosprovider.uri import UriPatternGenerator
14
from skosprovider_rdf.providers import RDFProvider
15
from skosprovider_sqlalchemy.utils import import_provider
16
from sqlalchemy import create_engine
17
from sqlalchemy.engine import url
18
from sqlalchemy.orm import sessionmaker
19
20
from atramhasis.data.models import Provider
21
from atramhasis.scripts.migrate_sqlalchemy_providers import json_serial
22
23
24
def file_to_rdf_provider(**kwargs) -> RDFProvider:
    """
    Build an RDF provider by parsing the input file into an rdflib graph.

    :param kwargs: keyword options. ``input_file`` is the path of the file to
        parse; ``provider_id`` optionally sets the provider id. When it is
        missing or empty, the upper-cased file name without its extension is
        used instead.
    :return: an RDFProvider wrapping the parsed graph
    """
    path = kwargs.get('input_file')
    stem, extension = os.path.splitext(os.path.basename(path))
    rdf_graph = Graph()
    # The serialization format is guessed from the file extension.
    rdf_graph.parse(path, format=guess_format(extension))
    metadata = {'id': kwargs.get("provider_id") or stem.upper()}
    return RDFProvider(metadata, rdf_graph)
37
38
39
def _create_provider_kwargs(**kwargs):
40
    provider_kwargs = {}
41
    uri_pattern = kwargs.get('uri_pattern')
42
    if uri_pattern:
43
        provider_kwargs['uri_generator'] =  UriPatternGenerator(uri_pattern)
44
    concept_scheme = kwargs.get('concept_scheme')
45
    if concept_scheme:
46
        provider_kwargs['concept_scheme'] = concept_scheme
47
    return provider_kwargs
48
49
50
def file_to_csv_provider(**kwargs) -> SimpleCsvProvider:
    """
    Create CSV provider from the input file

    :param kwargs: keyword options. ``input_file`` is the path of the CSV
        file; ``provider_id`` optionally sets the provider id (default: the
        upper-cased file name without extension); ``uri_pattern`` and
        ``concept_scheme`` are forwarded through ``_create_provider_kwargs``.
    :return: a SimpleCsvProvider built from the rows of the file
    """
    input_file = kwargs.get('input_file')
    input_name, _ = os.path.splitext(os.path.basename(input_file))
    meta_id = kwargs.get("provider_id") or input_name.upper()
    provider_kwargs = _create_provider_kwargs(**kwargs)
    # newline='' hands newline handling to the csv module, as its
    # documentation requires; without it, newlines embedded in quoted fields
    # are not interpreted correctly.
    with open(input_file, newline='') as ifile:
        reader = csv.reader(ifile)
        # NOTE(review): the provider is built while the file is still open;
        # this assumes SimpleCsvProvider consumes the reader in its
        # constructor - confirm against skosprovider.
        return SimpleCsvProvider(
            {'id': meta_id},
            reader,
            **provider_kwargs
        )
65
66
67
def file_to_json_provider(**kwargs) -> DictionaryProvider:
    """
    Create Dictionary provider from the input file

    :param kwargs: keyword options. ``input_file`` is the path of the JSON
        file; ``provider_id`` optionally sets the provider id (default: the
        upper-cased file name without extension); ``uri_pattern`` and
        ``concept_scheme`` are forwarded through ``_create_provider_kwargs``.
    :return: a DictionaryProvider built from the decoded JSON document
    """
    input_file = kwargs.get('input_file')
    input_name, _ = os.path.splitext(os.path.basename(input_file))
    meta_id = kwargs.get("provider_id") or input_name.upper()
    provider_kwargs = _create_provider_kwargs(**kwargs)
    # Read the raw bytes so json.load detects the encoding (UTF-8, UTF-16 or
    # UTF-32) itself, instead of decoding with the platform default, which
    # garbles non-ASCII labels on systems whose locale is not UTF-8.
    with open(input_file, 'rb') as data_file:
        dictionary = json.load(data_file)
    return DictionaryProvider(
        {'id': meta_id},
        dictionary,
        **provider_kwargs
    )
82
83
84
# Registry of importable file types: each entry lists the file extensions it
# handles and the factory that turns such a file into a provider.
supported_types = {
    'JSON': {
        'extensions': ['.json'],
        'file_to_provider': file_to_json_provider,
    },
    'RDF': {
        'extensions': [f'.{suffix}' for suffix in SUFFIX_FORMAT_MAP],
        'file_to_provider': file_to_rdf_provider,
    },
    'CSV': {
        'extensions': ['.csv'],
        'file_to_provider': file_to_csv_provider,
    },
}

# Flat list of every accepted extension, in registry order.
supported_ext = [
    extension
    for type_info in supported_types.values()
    for extension in type_info['extensions']
]
101
102
103
def parse_argv_for_import(argv):
    """
    Parse parameters and validate

    :param argv: full argument vector; ``argv[0]`` is the command name (used
        in the usage and example text) and ``argv[1:]`` are the arguments
        that get parsed
    :return: the parsed ``argparse.Namespace``
    :raises SystemExit: raised by argparse on invalid arguments or ``-h``, and
        with exit status 1 when the input file or the connection string does
        not validate
    """
    cmd = os.path.basename(argv[0])
    parser = argparse.ArgumentParser(
        # Keep the usage line consistent with the argv that was handed in.
        prog=cmd,
        description='Import file to a database',
        epilog=(
            f'example: {cmd} '
            'atramhasis/scripts/my_file '
            'urn:x-skosprovider:trees:%s '
            '--to sqlite:///atramhasis.sqlite '
            '--conceptscheme-label Labels '
            '--conceptscheme-uri urn:x-skosprovider:trees '
            '--create-provider '
            '--provider-id ERFGOEDTYPES '
            '--id-generation-strategy numeric'
        )
    )
    parser.add_argument(
        'input_file',
        type=str,
        help='local path to the input file',
    )
    parser.add_argument(
        'uri_pattern',
        type=str,
        help='URI pattern input for the URIGenerator',
    )
    parser.add_argument(
        '--to',
        dest='to',
        metavar='conn_string',
        type=str,
        help='Connection string of the output database',
        required=False,
        default='sqlite:///atramhasis.sqlite'
    )
    parser.add_argument(
        '--conceptscheme-label',
        dest='cs_label',
        type=str,
        help='Label of the conceptscheme',
        required=False,
        default=None
    )
    parser.add_argument(
        '--conceptscheme-uri',
        dest='cs_uri',
        type=str,
        help='URI of the conceptscheme',
        required=False,
        default=None
    )
    parser.add_argument(
        '--create-provider',
        dest='create_provider',
        default=True,
        action=argparse.BooleanOptionalAction,
        help='An optional parameter if given a provider is created. '
             'Use --no-create-provider to not create a provider',
    )
    parser.add_argument(
        '--provider-id',
        dest='provider_id',
        type=str,
        help='An optional string (eg. ERFGOEDTYPES) to be assigned to the provider id. '
             'If not specified, assign the conceptscheme id to the provider id',
        required=False,
        default=None
    )
    parser.add_argument(
        '--id-generation-strategy',
        dest='id_generation_strategy',
        type=str,
        # The previous help text was a copy of the uri_pattern help.
        help='ID generation strategy stored in the provider metadata',
        required=False,
        choices=["numeric", "guid", "manual"],
        default="numeric"
    )
    # Parse the argv that was passed in; a bare parse_args() would silently
    # fall back to sys.argv and ignore the caller's arguments.
    args = parser.parse_args(argv[1:])
    if not validate_file(args.input_file) or not validate_connection_string(args.to):
        sys.exit(1)
    return args
187
188
189
def validate_file(input_file):
    """
    Check that the input file can be imported.

    Prints the reason to stdout when the check fails.

    :param input_file: path of the file to import
    :return: True when the path is an existing regular file whose extension
        is listed in ``supported_ext``, False otherwise
    """
    if not os.path.exists(input_file):
        print(f'The input file {input_file} does not exists')
        return False
    if not os.path.isfile(input_file):
        # A directory (even one named like "data.json") would pass the
        # existence check and only fail later, when the file is opened.
        print(f'The input file {input_file} is not a regular file')
        return False
    if os.path.splitext(input_file)[1] not in supported_ext:
        print(f'the input file {input_file} is not supported. Allowed extensions are: {supported_ext}')
        return False
    return True
198
199
200
def validate_connection_string(connection_string):
    """
    Validate the connection string

    Accepted are ``postgresql`` URLs that carry username, password, host,
    port and database, and ``sqlite`` URLs that carry a database. For any
    other input an explanation is printed to stdout.

    :param connection_string: database URL to check
    :return: Boolean True if correct connection string
    """
    # Imported here so this function does not depend on the module's import
    # block being changed.
    from sqlalchemy.exc import ArgumentError

    # URL parts that must be present for each supported driver.
    required_parts = {
        'postgresql': ('username', 'password', 'host', 'port', 'database'),
        'sqlite': ('database',),
    }
    try:
        u = url.make_url(connection_string)
    except ArgumentError:
        # An unparseable string is an invalid connection string: report it
        # below instead of crashing with a traceback.
        u = None
    if u is not None:
        parts = required_parts.get(u.drivername)
        if parts is None:
            if u.drivername:
                print('The database driver ' + u.drivername + ' is not supported.')
        elif all(getattr(u, part) for part in parts):
            return True
    print('Wrong structure of connection string "' + connection_string + '"')
    print('Structure: postgresql://username:password@host:port/db_name OR sqlite:///path/db_name.sqlite')
    return False
218
219
220
def conn_str_to_session(conn_str):
    """
    Open a new session on the database identified by the connection string.

    The engine is created with ``echo=True``.

    :param conn_str: database connection string
    :return: a session bound to the newly created engine
    """
    session_factory = sessionmaker(bind=create_engine(conn_str, echo=True))
    return session_factory()
229
230
231
def create_conceptscheme(conceptscheme_uri: str, conceptscheme_label: str) -> ConceptScheme:
    """
    Build a conceptscheme from the given URI and a single label.

    :param conceptscheme_uri: URI assigned to the conceptscheme
    :param conceptscheme_label: text of the one label given to the conceptscheme
    :return: the new ConceptScheme
    """
    labels = [{'label': conceptscheme_label}]
    return ConceptScheme(uri=conceptscheme_uri, labels=labels)
239
240
241
def main(argv=sys.argv):
    """
    Documentation: import -h
    Run: import
    <path_input_file>
    <uri_pattern>
    --to <conn_string>
    --conceptscheme-uri <cs_uri>
    --conceptscheme-label <cs_label>
    --create-provider
    --provider-id <provider_id>
    --id-generation-strategy <numeric/guid/manual>

    example path_input_file:
     atramhasis/scripts/my_file

    structure conn_string:
     postgresql://username:password@host:port/db_name
     sqlite:///path/db_name.sqlite
    default conn_string:
     sqlite:///atramhasis.sqlite

    example conceptscheme_label
     My Conceptscheme
    default conceptscheme_label is the name of the file if a URI is specified.
    If no URI is specified, a conceptscheme will be imported from the input
    file. This only works for RDF files. For other file types (JSON and CSV)
    conceptscheme_uri is mandatory and conceptscheme_label is recommended.

    :param argv: argument vector, handed to ``parse_argv_for_import``
    """

    # Import the data
    args = parse_argv_for_import(argv)
    input_name, input_ext = os.path.splitext(os.path.basename(args.input_file))
    # The extension was checked against the supported extensions during
    # argument validation, so a matching factory always exists.
    file_to_provider_function = next(
        type_info['file_to_provider']
        for type_info in supported_types.values()
        if input_ext in type_info['extensions']
    )
    if args.cs_uri:
        cs_uri = args.cs_uri
        cs_label = args.cs_label if args.cs_label else input_name.capitalize()
        args.concept_scheme = create_conceptscheme(cs_uri, cs_label)
    session = conn_str_to_session(args.to)
    try:
        provider = file_to_provider_function(**vars(args))
        cs = import_provider(provider, session)
        if args.create_provider:
            db_provider = Provider()
            provider.metadata[
                'atramhasis.id_generation_strategy'
            ] = args.id_generation_strategy.upper()
            # Round-trip through JSON so the stored metadata only holds
            # JSON-serializable values.
            db_provider.meta = json.loads(json.dumps(provider.metadata, default=json_serial))
            db_provider.expand_strategy = 'RECURSE'
            db_provider.conceptscheme = cs
            db_provider.id = args.provider_id or cs.id
            db_provider.uri_pattern = args.uri_pattern
            if 'conceptscheme_id' in db_provider.meta:
                del db_provider.meta['conceptscheme_id']
            session.add(db_provider)
        session.commit()
        # Read the id while the session is still open.
        # NOTE(review): assumes the default expire-on-commit behaviour, where
        # attributes are reloaded through the session after a commit.
        scheme_id = cs.id
    finally:
        # Always release the database connection, also when the import fails.
        session.close()

    # Get info to return to the user
    if not args.create_provider:
        prov_id = args.provider_id or input_name.upper()
        print(
            "\n\n*** The import of conceptscheme {0} from the {1} file to {2} was succesful. ***\
              \n\nTo use the data in Atramhasis, you must edit the file my_thesaurus/skos/__init__.py.\
              \nAdd a configuration similar to:\
                \n\ndef create_registry(request):\
                \n\t# create the SKOS registry\
                \n\tregistry = Registry(instance_scope='threaded_thread')\
                \n\t{3} = SQLAlchemyProvider(\
                \n\t\t{{'id': '{4}', 'conceptscheme_id': {5}}},\
                \n\t\trequest.db\
                \n\t)\
                \n\tregistry.register_provider({6})\
                \n\treturn registry\
                \n\n".
            format(
                prov_id, args.input_file, args.to,
                prov_id.replace(' ', '_'), prov_id, scheme_id, prov_id.replace(' ', '_')
            )
        )
    else:
        prov_id = args.provider_id or scheme_id
        msg = """
***
The import of conceptscheme {0} from the {1} file to {2} was succesful.
You can now continue through the Atramhasis UI.
***
"""
        print(msg.format(prov_id, args.input_file, args.to))
333
334
335
336
# Script entry point: run the import only when this module is executed
# directly, not when it is imported.
if __name__ == '__main__':
    main()
338