Passed
Push — develop ( 1f6ef3...42aeec )
by Koen
01:25
created

atramhasis.scripts.import_file.main()   A

Complexity

Conditions 3

Size

Total Lines 55
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 18
dl 0
loc 55
rs 9.5
c 0
b 0
f 0
cc 3
nop 1

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

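Commonly applied refactorings include Extract Method: moving a cohesive group of statements out of the long method into a new, well-named method.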

1
import argparse
2
import csv
3
import json
4
import os
5
import sys
6
7
from rdflib import Graph
8
from rdflib.util import SUFFIX_FORMAT_MAP
9
from rdflib.util import guess_format
10
from skosprovider.providers import DictionaryProvider
11
from skosprovider.providers import SimpleCsvProvider
12
from skosprovider.uri import UriPatternGenerator
13
from skosprovider_rdf.providers import RDFProvider
14
from skosprovider_sqlalchemy.models import ConceptScheme
15
from skosprovider_sqlalchemy.models import Label
16
from skosprovider_sqlalchemy.models import Note
17
from skosprovider_sqlalchemy.models import Source
18
from skosprovider_sqlalchemy.utils import import_provider
19
from sqlalchemy import create_engine
20
from sqlalchemy.engine import url
21
from sqlalchemy.orm import sessionmaker
22
23
24
def file_to_rdf_provider(**kwargs):
    """
    Build an RDF provider from the file passed as ``input_file``.

    The file is parsed into an rdflib ``Graph``; the parse format is
    guessed from the file extension. The provider id is the upper-cased
    file name without its extension.
    """
    source_path = kwargs.get('input_file')
    stem, suffix = os.path.splitext(os.path.basename(source_path))
    rdf_graph = Graph()
    rdf_format = guess_format(suffix)
    rdf_graph.parse(source_path, format=rdf_format)
    provider_metadata = {'id': stem.upper()}
    return RDFProvider(provider_metadata, rdf_graph)
36
37
38
def file_to_csv_provider(**kwargs):
    """
    Create CSV provider from the input file

    :param kwargs: keyword arguments; ``input_file`` is the path of the CSV
        file to read, the optional ``uri_pattern`` is handed to a
        ``UriPatternGenerator`` that is passed on as ``uri_generator``.
    :return: a ``SimpleCsvProvider`` whose id is the upper-cased file name
        without its extension.
    """
    input_file = kwargs.get('input_file')
    input_name, _ = os.path.splitext(os.path.basename(input_file))
    uri_pattern = kwargs.get('uri_pattern')
    provider_kwargs = {'uri_generator': UriPatternGenerator(uri_pattern)} if uri_pattern else {}
    # The csv module requires files to be opened with newline='' so that
    # newlines embedded in quoted fields are interpreted correctly.
    with open(input_file, newline='') as ifile:
        reader = csv.reader(ifile)
        # Build the provider while the file is still open: the csv reader
        # pulls its rows lazily from the underlying file object.
        return SimpleCsvProvider(
            {'id': input_name.upper()},
            reader,
            **provider_kwargs
        )
53
54
55
def file_to_json_provider(**kwargs):
    """
    Create Dictionary provider from the input file

    :param kwargs: keyword arguments; ``input_file`` is the path of the JSON
        file to read, the optional ``uri_pattern`` is handed to a
        ``UriPatternGenerator`` that is passed on as ``uri_generator``.
    :return: a ``DictionaryProvider`` whose id is the upper-cased file name
        without its extension.
    """
    input_file = kwargs.get('input_file')
    input_name, _ = os.path.splitext(os.path.basename(input_file))
    # Read the file in binary mode so json.load detects the encoding
    # (UTF-8/16/32) itself instead of relying on the locale's default text
    # encoding, which is not guaranteed to be UTF-8.
    with open(input_file, 'rb') as data_file:
        dictionary = json.load(data_file)
    uri_pattern = kwargs.get('uri_pattern')
    provider_kwargs = {'uri_generator': UriPatternGenerator(uri_pattern)} if uri_pattern else {}
    return DictionaryProvider(
        {'id': input_name.upper()},
        dictionary,
        **provider_kwargs
    )
70
71
72
# Registry of the importable file types: for each type the file extensions
# that select it and the function that turns such a file into a provider.
supported_types = {
    'JSON': {
        'extensions': ['.json'],
        'file_to_provider': file_to_json_provider,
    },
    'RDF': {
        # One extension per suffix known to rdflib.
        'extensions': [f'.{suffix}' for suffix in SUFFIX_FORMAT_MAP],
        'file_to_provider': file_to_rdf_provider,
    },
    'CSV': {
        'extensions': ['.csv'],
        'file_to_provider': file_to_csv_provider,
    },
}

# Flat list of every accepted extension, in registry order.
supported_ext = [
    extension
    for type_config in supported_types.values()
    for extension in type_config['extensions']
]
89
90
91
def parse_argv_for_import(argv):
    """
    Parse parameters and validate

    :param argv: full argument vector; ``argv[0]`` is the command name (used
        in the help epilog) and ``argv[1:]`` are the options to parse.
    :return: the parsed ``argparse.Namespace`` with the attributes
        ``input_file``, ``to``, ``cs_label``, ``cs_uri`` and ``uri_pattern``.
    :raises SystemExit: when argparse rejects the arguments (or prints help),
        or with status 1 when the input file or the connection string does
        not pass validation.
    """
    cmd = os.path.basename(argv[0])
    parser = argparse.ArgumentParser(
        description='Import file to a database',
        epilog=(
            f'example: {cmd} '
            '--from atramhasis/scripts/my_file '
            '--to sqlite:///atramhasis.sqlite '
            '--conceptscheme_label Labels '
            '--conceptscheme_uri urn:x-skosprovider:trees '
            '--uri_pattern urn:x-skosprovider:trees:%s'
        )
    )
    parser.add_argument('--from',
                        dest='input_file',
                        type=str,
                        help='local path to the input file',
                        required=True
                        )
    parser.add_argument('--to',
                        dest='to',
                        metavar='conn_string',
                        type=str,
                        help='Connection string of the output database',
                        required=False,
                        default='sqlite:///atramhasis.sqlite'
                        )
    parser.add_argument('--conceptscheme_label',
                        dest='cs_label',
                        type=str,
                        help='Label of the conceptscheme',
                        required=False,
                        default=None
                        )
    parser.add_argument('--conceptscheme_uri',
                        dest='cs_uri',
                        type=str,
                        help='URI of the conceptscheme',
                        required=False,
                        default=None
                        )
    parser.add_argument('--uri_pattern',
                        dest='uri_pattern',
                        type=str,
                        help='URI pattern input for the URIGenerator',
                        required=False,
                        default=None
                        )
    # Parse the argument vector that was handed in. Calling parse_args()
    # without arguments would silently read sys.argv and ignore `argv`.
    args = parser.parse_args(argv[1:])
    if not validate_file(args.input_file) or not validate_connection_string(args.to):
        sys.exit(1)
    return args
146
147
148
def validate_file(input_file):
    """
    Check that the input file exists and has a supported extension.

    Prints a message describing the problem when the check fails.

    :param input_file: path of the file to check
    :return: True when the file exists and its extension is listed in
        ``supported_ext``, False otherwise
    """
    if not os.path.exists(input_file):
        print(f'The input file {input_file} does not exists')
        return False
    extension = os.path.splitext(input_file)[1]
    if extension in supported_ext:
        return True
    print(f'the input file {input_file} is not supported. Allowed extensions are: {supported_ext}')
    return False
157
158
159
def validate_connection_string(connection_string):
    """
    Check that a connection string is complete for a supported database.

    A ``postgresql`` URL needs username, password, host, port and database;
    a ``sqlite`` URL needs a database. For anything else an explanation of
    the expected structure is printed.

    :param connection_string: database URL to check
    :return: True when the connection string is accepted, False otherwise
    """
    parsed = url.make_url(connection_string)
    driver = parsed.drivername
    if driver == 'postgresql':
        required_parts = (parsed.username, parsed.password, parsed.host, parsed.port, parsed.database)
        accepted = all(required_parts)
    elif driver == 'sqlite':
        accepted = bool(parsed.database)
    else:
        accepted = False
        if driver:
            print('The database driver ' + driver + ' is not supported.')
    if accepted:
        return True
    print('Wrong structure of connection string "' + connection_string + '"')
    print('Structure: postgresql://username:password@host:port/db_name OR sqlite:///path/db_name.sqlite')
    return False
177
178
179
def conn_str_to_session(conn_str):
    """
    Open a new session on the database behind a connection string.

    The engine is created with ``echo=True``.

    :param conn_str: database connection string
    :return: a session bound to the newly created engine
    """
    session_factory = sessionmaker(bind=create_engine(conn_str, echo=True))
    return session_factory()
188
189
190
def create_conceptscheme(conceptscheme_label, conceptscheme_uri):
    """
    Build the output conceptscheme from a label and a URI.

    :param conceptscheme_label: text of the single ``prefLabel`` (language
        ``und``) attached to the conceptscheme
    :param conceptscheme_uri: URI of the conceptscheme
    :return: the new ``ConceptScheme``
    """
    scheme = ConceptScheme(uri=conceptscheme_uri)
    scheme.labels.append(Label(conceptscheme_label, 'prefLabel', 'und'))
    return scheme
198
199
200
def create_conceptscheme_from_skos(conceptscheme):
    """
    Build the output conceptscheme from a `skosprovider.skos.ConceptScheme`.

    The URI is taken over as is; labels, notes and sources are rebuilt as
    ``Label``, ``Note`` and ``Source`` objects, and the languages are copied
    into a new list.

    :param conceptscheme: the conceptscheme to copy from
    :return: the new ``ConceptScheme``
    """
    copied_labels = [
        Label(source_label.label, source_label.type, source_label.language)
        for source_label in conceptscheme.labels
    ]
    copied_notes = [
        Note(source_note.note, source_note.type, source_note.language, source_note.markup)
        for source_note in conceptscheme.notes
    ]
    copied_sources = [
        Source(source.citation, source.markup)
        for source in conceptscheme.sources
    ]
    copied_languages = list(conceptscheme.languages)
    return ConceptScheme(
        uri=conceptscheme.uri,
        labels=copied_labels,
        notes=copied_notes,
        sources=copied_sources,
        languages=copied_languages,
    )
222
223
224
def provider_to_db(provider, conceptscheme, session):
    """
    Store a provider's data in the database.

    :param provider: provider whose data is imported
    :param conceptscheme: conceptscheme the imported data is attached to
    :param session: database session used for the import
    """
    # Register the conceptscheme with the session before importing into it.
    session.add(conceptscheme)
    import_provider(provider, conceptscheme, session)
    # Persist the conceptscheme and the imported data in one commit.
    session.commit()
231
232
233
def main(argv=sys.argv):
    """
    Import a file into a database and print how to configure the result.

    Documentation: import -h
    Run: import --from <path_input_file> --to <conn_string> --conceptscheme_label <cs_label> --conceptscheme_uri <cs_uri> --uri_pattern <uri_pattern>

    example path_input_file:
     atramhasis/scripts/my_file

    structure conn_string:
     postgresql://username:password@host:port/db_name
     sqlite:///path/db_name.sqlite
    default conn_string:
     sqlite:///atramhasis.sqlite

    example conceptscheme_label
     My Conceptscheme
    default conceptscheme_label is the name of the file if a URI is specified.
    If no URI is specified, a conceptscheme will be imported from the input
    file. This only works for RDF files. For other file types (JSON and CSV)
    conceptscheme_uri is mandatory and conceptscheme_label is recommended.
    """

    # Import the data
    args = parse_argv_for_import(argv)
    input_name, input_ext = os.path.splitext(os.path.basename(args.input_file))
    session = conn_str_to_session(args.to)
    # Pick the converter registered for the extension of the input file.
    matching_converters = [
        type_config['file_to_provider']
        for type_config in supported_types.values()
        if input_ext in type_config['extensions']
    ]
    provider = matching_converters[0](**vars(args))
    if not args.cs_uri:
        cs = create_conceptscheme_from_skos(provider.concept_scheme)
    else:
        # Fall back to the capitalized file name when no label was given.
        cs_label = args.cs_label or input_name.capitalize()
        cs = create_conceptscheme(cs_label, args.cs_uri)
    provider_to_db(provider, cs, session)

    # Get info to return to the user
    prov_id = input_name.upper()
    scheme_id = cs.id
    registry_name = prov_id.replace(' ', '_')
    # The whitespace at the start of the continuation lines is part of the
    # message text, so these lines must keep their exact indentation.
    summary_template = "\n\n*** The import of conceptscheme {0} from the {1} file to {2} was succesful. ***\
          \n\nTo use the data in Atramhasis, you must edit the file my_thesaurus/skos/__init__.py.\
          \nAdd a configuration similar to:\
            \n\ndef create_registry(request):\
            \n\t# create the SKOS registry\
            \n\tregistry = Registry(instance_scope='threaded_thread')\
            \n\t{3} = SQLAlchemyProvider(\
            \n\t\t{{'id': '{4}', 'conceptscheme_id': {5}}},\
            \n\t\trequest.db\
            \n\t)\
            \n\tregistry.register_provider({6})\
            \n\treturn registry\
            \n\n"
    print(summary_template.format(
        prov_id, args.input_file, args.to,
        registry_name, prov_id, scheme_id, registry_name,
    ))
288
289
290
# Run the import when this module is executed as a script.
if __name__ == '__main__':
    main()