| Conditions | 17 |
| Total Lines | 134 |
| Code Lines | 104 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, Preserve Whole Object.
Complex units like atramhasis.scripts.dump_rdf.main() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | import logging |
||
def _dump_provider(provider, dump_location):
    """
    Serialise one provider to Turtle and RDF/XML and collect triple counts.

    :param provider: Provider taken from the SKOS registry; its metadata
        ``id`` is used to build the file names.
    :param dump_location: Directory the ``<id>-full.ttl`` and
        ``<id>-full.rdf`` files are written to.
    :returns: A ``(basename, counts)`` tuple. ``basename`` is the dump path
        without extension, ``counts`` is a dict with the keys
        ``conceptscheme_id``, ``triples``, ``conceptscheme_triples`` and
        ``avg_concept_triples``.
    """
    start_time = time.time()
    pid = provider.get_metadata()['id']
    basename = os.path.join(dump_location, '%s-full' % pid)
    filename_ttl = '%s.ttl' % basename
    filename_rdf = '%s.rdf' % basename

    log.info('Generating dump for %s', pid)
    graph = utils.rdf_dumper(provider)
    triples = len(graph)
    log.info('Number of triples in Graph: %d', triples)

    csuri = URIRef(provider.concept_scheme.uri)
    cs_triples = len(list(graph.predicate_objects(csuri)))
    log.info('Number of triples in Conceptscheme: %d', cs_triples)

    count_concepts = len(list(graph.subjects(RDF.type, SKOS.Concept)))
    count_collections = len(list(graph.subjects(RDF.type, SKOS.Collection)))
    members = count_concepts + count_collections
    # A scheme without concepts or collections has no meaningful average.
    avg_concept_triples = (triples - cs_triples) / members if members else 0
    log.info('Average number of triples per concept: %d', avg_concept_triples)

    log.info('Dumping %s to Turtle: %s', pid, filename_ttl)
    graph.serialize(destination=filename_ttl, format='turtle')
    log.info('Dumping %s to RDFxml: %s', pid, filename_rdf)
    graph.serialize(destination=filename_rdf, format='pretty-xml')
    # Drop the graph before the next provider is loaded.
    del graph
    log.info('--- %s seconds ---', time.time() - start_time)

    counts = {
        'conceptscheme_id': pid,
        'triples': triples,
        'conceptscheme_triples': cs_triples,
        'avg_concept_triples': avg_concept_triples,
    }
    return basename, counts


def _convert_to_hdt(rdf2hdt, basenames, dump_location):
    """
    Convert every dumped file to HDT with the external ``rdf2hdt`` command.

    The Turtle file is tried first; when that fails the RDF/XML file is used
    as a fallback. Failures are logged, never raised.

    :param rdf2hdt: Path of the ``rdf2hdt`` executable.
    :param basenames: Dump paths without extension.
    :param dump_location: Directory the dumps live in (used for logging only).
    """
    from subprocess import check_call, CalledProcessError

    parsing_error = False
    for basename in basenames:
        # Build the sibling names from the base name; a str.replace on the
        # full path would also rewrite a '.ttl' inside a directory name.
        ttl_file = '%s.ttl' % basename
        rdf_file = '%s.rdf' % basename
        hdt_file = '%s.hdt' % basename
        log.info('Converting %s to hdt', ttl_file)
        try:
            check_call([rdf2hdt, '-f', 'turtle', ttl_file, hdt_file])
        except (FileNotFoundError, CalledProcessError) as e:
            # Turtle failed, let's try rdfxml
            parsing_error = True
            log.warning(
                'rdf2hdt for file %s failed with error %s. Trying rdfxml...',
                ttl_file, e
            )
            try:
                check_call([rdf2hdt, '-f', 'rdfxml', rdf_file, hdt_file])
            except (FileNotFoundError, CalledProcessError) as e:
                # rdfxml failed
                log.error(
                    'rdfxml for file %s failed with error %s', ttl_file, e
                )
    if parsing_error:
        log.error(
            'Error during rdf2hdt conversion. Check logs for more information.'
        )
    else:
        log.info('All hdt files dumped to %s', dump_location)


def main():
    """
    Dump all conceptschemes to files and store their triple counts.

    Reads ``config_uri`` (and the optional ``--location`` / ``--rdf2hdt``
    options) from ``sys.argv``, bootstraps the application, writes a Turtle
    and an RDF/XML dump for every provider whose metadata ``subject`` does not
    contain ``'external'``, optionally converts the dumps to HDT, and saves
    the collected counts through ``CountsManager``.

    :returns: ``2`` when no argument was given or the dump location is not
        writable, otherwise ``None``.
    :raises SkosRegistryNotFoundException: When the bootstrapped request has
        no ``skos_registry``.
    """
    description = """\
    Dump all conceptschemes to files. Will serialise as Turtle and RDF/XML format.
    """
    usage = "usage: %prog config_uri"
    parser = optparse.OptionParser(
        usage=usage,
        description=textwrap.dedent(description)
    )
    # Adjacent literals instead of a backslash continuation inside the string,
    # which would embed the source indentation in the help text.
    parser.add_option(
        '-l', '--location', dest='dump_location', type='string',
        help='Specify where to dump the conceptschemes. If not specified, '
             'this is set to the atramhasis.dump_location from your ini file.'
    )
    parser.add_option(
        '-r', '--rdf2hdt', dest='rdf2hdt', type='string', default=False,
        help='Specify where the rdf2hdt command can be found. If not '
             'specified, this is set to atramhasis.rdf2hdt from your ini file.'
    )

    options, args = parser.parse_args(sys.argv[1:])

    if not args:
        log.error('You must provide at least one argument.')
        return 2

    config_uri = args[0]

    env = bootstrap(config_uri)
    # From here on the environment must be closed on every exit path,
    # including the early return and any exception.
    try:
        setup_logging(config_uri)

        dump_location = options.dump_location
        if dump_location is None:
            dump_location = env['registry'].settings.get(
                'atramhasis.dump_location',
                os.path.abspath(os.path.dirname(config_uri))
            )
        if not os.access(dump_location, os.W_OK | os.X_OK):
            log.error('Dump location "%s" is not writable.', dump_location)
            return 2

        rdf2hdt = options.rdf2hdt
        if not rdf2hdt:
            rdf2hdt = env['registry'].settings.get(
                'atramhasis.rdf2hdt',
                False
            )

        request = env['request']

        skos_registry = getattr(request, 'skos_registry', None)
        if skos_registry is None:
            raise SkosRegistryNotFoundException()  # pragma: no cover

        counts = []
        basenames = []
        for provider in skos_registry.get_providers():
            if 'external' in provider.get_metadata()['subject']:
                continue
            basename, provider_counts = _dump_provider(provider, dump_location)
            basenames.append(basename)
            counts.append(provider_counts)

        log.info('All files dumped to %s', dump_location)

        if rdf2hdt:
            _convert_to_hdt(rdf2hdt, basenames, dump_location)

        with transaction.manager:
            dbsession = request.registry.dbmaker()
            manager = CountsManager(dbsession)
            for c in counts:
                cs_count = ConceptschemeCounts(
                    conceptscheme_id=c['conceptscheme_id'],
                    triples=c['triples'],
                    conceptscheme_triples=c['conceptscheme_triples'],
                    avg_concept_triples=c['avg_concept_triples']
                )
                manager.save(cs_count)
    finally:
        env['closer']()
||
| 155 |