Conditions | 6 |
Total Lines | 125 |
Code Lines | 87 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments inside a method's body, that is usually a good sign that the commented part should be extracted into a new method; the comment then serves as a starting point for choosing a good name for that new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.
There are several approaches to avoiding long parameter lists:
1 | """ |
||
def run_processor(
        processorClass,
        mets_url=None,
        resolver=None,
        workspace=None,
        page_id=None,
        log_level=None,
        input_file_grp=None,
        output_file_grp=None,
        parameter=None,
        working_dir=None,
        mets_server_url=None,
        instance_caching=False
):  # pylint: disable=too-many-locals
    """
    Instantiate a Pythonic processor, open a workspace, run the processor and save the workspace.

    If :py:attr:`workspace` is not none, reuse that. Otherwise, instantiate an
    :py:class:`~ocrd.Workspace` for :py:attr:`mets_url` (and :py:attr:`working_dir`)
    by using :py:meth:`ocrd.Resolver.workspace_from_url` (i.e. open or clone local workspace).

    Instantiate a Python object for :py:attr:`processorClass` (via ``get_processor``), passing:
    - :py:attr:`page_id`
    - :py:attr:`input_file_grp`
    - :py:attr:`output_file_grp`
    - :py:attr:`parameter`
    - :py:attr:`instance_caching`

    The workspace is *not* handed to the processor at construction time
    (``workspace=None`` is passed); it is given to ``process_workspace`` instead.

    Warning: Avoid setting the `instance_caching` flag to True. It may have unexpected side effects.
    This flag is used for an experimental feature we would like to adopt in future.

    Run the processor on the workspace (creating output files in the filesystem).
    If ``config.OCRD_PROFILE`` contains ``RSS`` or ``PSS``, the run is wrapped in
    ``memory_profiler.memory_usage`` and the memory consumption is logged.
    Wall-clock and CPU time (including child processes) are always logged.

    Finally, record the processor as an agent in the METS and write back the
    workspace (updating the METS in the filesystem).

    Args:
        processorClass (object): Python class of the module processor.
        mets_url: forwarded to ``_get_workspace`` (used when no workspace is given).
        resolver: forwarded to ``_get_workspace``.
        workspace: existing workspace to reuse, or None.
        page_id: forwarded to ``get_processor``.
        log_level: if truthy, passed to ``setOverrideLogLevel`` before anything else.
        input_file_grp: forwarded to ``get_processor``.
        output_file_grp: forwarded to ``get_processor``.
        parameter: forwarded to ``get_processor``.
        working_dir: forwarded to ``_get_workspace``.
        mets_server_url: forwarded to ``_get_workspace``.
        instance_caching (bool): forwarded to ``get_processor`` (see warning above).

    Returns:
        The processor instance that was run.

    Raises:
        Exception: whatever ``processor.process_workspace`` raises; the failure is
            logged (with traceback) and re-raised unchanged. In that case neither
            the timing log nor the METS update/save happens.
    """
    if log_level:
        setOverrideLogLevel(log_level)
    workspace = _get_workspace(
        workspace,
        resolver,
        mets_url,
        working_dir,
        mets_server_url
    )
    log = getLogger('ocrd.processor.helpers.run_processor')
    log.debug("Running processor %s", processorClass)

    processor = get_processor(
        processorClass,
        parameter=parameter,
        workspace=None,
        page_id=page_id,
        input_file_grp=input_file_grp,
        output_file_grp=output_file_grp,
        instance_caching=instance_caching
    )

    ocrd_tool = processor.ocrd_tool
    executable = ocrd_tool['executable']
    name = '%s v%s' % (executable, processor.version)
    # Fall back to '' both when 'steps' is missing and when it is an empty list
    # (indexing an empty list would raise IndexError).
    otherrole = (ocrd_tool.get('steps') or [''])[0]
    logProfile = getLogger('ocrd.process.profile')
    log.debug("Processor instance %s (%s doing %s)", processor, name, otherrole)

    # Take all baselines immediately before processing starts.
    t0_wall = perf_counter()
    t0_cpu = process_time()
    t0_os = times()
    if any(x in config.OCRD_PROFILE for x in ['RSS', 'PSS']):
        # PSS backend only when explicitly requested, otherwise plain psutil
        backend = 'psutil_pss' if 'PSS' in config.OCRD_PROFILE else 'psutil'
        from memory_profiler import memory_usage  # pylint: disable=import-outside-toplevel
        try:
            mem_usage = memory_usage(proc=(processor.process_workspace, [workspace], {}),
                                     # only run process once
                                     max_iterations=1,
                                     interval=.1, timeout=None, timestamps=True,
                                     # include sub-processes
                                     multiprocess=True, include_children=True,
                                     # selected above from OCRD_PROFILE
                                     backend=backend)
        except Exception:
            log.exception("Failure in processor '%s'", executable)
            raise
        # timestamps=True yields (memory, timestamp) pairs; keep only the memory values
        mem_usage_values = [mem for mem, _ in mem_usage]
        mem_output = 'memory consumption: '
        mem_output += sparkline(mem_usage_values)
        mem_output += ' max: %.2f MiB min: %.2f MiB' % (max(mem_usage_values), min(mem_usage_values))
        logProfile.info(mem_output)
    else:
        try:
            processor.process_workspace(workspace)
        except Exception:
            log.exception("Failure in processor '%s'", executable)
            raise

    t1_wall = perf_counter() - t0_wall
    t1_cpu = process_time() - t0_cpu
    t1_os = times()
    # add CPU time from child processes (page worker etc)
    t1_cpu += t1_os.children_user - t0_os.children_user
    t1_cpu += t1_os.children_system - t0_os.children_system
    # NOTE(review): the stray "( " after "(CPU)" looks unintended; the message is
    # kept byte-identical in case log consumers match on it -- confirm before fixing.
    logProfile.info(
        "Executing processor '%s' took %fs (wall) %fs (CPU)( "
        "[--input-file-grp='%s' --output-file-grp='%s' --parameter='%s' --page-id='%s']",
        executable,
        t1_wall,
        t1_cpu,
        processor.input_file_grp or '',
        processor.output_file_grp or '',
        # json.dumps never returns an empty string, so no `or ''` fallback is needed
        json.dumps(processor.parameter),
        processor.page_id or ''
    )
    # Record this run as a software agent in the METS, including its options.
    workspace.mets.add_agent(
        name=name,
        _type='OTHER',
        othertype='SOFTWARE',
        role='OTHER',
        otherrole=otherrole,
        notes=[({'option': 'input-file-grp'}, processor.input_file_grp or ''),
               ({'option': 'output-file-grp'}, processor.output_file_grp or ''),
               ({'option': 'parameter'}, json.dumps(processor.parameter or '')),
               ({'option': 'page-id'}, processor.page_id or '')]
    )
    workspace.save_mets()
    return processor
||
156 | |||
277 |