| Metric | Value |
| --- | --- |
| Conditions | 13 |
| Total Lines | 76 |
| Code Lines | 60 |
| Duplicated Lines | 0 |
| Duplication Ratio | 0 % |
| Changes | 0 |
Small methods make your code easier to understand, particularly when combined with a good name. Moreover, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, that is usually a good sign that the commented part should be extracted into a new method; the comment then becomes a natural starting point for naming it, as in the sketch below.
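To illustrate (a generic sketch, not code from e2edutch; the item and checkout names are invented):

```python
from dataclasses import dataclass


@dataclass
class Item:
    price: float


# Before: a comment explains what the block does.
def checkout(items):
    # compute the total price including 21% VAT
    total = sum(item.price for item in items)
    return round(total * 1.21, 2)


# After Extract Method: the comment became the method name.
def total_including_vat(items):
    return round(sum(item.price for item in items) * 1.21, 2)


def checkout_refactored(items):
    return total_including_vat(items)
```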
Commonly applied refactorings include:

- Extract Method

If many parameters or temporary variables are present:

- Replace Temp with Query
- Introduce Parameter Object (see the sketch after this list)
- Preserve Whole Object
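As a sketch of Introduce Parameter Object (the draw/Rectangle names are hypothetical, not part of e2edutch):

```python
from dataclasses import dataclass


# Before: five loose parameters travel together through every call.
def draw(x, y, width, height, color):
    print(f"drawing {color} box at ({x}, {y}), size {width}x{height}")


# After Introduce Parameter Object: the coordinates and size form one
# object, shrinking every signature that passes them along.
@dataclass
class Rectangle:
    x: int
    y: int
    width: int
    height: int


def draw_rect(rect: Rectangle, color: str) -> None:
    print(f"drawing {color} box at ({rect.x}, {rect.y}), "
          f"size {rect.width}x{rect.height}")


draw_rect(Rectangle(0, 0, 80, 24), "red")
```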
Complex methods like e2edutch.predict.main() often do a lot of different things. To break such a method (or its enclosing class) down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields or methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
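For example (a hypothetical class, not from e2edutch), members sharing a naf_ prefix hint at a component that can be extracted:

```python
# Before: the naf_ prefix marks a cohesive component hiding in the class.
class Pipeline:
    def naf_read(self, path):
        ...

    def naf_write(self, obj, out):
        ...

    def predict(self, example):
        ...


# After Extract Class: the prefixed members move to a class of their own,
# and the pipeline delegates to it.
class NafIO:
    def read(self, path):
        ...

    def write(self, obj, out):
        ...


class PipelineRefactored:
    def __init__(self):
        self.naf_io = NafIO()

    def predict(self, example):
        ...
```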
The flagged method, e2edutch.predict.main(), reads as follows:

```python
import sys
# ... the module's remaining imports and helper definitions (collections, io,
# json, logging, os, conll, minimize, naf, util, Predictor, get_parser,
# read_jsonlines, logger) are elided in this listing ...


def main(args=None):
    parser = get_parser()
    args = parser.parse_args()
    if args.verbose:
        logger.setLevel(logging.INFO)

    # Input file in .jsonlines format or .conll.
    input_filename = args.input_filename

    ext_input = os.path.splitext(input_filename)[-1]
    if ext_input not in ['.conll', '.jsonlines', '.txt', '.naf']:
        raise Exception(
            'Input file should be .naf, .conll, .txt or .jsonlines, but is {}.'
            .format(ext_input))

    if ext_input == '.conll':
        labels = collections.defaultdict(set)
        stats = collections.defaultdict(int)
        docs = minimize.minimize_partition(
            input_filename, labels, stats, args.word_col)
    elif ext_input == '.jsonlines':
        docs = read_jsonlines(input_filename)
    elif ext_input == '.naf':
        naf_obj = naf.get_naf(input_filename)
        jsonlines_obj, term_ids, tok_ids = naf.get_jsonlines(naf_obj)
        docs = [jsonlines_obj]
    else:
        with open(input_filename) as f:  # close the file after reading
            text = f.read()
        docs = [util.create_example(text)]

    output_file = args.output_file

    config = util.initialize_from_env(cfg_file=args.cfg_file,
                                      model_cfg_file=args.config)
    predictor = Predictor(config=config)

    sentences = {}
    predictions = {}
    for example_num, example in enumerate(docs):
        example["predicted_clusters"], _ = predictor.predict(example)
        if args.format_out == 'jsonlines':
            output_file.write(json.dumps(example))
            output_file.write("\n")
        else:
            predictions[example['doc_key']] = example["predicted_clusters"]
            sentences[example['doc_key']] = example["sentences"]
        if example_num % 100 == 0:
            logger.info("Decoded {} examples.".format(example_num + 1))
    if args.format_out == 'conll':
        conll.output_conll(output_file, sentences, predictions)
    elif args.format_out == 'naf':
        # Check number of docs - what to do if multiple?
        # Create a naf object if the input format was not naf.
        if ext_input != '.naf':
            # To do: add linguistic processing layers for terms and tokens.
            logger.warning(
                'Outputting NAF when input was not naf, '
                'no dependency information available')
            for doc_key in sentences:
                naf_obj, term_ids = naf.get_naf_from_sentences(
                    sentences[doc_key])
                naf_obj = naf.create_coref_layer(
                    naf_obj, predictions[doc_key], term_ids)
                naf_obj = naf.add_linguistic_processors(naf_obj)
                buffer = io.BytesIO()
                naf_obj.dump(buffer)
                output_file.write(buffer.getvalue().decode('utf-8'))
            # To do: make separate outputs?
            # To do: use dependency information from conll?
        else:
            # We only have one input doc.
            naf_obj = naf.create_coref_layer(
                naf_obj, example["predicted_clusters"], term_ids)
            naf_obj = naf.add_linguistic_processors(naf_obj)
            buffer = io.BytesIO()
            naf_obj.dump(buffer)
            output_file.write(buffer.getvalue().decode('utf-8'))
```
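Applied to main() above, Extract Method could pull the input dispatch into its own function. A minimal sketch follows; the read_docs name is invented, and the body only rearranges the code shown above:

```python
def read_docs(input_filename, word_col):
    """Dispatch on the input extension; return (docs, naf_obj, term_ids)."""
    ext_input = os.path.splitext(input_filename)[-1]
    naf_obj, term_ids = None, None
    if ext_input == '.conll':
        labels = collections.defaultdict(set)
        stats = collections.defaultdict(int)
        docs = minimize.minimize_partition(
            input_filename, labels, stats, word_col)
    elif ext_input == '.jsonlines':
        docs = read_jsonlines(input_filename)
    elif ext_input == '.naf':
        naf_obj = naf.get_naf(input_filename)
        jsonlines_obj, term_ids, _ = naf.get_jsonlines(naf_obj)
        docs = [jsonlines_obj]
    else:
        with open(input_filename) as f:
            docs = [util.create_example(f.read())]
    return docs, naf_obj, term_ids
```

With the dispatch extracted (and the output writing extracted the same way), main() shrinks to argument handling, prediction, and output, and each piece can be named and tested on its own.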