| Metric | Value |
| --- | --- |
| Conditions | 14 |
| Total Lines | 86 |
| Code Lines | 71 |
| Duplicated Lines | 0 |
| Duplication Ratio | 0 % |
| Changes | 0 |
Small methods make your code easier to understand, especially when combined with a good name. Conversely, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, that is usually a sign that the commented part should be extracted into a new method; the comment itself is a good starting point for naming it, as in the sketch below.
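A minimal, hypothetical sketch of that move; every name here is invented for illustration:

```python
# Before: a comment explains what the block does.
def invoice_total(items):
    total = sum(item.price for item in items)
    # apply a 10% bulk discount for more than ten items
    if len(items) > 10:
        total *= 0.9
    return total


# After Extract Method: the comment has become the method name.
def apply_bulk_discount(total, n_items):
    return total * 0.9 if n_items > 10 else total


def invoice_total(items):
    return apply_bulk_discount(sum(item.price for item in items), len(items))
```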
Commonly applied refactorings include:
- Extract Method
- If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object
Complex functions like e2edutch.predict.main() often do a lot of different things. To break such a function down, we need to identify cohesive components within it. A common approach is to look for variables and statements that share the same prefixes or suffixes.
Once you have determined which parts belong together, you can apply the Extract Method refactoring; if the parts also carry shared state, Extract Class is a candidate, turning the group into a small helper class, as sketched below.
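A hypothetical sketch of the prefix-grouping heuristic and the Extract Class move; every name here is invented for illustration, not taken from e2edutch:

```python
# Before: the naf_-prefixed attributes hint at a cohesive component.
class Predictor:
    def __init__(self):
        self.naf_obj = None
        self.naf_term_ids = None

    def naf_load(self, filename):
        ...

    def naf_write(self, output_file):
        ...


# After Extract Class: the NAF handling lives in its own class.
class NafDocument:
    def __init__(self):
        self.obj = None
        self.term_ids = None

    def load(self, filename):
        ...

    def write(self, output_file):
        ...


class Predictor:
    def __init__(self):
        self.naf = NafDocument()
```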
```python
import sys
# The report elides the remaining imports and the helpers it uses
# (get_parser, read_jsonlines); the imports below are reconstructed
# from usage and may not match the source file exactly.
import collections
import io
import json
import logging
import os

import tensorflow as tf

from e2edutch import conll, coref_model as cm, minimize, naf, util


def main(args=None):
    parser = get_parser()
    args = parser.parse_args()
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    config = util.initialize_from_env(args.config, args.cfg_file)

    # Input file in .jsonlines format or .conll.
    input_filename = args.input_filename

    ext_input = os.path.splitext(input_filename)[-1]
    if ext_input not in ['.conll', '.jsonlines', '.txt', '.naf']:
        raise Exception(
            'Input file should be .naf, .conll, .txt or .jsonlines, but is {}.'
            .format(ext_input))

    if ext_input == '.conll':
        labels = collections.defaultdict(set)
        stats = collections.defaultdict(int)
        docs = minimize.minimize_partition(
            input_filename, labels, stats, args.word_col)
    elif ext_input == '.jsonlines':
        docs = read_jsonlines(input_filename)
    elif ext_input == '.naf':
        naf_obj = naf.get_naf(input_filename)
        jsonlines_obj, term_ids, tok_ids = naf.get_jsonlines(naf_obj)
        docs = [jsonlines_obj]
    else:
        with open(input_filename) as f:  # close the file explicitly
            text = f.read()
        docs = [util.create_example(text)]

    output_file = args.output_file
    model = cm.CorefModel(config)
    sentences = {}
    predictions = {}
    with tf.Session() as session:
        model.restore(session)
        for example_num, example in enumerate(docs):
            # logging.info(example['doc_key'])
            tensorized_example = model.tensorize_example(
                example, is_training=False)
            feed_dict = {i: t for i, t in zip(
                model.input_tensors, tensorized_example)}
            (_, _, _, top_span_starts, top_span_ends, top_antecedents,
             top_antecedent_scores) = session.run(
                model.predictions, feed_dict=feed_dict)
            predicted_antecedents = model.get_predicted_antecedents(
                top_antecedents, top_antecedent_scores)
            example["predicted_clusters"], _ = model.get_predicted_clusters(
                top_span_starts, top_span_ends, predicted_antecedents)
            if args.format_out == 'jsonlines':
                output_file.write(json.dumps(example))
                output_file.write("\n")
            else:
                predictions[example['doc_key']] = example["predicted_clusters"]
                sentences[example['doc_key']] = example["sentences"]
            if example_num % 100 == 0:
                logging.info("Decoded {} examples.".format(example_num + 1))

    if args.format_out == 'conll':
        conll.output_conll(output_file, sentences, predictions)
    elif args.format_out == 'naf':
        # Check number of docs - what to do if multiple?
        # Create a NAF object if the input format was not NAF.
        if ext_input != '.naf':
            # To do: add linguistic processing layers for terms and tokens
            logging.warning(  # warning, not the deprecated warn
                'Outputting NAF when input was not NAF, '
                'no dependency information available')
            for doc_key in sentences:
                naf_obj, term_ids = naf.get_naf_from_sentences(
                    sentences[doc_key])
                naf_obj = naf.create_coref_layer(
                    naf_obj, predictions[doc_key], term_ids)
                naf_obj = naf.add_linguistic_processors(naf_obj)
                buffer = io.BytesIO()
                naf_obj.dump(buffer)
                output_file.write(buffer.getvalue().decode('utf-8'))
            # To do: make separate outputs?
            # To do: use dependency information from conll?
        else:
            # We only have one input doc
            naf_obj = naf.create_coref_layer(
                naf_obj, example["predicted_clusters"], term_ids)
            naf_obj = naf.add_linguistic_processors(naf_obj)
            buffer = io.BytesIO()
            naf_obj.dump(buffer)
            output_file.write(buffer.getvalue().decode('utf-8'))
```
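Applying the advice above to this listing, the format-dispatching input code is one obvious cohesive component. A hedged sketch of extracting it, assuming the surrounding imports and helpers stay as in the listing; `read_docs` is a name invented here, not part of e2edutch:

```python
def read_docs(input_filename, ext_input, word_col):
    """Load the input documents, dispatching on the file extension."""
    if ext_input == '.conll':
        labels = collections.defaultdict(set)
        stats = collections.defaultdict(int)
        return minimize.minimize_partition(
            input_filename, labels, stats, word_col)
    elif ext_input == '.jsonlines':
        return read_jsonlines(input_filename)
    elif ext_input == '.naf':
        naf_obj = naf.get_naf(input_filename)
        jsonlines_obj, term_ids, tok_ids = naf.get_jsonlines(naf_obj)
        return [jsonlines_obj]
    else:
        with open(input_filename) as f:
            return [util.create_example(f.read())]
```

main() would then open with `docs = read_docs(input_filename, ext_input, args.word_col)`. Note that the `.naf` branch also produces `naf_obj` and `term_ids`, which the NAF output path reuses much later; that hidden coupling is exactly the kind of shared state the Extract Class advice targets, for example by letting a small NAF wrapper class own both values.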