| Conditions | 14 |
| Total Lines | 60 |
| Code Lines | 52 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
Complex methods like e2edutch.minimize.handle_line() often do a lot of different things. To break such a method down, we need to identify a cohesive piece of work within it. A common approach to find such a piece is to look for groups of statements that operate on the same data, or that are separated by explanatory comments.
Once you have determined the statements that belong together, you can apply the Extract Method refactoring, using the role of the extracted code as a starting point when coming up with a good name for the new method.
| 1 | import re |
||
def handle_line(line, document_state, labels, stats, word_col):
    """Process a single line of a CoNLL-style coreference file.

    Dispatches on the kind of line:
      * "#begin document" header: keys ``document_state`` via the
        ``conll`` helpers and returns ``None``.
      * "#end document" footer: flushes any pending sentence, finalizes
        the document and returns the finalized state dict (which
        contains at least a "clusters" entry).
      * blank line: ends the current sentence (if any); returns ``None``.
      * token row: appends the normalized word to the current sentence
        and records coreference spans from the last column; returns
        ``None``.

    Args:
        line: raw input line. NOTE(review): the exact CoNLL dialect is
            determined by the ``conll`` module — confirm against callers.
        document_state: mutable accumulator exposing ``text``,
            ``sentences``, ``clusters``, ``coref_stacks``, ``doc_key``
            and the ``assert_empty``/``assert_finalizable``/``finalize``
            methods; updated in place.
        labels: currently unused (label collection is commented out).
        stats: dict with "max_sent_len", "num_sents", "num_clusters"
            and "num_mentions" counters; updated in place.
        word_col: index of the word column in a whitespace-split row.

    Returns:
        The finalized document dict on "#end document", otherwise None.

    Raises:
        ValueError: if a non-blank data row has fewer than 4 columns.
    """
    begin_document_match = re.match(conll.BEGIN_DOCUMENT_REGEX, line)
    if begin_document_match:
        # New document header: the previous document must have been
        # fully consumed before we re-key the state.
        document_state.assert_empty()
        document_state.doc_key = conll.get_doc_key(
            *begin_document_match.groups())
        return None

    if line.startswith("#end document"):
        # Flush a sentence that had no trailing newline before the footer.
        if document_state.text:
            _finish_sentence(document_state, stats)
        document_state.assert_finalizable()
        finalized_state = document_state.finalize()
        stats["num_clusters"] += len(finalized_state["clusters"])
        stats["num_mentions"] += sum(
            len(c) for c in finalized_state["clusters"])
        # labels["const_labels"].update(
        #     l for _, _, l in finalized_state["constituents"])
        # labels["ner"].update(l for _, _, l in finalized_state["ner"])
        return finalized_state

    row = line.split()
    if not row:
        # Blank line marks a sentence boundary; a no-op when no
        # sentence is currently open.
        if document_state.text:
            _finish_sentence(document_state, stats)
        return None

    # Explicit validation instead of a bare `assert`, which is
    # stripped under `python -O`.
    if len(row) < 4:
        raise ValueError(
            "malformed CoNLL row (expected >= 4 columns): %r" % line)

    word = normalize_word(row[word_col])
    coref = row[-1]

    # Document-global token index: tokens in finished sentences plus
    # tokens already buffered in the current sentence.
    word_index = (len(document_state.text)
                  + sum(len(s) for s in document_state.sentences))
    document_state.text.append(word)

    if coref not in ("-", "_"):
        _record_coref(coref, word_index, document_state)
    return None


def _finish_sentence(document_state, stats):
    """Move the buffered tokens into a finished sentence; update stats."""
    stats["max_sent_len"] = max(
        len(document_state.text), stats["max_sent_len"])
    stats["num_sents"] += 1
    document_state.sentences.append(tuple(document_state.text))
    del document_state.text[:]


def _record_coref(coref, word_index, document_state):
    """Parse a coref cell like "(5|12)" and record opened/closed spans."""
    for segment in coref.split("|"):
        if segment[0] == "(":
            if segment[-1] == ")":
                # Single-token mention, e.g. "(5)".
                cluster_id = int(segment[1:-1])
                document_state.clusters[cluster_id].append(
                    (word_index, word_index))
            else:
                # Mention opens here, e.g. "(5"; closed by a later "5)".
                cluster_id = int(segment[1:])
                document_state.coref_stacks[cluster_id].append(word_index)
        elif segment[-1] == ")":
            # Mention closes here; pair with the most recent open.
            cluster_id = int(segment[:-1])
            start = document_state.coref_stacks[cluster_id].pop()
            document_state.clusters[cluster_id].append((start, word_index))
||
| 176 | |||
| 226 |