Conditions | 18 |
Total Lines | 158 |
Lines | 0 |
Ratio | 0 % |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.
Complex units like the function coalib.bearlib.languages.documentation.extract_documentation_with_docstyle() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to find such a component is to look for fields/methods (or, inside a function, local variables and code sections) that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | import re |
||
def _extract_continuous_comment(content, line, begin_match, each_line_marker):
    """
    Collects a comment whose each-line marker equals its end marker.

    Such a comment has no dedicated terminator; it simply extends over all
    directly following lines that (after stripping leading whitespace) start
    with the each-line marker.

    :param content:          List of source lines.
    :param line:             Index of the line containing the begin marker.
    :param begin_match:      Regex match object of the begin marker.
    :param each_line_marker: The marker every continuation line starts with.
    :return:                 A tuple ``(docstring, last_line, end_column)``
                             with 0-based positions.
    """
    marker_len = len(each_line_marker)
    parts = [content[line][begin_match.end():]]

    next_line = line + 1
    # Bounds are checked *before* accessing the line, so a comment starting
    # on the very last line of the content no longer raises an IndexError.
    while next_line < len(content):
        stripped = content[next_line].lstrip()
        if not stripped.startswith(each_line_marker):
            break

        parts.append(stripped[marker_len:])
        next_line += 1

    last_line = next_line - 1
    return "".join(parts), last_line, len(content[last_line])


def _extract_delimited_comment(content,
                               line,
                               begin_match,
                               each_line_marker,
                               end_marker):
    """
    Collects a comment that is terminated by a dedicated end marker.

    :param content:          List of source lines.
    :param line:             Index of the line containing the begin marker.
    :param begin_match:      Regex match object of the begin marker.
    :param each_line_marker: The marker expected on every line between the
                             begin and the end line (may be empty).
    :param end_marker:       The marker that closes the comment.
    :return:                 A tuple ``(docstring, last_line, end_column)``
                             with 0-based positions, or ``None`` if the
                             content does not form a complete comment for
                             these markers.
    """
    start_column = begin_match.start()

    end_marker_pos = content[line].find(end_marker, begin_match.end())
    if end_marker_pos != -1:
        # Begin and end marker are on the same line.
        docstring = content[line][begin_match.end():end_marker_pos]
        return docstring, line, end_marker_pos + len(end_marker)

    parts = [content[line][begin_match.end():]]

    next_line = line + 1
    if next_line >= len(content):
        # The begin marker is on the last line and was never closed.
        return None

    # Column where the text of an intermediate line starts. When no
    # each-line marker is set (i.e. for Python docstrings) this is the
    # column of the start marker, so the comment is aligned to it.
    body_column = start_column + len(each_line_marker)

    end_marker_pos = content[next_line].find(end_marker)
    while end_marker_pos == -1:
        # Check whether we violate the each-line marker "rule". For an
        # empty each-line marker the compared slice is empty too, so the
        # check always passes.
        if content[next_line][start_column:body_column] != each_line_marker:
            return None

        # TODO Test also other C style doccomments

        parts.append(content[next_line][body_column:])
        next_line += 1

        if next_line >= len(content):
            # End of content reached, so there's no closing marker and
            # that's a mismatch.
            return None

        end_marker_pos = content[next_line].find(end_marker)

    parts.append(content[next_line][start_column:end_marker_pos])
    return "".join(parts), next_line, end_marker_pos + len(end_marker)


def extract_documentation_with_docstyle(content, docstyle_definition):
    """
    Extracts all documentation texts inside the given source-code-string.

    :param content:             The source-code-string where to extract
                                documentation from or an iterable with strings
                                where each string is a single line (including
                                ending whitespaces like `\\n`).
    :param docstyle_definition: The DocstyleDefinition that identifies the
                                documentation comments.
    :return:                    An iterator returning each documentation text
                                found in the content.
    """
    if isinstance(content, str):
        content = content.splitlines(keepends=True)
    else:
        content = list(content)

    # Prepare marker-tuple dict that maps a begin pattern to the corresponding
    # marker_set(s). This makes it faster to retrieve a marker-set from a
    # begin sequence we initially want to search for in source code. Then
    # the possible found documentation match is processed further with the
    # rest markers.
    begin_sequence_dict = {}
    for marker_set in docstyle_definition.markers:
        begin_sequence_dict.setdefault(marker_set[0], []).append(marker_set)

    if not begin_sequence_dict:
        # Without markers nothing can match. An empty alternation would
        # otherwise compile to a regex matching the empty string everywhere
        # and make the dict lookup below fail.
        return

    # Using regexes to perform a variable match is faster than finding each
    # substring with `str.find()` choosing the lowest match.
    begin_regex = re.compile("|".join(
        re.escape(begin_marker) for begin_marker in begin_sequence_dict))

    line = 0
    line_pos = 0
    while line < len(content):
        begin_match = begin_regex.search(content[line], line_pos)

        if not begin_match:
            line += 1
            line_pos = 0
            continue

        begin_match_line = line
        # Prevents infinite loop when the start marker matches but not the
        # complete documentation comment.
        line_pos = begin_match.end()

        # begin_sequence_dict[begin_match.group()] returns the marker sets
        # that share the begin sequence which matched. The first marker set
        # that yields a complete comment wins.
        for marker_set in begin_sequence_dict[begin_match.group()]:
            if marker_set[1] == marker_set[2]:
                found = _extract_continuous_comment(content,
                                                    line,
                                                    begin_match,
                                                    marker_set[1])
            else:
                found = _extract_delimited_comment(content,
                                                   line,
                                                   begin_match,
                                                   marker_set[1],
                                                   marker_set[2])

            if found is None:
                # Mismatch, try the next marker set.
                continue

            # Continue scanning right behind the extracted comment.
            docstring, line, line_pos = found

            # Positions handed to TextRange are 1-based.
            rng = TextRange.from_values(begin_match_line + 1,
                                        begin_match.start() + 1,
                                        line + 1,
                                        line_pos + 1)

            yield DocumentationComment(docstring,
                                       docstyle_definition,
                                       marker_set,
                                       rng)

            break
||
173 | |||
202 |