1
|
|
|
import re |
2
|
|
|
|
3
|
|
|
from coalib.bearlib.languages.documentation.DocstyleDefinition import ( |
4
|
|
|
DocstyleDefinition) |
5
|
|
|
from coalib.bearlib.languages.documentation.DocumentationComment import ( |
6
|
|
|
DocumentationComment) |
7
|
|
|
from coalib.results.TextRange import TextRange |
8
|
|
|
|
9
|
|
|
|
10
|
|
|
#TODO - Implement Match also for `split` and `search_for`? File an issue |
11
|
|
|
#TODO - document currently existing docstyles from coala side? |
12
|
|
|
#TODO - Add ''' ''' as markers for python 2/3 inside doc-definition files. |
13
|
|
|
|
14
|
|
|
|
15
|
|
|
def extract_documentation_with_docstyle(content, docstyle_definition):
    """
    Extracts all documentation comments inside the given source code.

    Every entry of ``docstyle_definition.markers`` is treated as a 3-element
    marker set ``(begin, each_line, end)``. Two extraction modes exist:

    - ``each_line == end``: The comment starts at the begin marker and
      continues over all directly following lines whose first
      non-whitespace characters are the each-line marker.
    - otherwise: The comment runs from the begin marker to the first
      occurrence of the end marker. Intermediate lines must carry the
      each-line marker at the column of the begin marker (if the each-line
      marker is not empty), otherwise the marker set is rejected.

    :param content:             The source-code-string where to extract
                                documentation from or an iterable with strings
                                where each string is a single line (including
                                ending whitespaces like `\\n`).
    :param docstyle_definition: The DocstyleDefinition that identifies the
                                documentation comments. Only its ``markers``
                                attribute is read here; the object itself is
                                passed on to each ``DocumentationComment``.
    :return:                    An iterator yielding a DocumentationComment
                                (with the extracted text, the matching marker
                                set and a 1-based TextRange) for each
                                documentation comment found in the content,
                                in order of appearance.
    """
    if isinstance(content, str):
        content = content.splitlines(keepends=True)
    else:
        content = list(content)

    # Used to break out of outer loops via exception raise.
    class BreakOut(Exception):
        pass

    # Prepare marker-tuple dict that maps a begin pattern to the corresponding
    # marker_set(s). This makes it faster to retrieve a marker-set from a
    # begin sequence we initially want to search for in source code. Then
    # the possible found documentation match is processed further with the
    # rest markers.
    begin_sequence_dict = {}
    for marker_set in docstyle_definition.markers:
        begin_sequence_dict.setdefault(marker_set[0], []).append(marker_set)

    if not begin_sequence_dict:
        # Without any marker set the regex below would be the empty pattern,
        # which matches everywhere and has no entry in the dict. Nothing can
        # be extracted, so stop right away.
        return

    # Using regexes to perform a variable match is faster than finding each
    # substring with `str.find()` choosing the lowest match.
    begin_regex = re.compile("|".join(
        re.escape(marker_set[0])
        for marker_set in docstyle_definition.markers))

    line = 0
    line_pos = 0
    while line < len(content):
        begin_match = begin_regex.search(content[line], line_pos)

        if begin_match:
            begin_match_line = line
            # Prevents infinite loop when the start marker matches but not the
            # complete documentation comment.
            line_pos = begin_match.end()

            # begin_sequence_dict[begin_match.group()] returns the marker set
            # the begin sequence from before matched.
            for marker_set in begin_sequence_dict[begin_match.group()]:
                try:
                    # If the each-line marker and the end marker do equal,
                    # search for the each-line marker until it runs out.
                    if marker_set[1] == marker_set[2]:
                        docstring = content[line][begin_match.end():]

                        # Now the each-line marker is no requirement for a
                        # docstring any more, just extract as long as there
                        # are each-line markers. The bounds check comes
                        # first, so a begin marker on the very last line
                        # yields a one-line comment instead of raising an
                        # IndexError.
                        line2 = line + 1
                        while line2 < len(content):
                            stripped_content = content[line2].lstrip()
                            if not stripped_content.startswith(marker_set[1]):
                                break

                            docstring += stripped_content[len(marker_set[1]):]
                            line2 += 1

                        # The comment ends at the end of the last line that
                        # was consumed.
                        line = line2 - 1
                        line_pos = len(content[line])
                    else:
                        end_marker_pos = content[line].find(marker_set[2],
                                                            begin_match.end())

                        if end_marker_pos == -1:
                            docstring = content[line][begin_match.end():]

                            line2 = line + 1
                            if line2 >= len(content):
                                # No following line that could contain the
                                # end marker, try the next marker set.
                                continue

                            end_marker_pos = content[line2].find(marker_set[2])

                            while end_marker_pos == -1:
                                if marker_set[1] == "":
                                    # When no each-line marker is set (i.e.
                                    # for Python docstrings), then align the
                                    # comment to the start-marker.
                                    stripped_content = (
                                        content[line2][begin_match.start():])
                                else:
                                    # Check whether we violate the each-line
                                    # marker "rule".
                                    current_each_line_marker = (
                                        content[line2]
                                        [begin_match.start():
                                         begin_match.start()
                                         + len(marker_set[1])])
                                    if (current_each_line_marker !=
                                            marker_set[1]):
                                        # Effectively a 'continue' for the
                                        # outer for-loop.
                                        raise BreakOut

                                    stripped_content = (
                                        content[line2][begin_match.start()
                                                       + len(marker_set[1]):])

                                # TODO Test also other C style doccomments

                                docstring += stripped_content
                                line2 += 1

                                if line2 >= len(content):
                                    # End of content reached, so there's no
                                    # closing marker and that's a mismatch.
                                    raise BreakOut

                                end_marker_pos = content[line2].find(
                                    marker_set[2])

                            # Append the part of the closing line that lies
                            # between the begin-marker column and the end
                            # marker.
                            docstring += (content[line2]
                                          [begin_match.start():end_marker_pos])
                            line = line2
                        else:
                            # Begin and end marker are on the same line.
                            docstring = (content[line]
                                         [begin_match.end():end_marker_pos])

                        line_pos = end_marker_pos + len(marker_set[2])

                    # TextRange positions are 1-based, the indices used here
                    # are 0-based.
                    rng = TextRange.from_values(begin_match_line + 1,
                                                begin_match.start() + 1,
                                                line + 1,
                                                line_pos + 1)

                    yield DocumentationComment(docstring,
                                               docstyle_definition,
                                               marker_set,
                                               rng)

                    # The first marker set that matches completely wins.
                    break

                except BreakOut:
                    # Continues the marker_set loop.
                    pass

        else:
            line += 1
            line_pos = 0
173
|
|
|
|
174
|
|
|
|
175
|
|
|
def extract_documentation(content, language, docstyle):
    """
    Extracts all documentation comments from the given source code, using the
    coala docstyle definition that belongs to ``language`` and ``docstyle``.

    The definition is loaded via ``DocstyleDefinition.load`` and handed,
    together with the unmodified ``content``, to
    ``extract_documentation_with_docstyle``.

    The documentation texts are sorted by their order appearing in `content`.

    For more information about how documentation comments are identified and
    extracted, see DocstyleDefinition.doctypes enumeration.

    :param content:            The source-code-string where to extract
                               documentation from.
    :param language:           The programming language used.
    :param docstyle:           The documentation style/tool used
                               (i.e. doxygen).
    :raises FileNotFoundError: Raised when the docstyle definition file was not
                               found. This is a compatibility exception from
                               `coalib.misc.Compatability` module.
    :raises KeyError:          Raised when the given language is not defined in
                               given docstyle.
    :raises ValueError:        Raised when a docstyle definition setting has an
                               invalid format.
    :return:                   An iterator returning each DocumentationComment
                               found in the content.
    """
    return extract_documentation_with_docstyle(
        content,
        DocstyleDefinition.load(language, docstyle))
202
|
|
|
|