Failed Conditions
Pull Request — master (#1139)
by Mischa
01:46
created

coalib.bearlib.languages.documentation._extract_doccomment_continuous()   B

Complexity

Conditions 4

Size

Total Lines 46

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 4
dl 0
loc 46
rs 8.6316
1
import re
2
3
from coalib.bearlib.languages.documentation.DocstyleDefinition import (
4
    DocstyleDefinition)
5
from coalib.bearlib.languages.documentation.DocumentationComment import (
6
    DocumentationComment)
7
from coalib.results.TextPosition import TextPosition
8
from coalib.results.TextRange import TextRange
9
10
11
def _extract_doccomment_simple(content, line, column, markers):
    """
    Extract a documentation comment that uses the simple layout.

    The simple layout has no each-line marker: everything between the start
    marker and the end marker belongs to the documentation. This applies e.g.
    to python docstrings.

    :param content: Pre-split lines of the source-code-string.
    :param line:    Line where the documentation comment starts (behind the
                    start marker). Zero-based.
    :param column:  Column where the documentation comment starts (behind the
                    start marker). Zero-based.
    :param markers: The documentation identifying markers.
    :return:        If the comment matched the layout, a triple with
                    end-of-comment line, column and the extracted
                    documentation. If not matched, returns None.
    """
    # Follow-up lines are cut at the column where the start marker began.
    align_column = column - len(markers[0])
    end_marker = markers[2]

    first_line = content[line]
    end_pos = first_line.find(end_marker, column)
    if end_pos != -1:
        # Start and end marker share one line.
        return line, end_pos + len(end_marker), first_line[column:end_pos]

    pieces = [first_line[column:]]
    for current in range(line + 1, len(content)):
        text = content[current]
        end_pos = text.find(end_marker)
        if end_pos != -1:
            pieces.append(text[align_column:end_pos])
            return current, end_pos + len(end_marker), "".join(pieces)

        pieces.append(text[align_column:])

    # Ran out of lines without ever seeing the end marker.
    return None
48
49
50
def _extract_doccomment_continuous(content, line, column, markers):
    """
    Extract a documentation that starts at given beginning with continuous
    layout.

    The property of the continuous layout is that the each-line-marker and the
    end-marker are equal. Documentation is extracted until a line without the
    marker is found. Applies e.g. for doxygen style python documentation:

    ```
    ## main
    #
    #  detailed
    ```

    :param content: Pre-split lines of the source-code-string.
    :param line:    Line where the documentation comment starts (behind the
                    start marker). Zero-based.
    :param column:  Column where the documentation comment starts (behind the
                    start marker). Zero-based.
    :param markers: The documentation identifying markers.
    :return:        A triple with end-of-comment line, column and the
                    extracted documentation.
    """
    each_line_marker = markers[1]
    marker_len = len(each_line_marker)

    pieces = [content[line][column:]]
    line += 1
    while line < len(content):
        pos = content[line].find(each_line_marker)
        if pos == -1:
            # First line without the marker: the comment ended right before.
            return line, 0, "".join(pieces)

        pieces.append(content[line][pos + marker_len:])
        line += 1

    # The comment runs up to the end of the document. `endswith` is used
    # instead of indexing `[-1]` so an empty last line cannot raise.
    if content[line - 1].endswith("\n"):
        column = 0
    else:
        # This case can appear on end-of-document without a `\n`.
        line -= 1
        column = len(content[line])

    return line, column, "".join(pieces)
96
97
98
def _extract_doccomment_standard(content, line, column, markers):
    """
    Extract a documentation comment that uses the standard layout.

    The standard layout has distinct start, each-line and end markers, as
    e.g. in C doxygen-style documentation:

    ```
    /**
     * documentation
     */
    ```

    :param content: Pre-split lines of the source-code-string.
    :param line:    Line where the documentation comment starts (behind the
                    start marker). Zero-based.
    :param column:  Column where the documentation comment starts (behind the
                    start marker). Zero-based.
    :param markers: The documentation identifying markers.
    :return:        If the comment matched the layout, a triple with
                    end-of-comment line, column and the extracted
                    documentation. If not matched, returns None.
    """
    each_marker = markers[1]
    end_marker = markers[2]

    first_line = content[line]
    end_idx = first_line.find(end_marker, column)
    if end_idx != -1:
        # The whole comment sits on the starting line.
        return line, end_idx + len(end_marker), first_line[column:end_idx]

    pieces = [first_line[column:]]
    for current in range(line + 1, len(content)):
        text = content[current]
        end_idx = text.find(end_marker)
        each_idx = text.find(each_marker)

        if end_idx != -1:
            # Text in front of the end marker is only taken when an each-line
            # marker precedes it; otherwise nothing more is extracted and the
            # doc-comment simply ends here.
            if each_idx != -1 and each_idx + 1 < end_idx:
                pieces.append(text[each_idx + len(each_marker):end_idx])

            return current, end_idx + len(end_marker), "".join(pieces)

        if each_idx == -1:
            # A line carrying neither the each-line marker nor the end marker
            # violates the doc-comment layout.
            return None

        pieces.append(text[each_idx + len(each_marker):])

    # End marker never found.
    return None
150
151
152
def _extract_doccomment(content, line, column, markers):
    """
    Pick the extraction routine that fits the given markers and run it.

    An empty each-line marker selects the simple layout, an each-line marker
    equal to the end marker selects the continuous layout, and everything
    else is handled as standard layout.

    :param content: Pre-split lines of the source-code-string.
    :param line:    Line where the documentation comment starts (behind the
                    start marker). Zero-based.
    :param column:  Column where the documentation comment starts (behind the
                    start marker). Zero-based.
    :param markers: The documentation identifying markers.
    :return:        If the comment matched the layout, a triple with
                    end-of-comment line, column and the extracted
                    documentation. If not matched, returns None.
    """
    each_line_marker = markers[1]

    if each_line_marker == "":
        # Extract and align to start marker.
        extractor = _extract_doccomment_simple
    elif each_line_marker == markers[2]:
        # Search for the each-line marker until it runs out.
        extractor = _extract_doccomment_continuous
    else:
        extractor = _extract_doccomment_standard

    return extractor(content, line, column, markers)
174
175
176
def _compile_multi_match_regex(strings):
    """
    Compiles a regex object that matches each of the given strings.

    The strings are escaped, so they are matched literally. Alternatives are
    tried in the order given, so at the same position an earlier string wins
    over a later one.

    :param strings: The strings to match. May be any iterable (also a
                    generator).
    :return:        A regex object. If no strings are given, the returned
                    regex never matches.
    """
    alternatives = [re.escape(s) for s in strings]
    if not alternatives:
        # Joining nothing yields the empty pattern, which would match at
        # every position. "Match none of zero strings" must match nothing,
        # so use an always-failing negative lookahead instead.
        return re.compile(r"(?!)")

    return re.compile("|".join(alternatives))
184
185
186
def _extract_doccomment_from_line(content, line, column, regex, marker_dict):
    """
    Try to extract one documentation comment beginning on the given line.

    :param content:     Pre-split lines of the source-code-string.
    :param line:        Line to search for a begin marker. Zero-based.
    :param column:      Column from where the search starts. Zero-based.
    :param regex:       Regex object matching any of the begin markers.
    :param marker_dict: Maps a begin marker to the list of marker-sets that
                        start with it.
    :return:            A triple of line, column and result. On success these
                        are the zero-based end-of-comment line and column and
                        the DocumentationComment. Otherwise the next line,
                        column 0 and None.
    """
    begin_match = regex.search(content[line], column)
    if not begin_match:
        return line + 1, 0, None

    doc_column = begin_match.end()
    for marker in marker_dict[begin_match.group()]:
        extracted = _extract_doccomment(content, line, doc_column, marker)
        if extracted is None:
            # The extractors return None when the layout did not match, so
            # the triple is only unpacked after this check.
            continue

        # Text positions are one-based, line/column here are zero-based.
        start_position = TextPosition(line + 1, begin_match.start() + 1)
        end_line, end_column, documentation = extracted
        end_position = TextPosition(end_line + 1, end_column + 1)

        doc = DocumentationComment(documentation,
                                   marker,
                                   TextRange(start_position, end_position))

        return end_line, end_column, doc

    # A begin marker was found but no marker-set matched: go on with the
    # next line.
    return line + 1, 0, None
206
207
208
def extract_documentation_with_markers(content, markers):
    """
    Extracts all documentation texts inside the given source-code-string.

    :param content: The source-code-string where to extract documentation from.
                    Needs to be a list or tuple where each string item is a
                    single line (including ending whitespaces like `\\n`).
    :param markers: The list/tuple of marker-sets that identify a
                    documentation-comment. Low-index markers have higher
                    priority than high-index markers.
    :return:        An iterator returning each DocumentationComment found in
                    the content.
    """
    # Group the marker-sets by their begin marker. Once a begin marker is
    # found in the source, the candidate marker-sets can be looked up
    # directly and the match is processed further with the remaining markers.
    sets_by_begin = {}
    for marker_set in markers:
        sets_by_begin.setdefault(marker_set[0], []).append(marker_set)

    # Using regexes to perform a variable match is faster than finding each
    # substring with `str.find()` choosing the lowest match.
    begin_pattern = _compile_multi_match_regex(
        marker_set[0] for marker_set in markers)

    line, column = 0, 0
    while line < len(content):
        line, column, doc = _extract_doccomment_from_line(
            content, line, column, begin_pattern, sets_by_begin)
        if doc:
            yield doc
248
249
250
def extract_documentation(content, language, docstyle):
    """
    Extracts all documentation texts inside the given source-code-string using
    the coala docstyle definition files.

    The documentation texts are sorted by their order of appearance in
    `content`.

    For more information about how documentation comments are identified and
    extracted, see DocstyleDefinition.doctypes enumeration.

    :param content:            The source-code-string where to extract
                               documentation from. Needs to be a list or tuple
                               where each string item is a single line
                               (including ending whitespaces like `\\n`).
    :param language:           The programming language used.
    :param docstyle:           The documentation style/tool used
                               (e.g. doxygen).
    :raises FileNotFoundError: Raised when the docstyle definition file was not
                               found. This is a compatibility exception from
                               `coalib.misc.Compatability` module.
    :raises KeyError:          Raised when the given language is not defined in
                               given docstyle.
    :raises ValueError:        Raised when a docstyle definition setting has an
                               invalid format.
    :return:                   An iterator returning each DocumentationComment
                               found in the content.
    """
    markers = DocstyleDefinition.load(language, docstyle).markers
    return extract_documentation_with_markers(content, markers)
280