Failed Conditions
Pull Request — master (#1139)
by Mischa
01:51
created

coalib.bearlib.languages.documentation.extract_documentation_with_markers()   B

Complexity

Conditions 6

Size

Total Lines 40

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 6
dl 0
loc 40
rs 7.5385
1
import re
2
3
from coalib.bearlib.languages.documentation.DocstyleDefinition import (
4
    DocstyleDefinition)
5
from coalib.bearlib.languages.documentation.DocumentationComment import (
6
    DocumentationComment)
7
from coalib.results.TextRange import TextRange
8
9
10
def _extract_doc_comment_simple(content, line, column, markers):
11
    """
12
    Extract a documentation that starts at given beginning with simple layout.
13
14
    The property of the simple layout is that there's no each-line marker. This
15
    applies e.g. for python docstrings.
16
17
    :param content: Presplitted lines of the source-code-string.
18
    :param line:    Line where the documentation comment starts (behind the
19
                    start marker). Zero-based.
20
    :param column:  Column where the documentation comment starts (behind the
21
                    start marker). Zero-based.
22
    :param markers: The documentation identifying markers.
23
    :return:        If the comment matched layout a triple with end-of-comment
24
                    line, column and the extracted documentation. If not
25
                    matched, returns None.
26
    """
27
    align_column = column - len(markers[0])
28
29
    pos = content[line].find(markers[2], column)
30
    if pos != -1:
31
        return line, pos + len(markers[2]), content[line][column:pos]
32
33
    doc_comment = content[line][column:]
34
    line += 1
35
36
    while line < len(content):
37
        pos = content[line].find(markers[2])
38
        if pos == -1:
39
            doc_comment += content[line][align_column:]
40
        else:
41
            doc_comment += content[line][align_column:pos]
42
            return line, pos + len(markers[2]), doc_comment
43
44
        line += 1
45
46
    return None
47
48
49
def _extract_doc_comment_continuous(content, line, column, markers):
50
    """
51
    Extract a documentation that starts at given beginning with continuous
52
    layout.
53
54
    The property of the continuous layout is that the each-line-marker and the
55
    end-marker do equal. Documentation is extracted until no further marker is
56
    found. Applies e.g. for doxygen style python documentation:
57
58
    ```
59
    ## main
60
    #
61
    #  detailed
62
    ```
63
64
    :param content: Presplitted lines of the source-code-string.
65
    :param line:    Line where the documentation comment starts (behind the
66
                    start marker). Zero-based.
67
    :param column:  Column where the documentation comment starts (behind the
68
                    start marker). Zero-based.
69
    :param markers: The documentation identifying markers.
70
    :return:        If the comment matched layout a triple with end-of-comment
71
                    line, column and the extracted documentation. If not
72
                    matched, returns None.
73
    """
74
    marker_len = len(markers[1])
75
76
    doc_comment = content[line][column:]
77
    line += 1
78
    while line < len(content):
79
        pos = content[line].find(markers[1])
80
        if pos == -1:
81
            return line, 0, doc_comment
82
        else:
83
            doc_comment += content[line][pos+marker_len:]
84
85
        line += 1
86
87
    if content[line - 1][-1] == "\n":
88
        column = 0
89
    else:
90
        # This case can appear on end-of-document without a `\n`.
91
        line -= 1
92
        column = len(content[line])
93
94
    return line, column, doc_comment
95
96
97
def _extract_doc_comment_standard(content, line, column, markers):
98
    """
99
    Extract a documentation that starts at given beginning with standard
100
    layout.
101
102
    The standard layout applies e.g. for C doxygen-style documentation:
103
104
    ```
105
    /**
106
     * documentation
107
     */
108
    ```
109
110
    :param content: Presplitted lines of the source-code-string.
111
    :param line:    Line where the documentation comment starts (behind the
112
                    start marker). Zero-based.
113
    :param column:  Column where the documentation comment starts (behind the
114
                    start marker). Zero-based.
115
    :param markers: The documentation identifying markers.
116
    :return:        If the comment matched layout a triple with end-of-comment
117
                    line, column and the extracted documentation. If not
118
                    matched, returns None.
119
    """
120
    pos = content[line].find(markers[2], column)
121
    if pos != -1:
122
        return line, pos + len(markers[2]), content[line][column:pos]
123
124
    doc_comment = content[line][column:]
125
    line += 1
126
127
    while line < len(content):
128
        pos = content[line].find(markers[2])
129
        each_line_pos = content[line].find(markers[1])
130
131
        if pos == -1:
132
            if each_line_pos == -1:
133
                # If the first text occurrence is not the each-line marker
134
                # now we violate the doc-comment layout.
135
                return None
136
            doc_comment += content[line][each_line_pos+len(markers[1]):]
137
        else:
138
            # If no each-line marker found or it's located past the end marker:
139
            # extract no further and end the doc-comment.
140
            if each_line_pos != -1 and each_line_pos + 1 < pos:
141
                doc_comment += content[line][each_line_pos+len(markers[1]):pos]
142
143
            return line, pos + len(markers[2]), doc_comment
144
145
        line += 1
146
147
    return None
148
149
150
def _extract_doc_comment(content, line, column, markers):
    """
    Delegates depending on the given markers to the right extraction method.

    The layout is chosen from the each-line marker (``markers[1]``):

    - empty each-line marker: simple layout,
    - each-line marker equal to the end marker (``markers[2]``): continuous
      layout,
    - anything else: standard layout.

    :param content: Presplitted lines of the source-code-string.
    :param line:    Line where the documentation comment starts (behind the
                    start marker). Zero-based.
    :param column:  Column where the documentation comment starts (behind the
                    start marker). Zero-based.
    :param markers: The documentation identifying markers.
    :return:        If the comment matched layout a triple with end-of-comment
                    line, column and the extracted documentation. If not
                    matched, returns None.
    """
    each_line_marker = markers[1]

    if each_line_marker == "":
        # Extract and align to start marker.
        return _extract_doc_comment_simple(content, line, column, markers)

    if each_line_marker == markers[2]:
        # Search for the each-line marker until it runs out.
        return _extract_doc_comment_continuous(content, line, column, markers)

    return _extract_doc_comment_standard(content, line, column, markers)
172
173
174
def _compile_multi_match_regex(strings):
175
    """
176
    Compiles a regex object that matches each of the given strings.
177
178
    :param strings: The strings to match.
179
    :return:        A regex object.
180
    """
181
    return re.compile("|".join(re.escape(s) for s in strings))
182
183
184
def _extract_doc_comment_from_line(content, line, column, regex, marker_dict):
    """
    Searches the given line for a documentation comment start and extracts
    the comment if one of the candidate marker-sets matches.

    :param content:     Presplitted lines of the source-code-string.
    :param line:        Line to search in. Zero-based.
    :param column:      Column to start searching from. Zero-based.
    :param regex:       Compiled regex matching any of the start markers.
    :param marker_dict: Dict mapping a start marker to the list of marker-sets
                        beginning with it.
    :return:            A triple with the line and column to continue the
                        search from and the extracted DocumentationComment.
                        If nothing was extracted the triple is
                        ``(line + 1, 0, None)``.
    """
    begin_match = regex.search(content[line], column)
    if begin_match:
        column = begin_match.end()
        for marker in marker_dict[begin_match.group()]:
            doc_comment = _extract_doc_comment(content, line, column, marker)
            # Static analysis flags the unpacking below as "unpacking a
            # non-sequence" because the extraction functions may return None;
            # this guard is what makes it safe.
            if doc_comment is not None:
                end_line, end_column, documentation = doc_comment

                # The range is built from the zero-based positions shifted
                # by one; it starts at the start marker itself.
                rng = TextRange.from_values(line + 1,
                                            begin_match.start() + 1,
                                            end_line + 1,
                                            end_column + 1)
                doc = DocumentationComment(documentation, marker, rng)

                return end_line, end_column, doc

    # No start marker on this line or no marker-set matched: continue with
    # the next line.
    return line + 1, 0, None
202
203
204
def extract_documentation_with_markers(content, markers):
    """
    Extracts all documentation texts inside the given source-code-string.

    :param content: The source-code-string where to extract documentation from.
                    Needs to be a list or tuple where each string item is a
                    single line (including ending whitespaces like `\\n`).
    :param markers: The list/tuple of marker-sets that identify a
                    documentation-comment. Low-index markers have higher
                    priority than high-index markers.
    :return:        An iterator returning each DocumentationComment found in
                    the content. Nothing is yielded when `markers` is empty.
    """
    # Prepare marker-tuple dict that maps a begin pattern to the corresponding
    # marker_set(s). This makes it faster to retrieve a marker-set from a
    # begin sequence we initially want to search for in source code. Then
    # the possible found documentation match is processed further with the
    # rest markers.
    marker_dict = {}
    for marker_set in markers:
        marker_dict.setdefault(marker_set[0], []).append(marker_set)

    if not marker_dict:
        # Without marker-sets the begin regex would be empty and match
        # everywhere, leading to a KeyError on the `marker_dict` lookup.
        return

    # Using regexes to perform a variable match is faster than finding each
    # substring with `str.find()` choosing the lowest match.
    begin_regex = _compile_multi_match_regex(
        marker_set[0] for marker_set in markers)

    line = 0
    column = 0
    while line < len(content):
        line, column, doc = _extract_doc_comment_from_line(content,
                                                           line,
                                                           column,
                                                           begin_regex,
                                                           marker_dict)
        if doc:
            yield doc
244
245
246
def extract_documentation(content, language, docstyle):
    """
    Extracts all documentation texts inside the given source-code-string using
    the coala docstyle definition files.

    The documentation texts are sorted by their order appearing in `content`.

    For more information about how documentation comments are identified and
    extracted, see DocstyleDefinition.doctypes enumeration.

    :param content:            The source-code-string where to extract
                               documentation from. Needs to be a list or tuple
                               where each string item is a single line
                               (including ending whitespaces like `\\n`).
    :param language:           The programming language used.
    :param docstyle:           The documentation style/tool used
                               (e.g. doxygen).
    :raises FileNotFoundError: Raised when the docstyle definition file was not
                               found. This is a compatibility exception from
                               `coalib.misc.Compatability` module.
    :raises KeyError:          Raised when the given language is not defined in
                               given docstyle.
    :raises ValueError:        Raised when a docstyle definition setting has an
                               invalid format.
    :return:                   An iterator returning each DocumentationComment
                               found in the content.
    """
    # The marker-sets of the loaded definition drive the actual extraction.
    docstyle_definition = DocstyleDefinition.load(language, docstyle)
    return extract_documentation_with_markers(content,
                                              docstyle_definition.markers)
276