Failed Conditions
Pull Request — master (#1139)
by Mischa
01:50
created

coalib.bearlib.languages.documentation.extract_documentation()   B

Complexity

Conditions 1

Size

Total Lines 30

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 1
dl 0
loc 30
rs 8.8571
1
import re
2
3
from coalib.bearlib.languages.documentation.DocstyleDefinition import (
4
    DocstyleDefinition)
5
from coalib.bearlib.languages.documentation.DocumentationComment import (
6
    DocumentationComment)
7
from coalib.results.TextRange import TextRange
8
9
10
def _extract_doccomment_simple(content, line, column, markers):
11
    """
12
    Extract a documentation that starts at given beginning with simple layout.
13
14
    The property of the simple layout is that there's no each-line marker. This
15
    applies e.g. for python docstrings.
16
17
    :param content: Presplitted lines of the source-code-string.
18
    :param line:    Line where the documentation comment starts (behind the
19
                    start marker). Zero-based.
20
    :param column:  Column where the documentation comment starts (behind the
21
                    start marker). Zero-based.
22
    :param markers: The documentation identifying markers.
23
    :return:        If the comment matched layout a triple with end-of-comment
24
                    line, column and the extracted documentation. If not
25
                    matched, returns None.
26
    """
27
    align_column = column - len(markers[0])
28
29
    pos = content[line].find(markers[2], column)
30
    if pos == -1:
31
        doccomment = content[line][column:]
32
        line += 1
33
34
        while line < len(content):
35
            pos = content[line].find(markers[2])
36
            if pos == -1:
37
                doccomment += content[line][align_column:]
38
            else:
39
                doccomment += content[line][align_column:pos]
40
                return line, pos + len(markers[2]), doccomment
41
42
            line += 1
43
44
        return None
45
    else:
46
        return line, pos + len(markers[2]), content[line][column:pos]
47
48
49
def _extract_doccomment_continuous(content, line, column, markers):
50
    """
51
    Extract a documentation that starts at given beginning with continuous
52
    layout.
53
54
    The property of the continuous layout is that the each-line-marker and the
55
    end-marker do equal. Documentation is extracted until no further marker is
56
    found. Applies e.g. for doxygen style python documentation:
57
58
    ```
59
    ## main
60
    #
61
    #  detailed
62
    ```
63
64
    :param content: Presplitted lines of the source-code-string.
65
    :param line:    Line where the documentation comment starts (behind the
66
                    start marker). Zero-based.
67
    :param column:  Column where the documentation comment starts (behind the
68
                    start marker). Zero-based.
69
    :param markers: The documentation identifying markers.
70
    :return:        If the comment matched layout a triple with end-of-comment
71
                    line, column and the extracted documentation. If not
72
                    matched, returns None.
73
    """
74
    marker_len = len(markers[1])
75
76
    doccomment = content[line][column:]
77
    line += 1
78
    while line < len(content):
79
        pos = content[line].find(markers[1])
80
        if pos == -1:
81
            return line, 0, doccomment
82
        else:
83
            doccomment += content[line][pos+marker_len:]
84
85
        line += 1
86
87
    if content[line - 1][-1] == "\n":
88
        column = 0
89
    else:
90
        # This case can appear on end-of-document without a `\n`.
91
        line -= 1
92
        column = len(content[line])
93
94
    return line, column, doccomment
95
96
97
def _extract_doccomment_standard(content, line, column, markers):
98
    """
99
    Extract a documentation that starts at given beginning with standard
100
    layout.
101
102
    The standard layout applies e.g. for C doxygen-style documentation:
103
104
    ```
105
    /**
106
     * documentation
107
     */
108
    ```
109
110
    :param content: Presplitted lines of the source-code-string.
111
    :param line:    Line where the documentation comment starts (behind the
112
                    start marker). Zero-based.
113
    :param column:  Column where the documentation comment starts (behind the
114
                    start marker). Zero-based.
115
    :param markers: The documentation identifying markers.
116
    :return:        If the comment matched layout a triple with end-of-comment
117
                    line, column and the extracted documentation. If not
118
                    matched, returns None.
119
    """
120
    pos = content[line].find(markers[2], column)
121
    if pos != -1:
122
        return line, pos + len(markers[2]), content[line][column:pos]
123
    else:
124
        doccomment = content[line][column:]
125
        line += 1
126
127
        while line < len(content):
128
            pos = content[line].find(markers[2])
129
            each_line_pos = content[line].find(markers[1])
130
131
            if pos == -1:
132
                if each_line_pos == -1:
133
                    # If the first text occurrence is not the each-line marker
134
                    # now we violate the doc-comment layout.
135
                    return None
136
                doccomment += content[line][each_line_pos+len(markers[1]):]
137
            else:
138
                # If no each-line marker found or it's located past the end
139
                # marker: extract no further and end the doc-comment.
140
                if each_line_pos != -1 and each_line_pos + 1 < pos:
141
                    doccomment += (
142
                        content[line][each_line_pos+len(markers[1]):pos])
143
144
                return line, pos + len(markers[2]), doccomment
145
146
            line += 1
147
148
        return None
149
150
151
def _extract_doccomment(content, line, column, markers):
    """
    Picks the extraction routine that fits the given markers and runs it.

    An empty each-line marker selects the simple layout, an each-line marker
    equal to the end marker selects the continuous layout and everything else
    is handled as standard layout.

    :param content: Presplitted lines of the source-code-string.
    :param line:    Line where the documentation comment starts (behind the
                    start marker). Zero-based.
    :param column:  Column where the documentation comment starts (behind the
                    start marker). Zero-based.
    :param markers: The documentation identifying markers.
    :return:        If the comment matched layout a triple with end-of-comment
                    line, column and the extracted documentation. If not
                    matched, returns None.
    """
    each_line_marker = markers[1]

    if each_line_marker == "":
        # Extract and align to start marker.
        extractor = _extract_doccomment_simple
    elif each_line_marker == markers[2]:
        # Search for the each-line marker until it runs out.
        extractor = _extract_doccomment_continuous
    else:
        extractor = _extract_doccomment_standard

    return extractor(content, line, column, markers)
173
174
175
def _compile_multi_match_regex(strings):
176
    """
177
    Compiles a regex object that matches each of the given strings.
178
179
    :param strings: The strings to match.
180
    :return:        A regex object.
181
    """
182
    return re.compile("|".join(re.escape(s) for s in strings))
183
184
185
def _extract_doccomment_from_line(content, line, column, regex, marker_dict):
186
    begin_match = regex.search(content[line], column)
187
    if begin_match:
188
        column = begin_match.end()
189
        for marker in marker_dict[begin_match.group()]:
190
            doccomment = _extract_doccomment(content, line, column, marker)
191
            if doccomment is not None:
192
                end_line, end_column, documentation = doccomment
0 ignored issues
show
Bug Best Practice introduced by
It seems like you are trying to unpack a non-sequence, which was defined at line 44.
Loading history...
Bug Best Practice introduced by
It seems like you are trying to unpack a non-sequence, which was defined at line 135.
Loading history...
Bug Best Practice introduced by
It seems like you are trying to unpack a non-sequence, which was defined at line 148.
Loading history...
193
194
                rng = TextRange.from_values(line + 1,
195
                                            begin_match.start() + 1,
196
                                            end_line + 1,
197
                                            end_column + 1)
198
                doc = DocumentationComment(documentation, marker, rng)
199
200
                return end_line, end_column, doc
201
202
    return line + 1, 0, None
203
204
205
def extract_documentation_with_markers(content, markers):
    """
    Extracts all documentation texts inside the given source-code-string.

    :param content: The source-code-string where to extract documentation from.
                    Needs to be a list or tuple where each string item is a
                    single line (including ending whitespaces like `\\n`).
    :param markers: The list/tuple of marker-sets that identify a
                    documentation-comment. Low-index markers have higher
                    priority than high-index markers.
    :return:        An iterator returning each DocumentationComment found in
                    the content.
    """
    # Group the marker-sets by their begin pattern. The begin pattern is what
    # gets searched for in the source code first; once one is found, the
    # marker-sets sharing it are looked up here and tried in order.
    marker_dict = {}
    for marker_set in markers:
        marker_dict.setdefault(marker_set[0], []).append(marker_set)

    # A single regex matching all begin patterns is faster than finding each
    # substring with `str.find()` and choosing the lowest match.
    begin_regex = _compile_multi_match_regex(
        marker_set[0] for marker_set in markers)

    line = column = 0
    while line < len(content):
        # Each call reports the position where the search has to continue.
        line, column, doc = _extract_doccomment_from_line(content,
                                                          line,
                                                          column,
                                                          begin_regex,
                                                          marker_dict)
        if doc:
            yield doc
245
246
247
def extract_documentation(content, language, docstyle):
    """
    Extracts all documentation texts inside the given source-code-string using
    the coala docstyle definition files.

    The documentation texts are sorted by their order appearing in `content`.

    For more information about how documentation comments are identified and
    extracted, see DocstyleDefinition.doctypes enumeration.

    :param content:            The source-code-string where to extract
                               documentation from. Needs to be a list or tuple
                               where each string item is a single line
                               (including ending whitespaces like `\\n`).
    :param language:           The programming language used.
    :param docstyle:           The documentation style/tool used
                               (e.g. doxygen).
    :raises FileNotFoundError: Raised when the docstyle definition file was not
                               found. This is a compatibility exception from
                               the `coalib.misc.Compatability` module.
    :raises KeyError:          Raised when the given language is not defined in
                               given docstyle.
    :raises ValueError:        Raised when a docstyle definition setting has an
                               invalid format.
    :return:                   An iterator returning each DocumentationComment
                               found in the content.
    """
    # Load the marker-sets for this language/docstyle combination and hand
    # the actual extraction over to the marker-based routine.
    definition = DocstyleDefinition.load(language, docstyle)
    marker_sets = definition.markers
    return extract_documentation_with_markers(content, marker_sets)
277