Failed Conditions
Pull Request — master (#1138)
by Mischa
02:04
created

coalib.bearlib.languages.documentation.extract_documentation()   B

Complexity

Conditions 1

Size

Total Lines 30

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 1
dl 0
loc 30
rs 8.8571
1
import re
2
3
from coalib.bearlib.languages.documentation.DocstyleDefinition import (
4
    DocstyleDefinition)
5
from coalib.bearlib.languages.documentation.DocumentationComment import (
6
    DocumentationComment)
7
from coalib.results.TextPosition import TextPosition
8
from coalib.results.TextRange import TextRange
9
10
11
def _extract_doccomment_simple(content, line, column, markers):
    """
    Extracts a documentation comment that has no each-line marker.

    Text on follow-up lines is taken starting at the column where the start
    marker began, i.e. it is aligned to the start marker.

    :param content: The source lines (a sequence of strings, one per line).
    :param line:    The index of the line the comment starts in.
    :param column:  The column inside that line where the comment text starts
                    (directly behind the start marker).
    :param markers: The marker-set; index 0 is the start marker and index 2
                    the end marker.
    :return:        A tuple ``(line, column, text)`` where ``line``/``column``
                    point directly behind the end marker and ``text`` is the
                    extracted comment, or ``None`` if no end marker was found.
    """
    indent = column - len(markers[0])
    end_marker = markers[2]
    opening = content[line]

    end = opening.find(end_marker, column)
    if end != -1:
        # Start and end marker share the same line.
        return line, end + len(end_marker), opening[column:end]

    pieces = [opening[column:]]
    for index in range(line + 1, len(content)):
        current = content[index]
        end = current.find(end_marker)
        if end != -1:
            pieces.append(current[indent:end])
            return index, end + len(end_marker), "".join(pieces)
        pieces.append(current[indent:])

    # The content ended before an end marker showed up.
    return None
32
33
34
def _extract_doccomment_continuous(content, line, column, markers):
    """
    Extracts a documentation comment whose lines each carry the each-line
    marker, ending at the first line that does not contain it.

    :param content: The source lines (a sequence of strings, one per line).
    :param line:    The index of the line the comment starts in.
    :param column:  The column inside that line where the comment text starts.
    :param markers: The marker-set; only the each-line marker (index 1) is
                    used.
    :return:        A tuple ``(line, column, text)``. The position is the
                    start of the first line without the each-line marker. If
                    the content ends first, it is the start of the line behind
                    the last one when the last line ends with a newline, and
                    the end of the last line otherwise.
    """
    each_line_marker = markers[1]
    skip = len(each_line_marker)
    total = len(content)

    pieces = [content[line][column:]]
    for index in range(line + 1, total):
        current = content[index]
        found = current.find(each_line_marker)
        if found == -1:
            # A line without the marker terminates the comment.
            return index, 0, "".join(pieces)
        pieces.append(current[found + skip:])

    text = "".join(pieces)
    if content[total - 1][-1] == "\n":
        return total, 0, text

    # The document ends without a trailing `\n`: stay on the last line and
    # point behind its last character.
    last = total - 1
    return last, len(content[last]), text
56
57
58
def _extract_doccomment_standard(content, line, column, markers):
    """
    Extracts a documentation comment that uses a start marker, an each-line
    marker on every follow-up line and a distinct end marker.

    :param content: The source lines (a sequence of strings, one per line).
    :param line:    The index of the line the comment starts in.
    :param column:  The column inside that line where the comment text starts.
    :param markers: The marker-set; index 1 is the each-line marker and index
                    2 the end marker.
    :return:        A tuple ``(line, column, text)`` where ``line``/``column``
                    point directly behind the end marker, or ``None`` if a
                    follow-up line has neither marker or the content ends
                    before the end marker is found.
    """
    end_marker = markers[2]
    end_len = len(end_marker)
    opening = content[line]

    end = opening.find(end_marker, column)
    if end != -1:
        # The whole comment sits on a single line.
        return line, end + end_len, opening[column:end]

    each_line_marker = markers[1]
    text_offset = len(each_line_marker)
    pieces = [opening[column:]]

    for index in range(line + 1, len(content)):
        current = content[index]
        end = current.find(end_marker)
        marker_at = current.find(each_line_marker)

        if end != -1:
            # Closing line: only take text if an each-line marker exists and
            # lies in front of the end marker; otherwise extract no further.
            if marker_at != -1 and marker_at + 1 < end:
                pieces.append(current[marker_at + text_offset:end])
            return index, end + end_len, "".join(pieces)

        if marker_at == -1:
            # Neither marker on this line: the doc-comment layout is violated.
            return None

        pieces.append(current[marker_at + text_offset:])

    # The content ended before an end marker showed up.
    return None
88
89
90
def _extract_doccomment(content, line, column, markers):
    """
    Picks the extraction routine matching the layout of the given marker-set
    and runs it.

    :param content: The source lines (a sequence of strings, one per line).
    :param line:    The index of the line the comment starts in.
    :param column:  The column inside that line where the comment text starts.
    :param markers: The marker-set (start, each-line and end marker).
    :return:        Whatever the selected extraction routine returns: a tuple
                    ``(line, column, text)`` or ``None``.
    """
    each_line_marker = markers[1]

    if each_line_marker == "":
        # No each-line marker: extract and align to the start marker.
        extractor = _extract_doccomment_simple
    elif each_line_marker == markers[2]:
        # Each-line and end marker are equal: follow the each-line marker
        # until it runs out.
        extractor = _extract_doccomment_continuous
    else:
        extractor = _extract_doccomment_standard

    return extractor(content, line, column, markers)
99
100
101
def _compile_multi_match_regex(strings):
    """
    Builds a compiled regex that matches any one of the given strings
    literally.

    :param strings: An iterable of the strings to match.
    :return:        The compiled regex object; the strings are escaped and
                    joined as alternatives in iteration order.
    """
    escaped_alternatives = map(re.escape, strings)
    pattern = "|".join(escaped_alternatives)
    return re.compile(pattern)
109
110
111
def extract_documentation_with_markers(content, markers):
    """
    Extracts all documentation texts inside the given source-code-string.

    :param content: The source-code-string where to extract documentation from.
                    Needs to be a list or tuple where each string item is a
                    single line (including ending whitespaces like `\\n`).
    :param markers: The list/tuple of marker-sets that identify a
                    documentation-comment. Low-index markers have higher
                    priority than high-index markers. If it is empty, nothing
                    is yielded.
    :return:        An iterator returning each DocumentationComment found in
                    the content.
    """
    # Without any marker-set there is nothing to find. Bail out early: joining
    # zero alternatives would compile to the empty pattern, which matches at
    # every position and would then fail the `marker_dict` lookup below.
    if not markers:
        return

    # Prepare marker-tuple dict that maps a begin pattern to the corresponding
    # marker_set(s). This makes it faster to retrieve a marker-set from a
    # begin sequence we initially want to search for in source code. Then
    # the possible found documentation match is processed further with the
    # rest markers.
    marker_dict = {}
    for marker_set in markers:
        marker_dict.setdefault(marker_set[0], []).append(marker_set)

    # Using regexes to perform a variable match is faster than finding each
    # substring with `str.find()` choosing the lowest match.
    begin_regex = _compile_multi_match_regex(
        marker_set[0] for marker_set in markers)

    line = 0
    column = 0
    while line < len(content):
        begin_match = begin_regex.search(content[line], column)
        if begin_match is None:
            # No further start marker in this line, continue with the next.
            line += 1
            column = 0
            continue

        # Continue behind the start marker, regardless of whether one of the
        # marker-sets below yields a comment.
        column = begin_match.end()
        for marker in marker_dict[begin_match.group()]:
            doccomment = _extract_doccomment(content, line, column, marker)
            if doccomment is None:
                # This marker-set does not fit, try the next candidate.
                continue

            # Line and column indices are turned into positions by adding 1.
            start_position = TextPosition(line + 1,
                                          begin_match.start() + 1)
            # `doccomment` is a `(line, column, text)` tuple at this point;
            # the `None` results were skipped above, so unpacking is safe.
            line, column, doccomment = doccomment
            end_position = TextPosition(line + 1, column + 1)

            yield DocumentationComment(doccomment,
                                       marker,
                                       TextRange(start_position,
                                                 end_position))
            break
163
164
165
def extract_documentation(content, language, docstyle):
    """
    Extracts all documentation texts inside the given source-code-string using
    the coala docstyle definition files.

    The documentation texts are sorted by their order appearing in `content`.

    For more information about how documentation comments are identified and
    extracted, see DocstyleDefinition.doctypes enumeration.

    :param content:            The source-code-string where to extract
                               documentation from. Needs to be a list or tuple
                               where each string item is a single line
                               (including ending whitespaces like `\\n`).
    :param language:           The programming language used.
    :param docstyle:           The documentation style/tool used
                               (e.g. doxygen).
    :raises FileNotFoundError: Raised when the docstyle definition file was not
                               found. This is a compatibility exception from
                               `coalib.misc.Compatability` module.
    :raises KeyError:          Raised when the given language is not defined in
                               given docstyle.
    :raises ValueError:        Raised when a docstyle definition setting has an
                               invalid format.
    :return:                   An iterator returning each DocumentationComment
                               found in the content.
    """
    # Load the marker-sets for this language/docstyle pair and delegate the
    # actual extraction.
    markers = DocstyleDefinition.load(language, docstyle).markers
    return extract_documentation_with_markers(content, markers)
195