Failed Conditions
Pull Request — master (#1138)
by Mischa
02:14
created

coalib.bearlib.languages.documentation._extract_doccomment_standard()   C

Complexity

Conditions 7

Size

Total Lines 30

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 7
dl 0
loc 30
rs 5.5
1
import re
2
3
from coalib.bearlib.languages.documentation.DocstyleDefinition import (
4
    DocstyleDefinition)
5
from coalib.bearlib.languages.documentation.DocumentationComment import (
6
    DocumentationComment)
7
from coalib.results.TextPosition import TextPosition
8
from coalib.results.TextRange import TextRange
9
10
11
def _extract_doccomment_simple(content, line, column, markers):
12
    align_column = column - len(markers[0])
13
14
    pos = content[line].find(markers[2], column)
15
    if pos == -1:
16
        doccomment = content[line][column:]
17
        line += 1
18
19
        while line < len(content):
20
            pos = content[line].find(markers[2])
21
            if pos == -1:
22
                doccomment += content[line][align_column:]
23
            else:
24
                doccomment += content[line][align_column:pos]
25
                return line, pos + len(markers[2]), doccomment
26
27
            line += 1
28
29
        return None
30
    else:
31
        return line, pos + len(markers[2]), content[line][column:pos]
32
33
34
def _extract_doccomment_continuous(content, line, column, markers):
35
    marker_len = len(markers[1])
36
37
    doccomment = content[line][column:]
38
    line += 1
39
    while line < len(content):
40
        pos = content[line].find(markers[1])
41
        if pos == -1:
42
            return line, 0, doccomment
43
        else:
44
            doccomment += content[line][pos+marker_len:]
45
46
        line += 1
47
48
    if content[line - 1][-1] == "\n":
49
        column = 0
50
    else:
51
        # This case can appear on end-of-document without a `\n`.
52
        line -= 1
53
        column = len(content[line])
54
55
    return line, column, doccomment
56
57
58
def _extract_doccomment_standard(content, line, column, markers):
59
    pos = content[line].find(markers[2], column)
60
    if pos == -1:
61
        doccomment = content[line][column:]
62
        line += 1
63
64
        while line < len(content):
65
            pos = content[line].find(markers[2])
66
            each_line_pos = content[line].find(markers[1])
67
68
            if pos == -1:
69
                if each_line_pos == -1:
70
                    # If the first text occurrence is not the each-line marker
71
                    # now we violate the doc-comment layout.
72
                    return None
73
                doccomment += content[line][each_line_pos+len(markers[1]):]
74
            else:
75
                # If no each-line marker found or it's located past the end
76
                # marker: extract no further and end the doc-comment.
77
                if each_line_pos != -1 and each_line_pos + 1 < pos:
78
                    doccomment += (
79
                        content[line][each_line_pos+len(markers[1]):pos])
80
81
                return line, pos + len(markers[2]), doccomment
82
83
            line += 1
84
85
        return None
86
    else:
87
        return line, pos + len(markers[2]), content[line][column:pos]
88
89
90
def _extract_doccomment(content, line, column, markers):
    """
    Picks the extraction routine that fits the layout of the given marker-set
    and returns its result.

    :param content: The source-code as a sequence of line strings.
    :param line:    Zero-based index of the line the comment text starts in.
    :param column:  Zero-based column directly behind the start marker.
    :param markers: The marker-set ``(start, each-line, end)``.
    :return:        Whatever the selected routine returns: a tuple
                    ``(line, column, text)`` or ``None``.
    """
    each_line_marker = markers[1]

    if each_line_marker == "":
        # No each-line marker: extract and align to the start marker.
        extractor = _extract_doccomment_simple
    elif each_line_marker == markers[2]:
        # Each-line marker equals end marker: search for the each-line marker
        # until it runs out.
        extractor = _extract_doccomment_continuous
    else:
        extractor = _extract_doccomment_standard

    return extractor(content, line, column, markers)
99
100
101
def _compile_multi_match_regex(strings):
102
    """
103
    Compiles a regex object that matches each of the given strings.
104
105
    :param strings: The strings to match.
106
    :return:        A regex object.
107
    """
108
    return re.compile("|".join(re.escape(s) for s in strings))
109
110
111
def _extract_doccomment_from_line(content, line, column, regex, marker_dict):
112
    begin_match = regex.search(content[line], column)
113
    if begin_match:
114
        column = begin_match.end()
115
        for marker in marker_dict[begin_match.group()]:
116
            doccomment = _extract_doccomment(content, line, column, marker)
117
            if doccomment is not None:
118
                start_position = TextPosition(line + 1,
119
                                              begin_match.start() + 1)
120
                line, column, doccomment = doccomment
0 ignored issues
show
Bug Best Practice introduced by
It seems like you are trying to unpack a non-sequence, which was defined at line 29.
Loading history...
Bug Best Practice introduced by
It seems like you are trying to unpack a non-sequence, which was defined at line 72.
Loading history...
Bug Best Practice introduced by
It seems like you are trying to unpack a non-sequence, which was defined at line 85.
Loading history...
121
                end_position = TextPosition(line + 1, column + 1)
122
123
                doc = DocumentationComment(doccomment,
124
                                           marker,
125
                                           TextRange(start_position,
126
                                                     end_position))
127
128
                return line, column, doc
129
130
    return line + 1, 0, None
131
132
133
def extract_documentation_with_markers(content, markers):
    """
    Extracts all documentation texts inside the given source-code-string.

    :param content: The source-code-string where to extract documentation from.
                    Needs to be a list or tuple where each string item is a
                    single line (including ending whitespaces like `\\n`).
    :param markers: The list/tuple of marker-sets that identify a
                    documentation-comment. Low-index markers have higher
                    priority than high-index markers. If empty, nothing is
                    extracted.
    :return:        An iterator returning each DocumentationComment found in
                    the content.
    """
    # Prepare marker-tuple dict that maps a begin pattern to the corresponding
    # marker_set(s). This makes it faster to retrieve a marker-set from a
    # begin sequence we initially want to search for in source code. Then
    # the possible found documentation match is processed further with the
    # rest markers.
    marker_dict = {}
    for marker_set in markers:
        marker_dict.setdefault(marker_set[0], []).append(marker_set)

    if not marker_dict:
        # Without any marker-set the begin regex would be the empty pattern,
        # which matches everywhere and leads to a failing lookup of `""` in
        # `marker_dict`. There is nothing to find, so stop right away.
        return

    # Using regexes to perform a variable match is faster than finding each
    # substring with `str.find()` choosing the lowest match.
    begin_regex = _compile_multi_match_regex(
        marker_set[0] for marker_set in markers)

    line = 0
    column = 0
    while line < len(content):
        line, column, doc = _extract_doccomment_from_line(content,
                                                          line,
                                                          column,
                                                          begin_regex,
                                                          marker_dict)
        if doc:
            yield doc
173
174
175
def extract_documentation(content, language, docstyle):
    """
    Extracts all documentation texts inside the given source-code-string using
    the coala docstyle definition files.

    The documentation texts are sorted by their order appearing in `content`.

    For more information about how documentation comments are identified and
    extracted, see DocstyleDefinition.doctypes enumeration.

    :param content:            The source-code-string where to extract
                               documentation from. Needs to be a list or tuple
                               where each string item is a single line
                               (including ending whitespaces like `\\n`).
    :param language:           The programming language used.
    :param docstyle:           The documentation style/tool used
                               (e.g. doxygen).
    :raises FileNotFoundError: Raised when the docstyle definition file was not
                               found. This is a compatibility exception from
                               `coalib.misc.Compatability` module.
    :raises KeyError:          Raised when the given language is not defined in
                               given docstyle.
    :raises ValueError:        Raised when a docstyle definition setting has an
                               invalid format.
    :return:                   An iterator returning each DocumentationComment
                               found in the content.
    """
    # Load the marker-sets for this language/docstyle pair and delegate the
    # actual extraction to the marker-based routine.
    markers = DocstyleDefinition.load(language, docstyle).markers
    return extract_documentation_with_markers(content, markers)
205