| 1 |  |  | import re | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | from coalib.bearlib.languages.documentation.DocstyleDefinition import ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |     DocstyleDefinition) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | from coalib.bearlib.languages.documentation.DocumentationComment import ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |     DocumentationComment) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | from coalib.results.TextRange import TextRange | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | #TODO - Implement Match also for `split` and `search_for`? File an issue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  | #TODO - Document the currently existing docstyles on the coala side? | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | #TODO - Add `'''` as a marker for Python 2/3 inside doc-definition files. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  | def extract_documentation_with_docstyle(content, docstyle_definition): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |     Extracts all documentation texts inside the given source-code-string. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |     :param content:             The source-code-string where to extract | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |                                 documentation from or an iterable with strings | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |                                 where each string is a single line (including | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |                                 ending whitespaces like `\\n`). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |     :param docstyle_definition: The DocstyleDefinition that identifies the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |                                 documentation comments. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |     :return:                    An iterator returning each documentation text | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |                                 found in the content. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |     if isinstance(content, str): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |         content = content.splitlines(keepends=True) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |         content = list(content) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |     # Used to break out of outer loops via exception raise. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |     class BreakOut(Exception): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |         pass | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |     # Prepare marker-tuple dict that maps a begin pattern to the corresponding | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |     # marker_set(s). This makes it faster to retrieve a marker-set from a | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |     # begin sequence we initially want to search for in source code. Then | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |     # the possible found documentation match is processed further with the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |     # rest markers. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |     begin_sequence_dict = {} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |     for marker_set in docstyle_definition.markers: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |         if marker_set[0] not in begin_sequence_dict: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |             begin_sequence_dict[marker_set[0]] = [marker_set] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |             begin_sequence_dict[marker_set[0]].append(marker_set) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |     # Using regexes to perform a variable match is faster than finding each | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |     # substring with `str.find()` choosing the lowest match. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |     begin_regex = re.compile("|".join( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |         re.escape(marker_set[0]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |         for marker_set in docstyle_definition.markers)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |     line = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |     line_pos = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |     while line < len(content): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |         begin_match = begin_regex.search(content[line], line_pos) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |         if begin_match: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |             begin_match_line = line | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |             # Prevents infinite loop when the start marker matches but not the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |             # complete documentation comment. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |             line_pos = begin_match.end() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |             # begin_sequence_dict[begin_match.group()] returns the marker set | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |             # the begin sequence from before matched. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |             for marker_set in begin_sequence_dict[begin_match.group()]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |                 try: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |                     # If the each-line marker and the end marker do equal, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |                     # search for the each-line marker until it runs out. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |                     if marker_set[1] == marker_set[2]: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |                         docstring = content[line][begin_match.end():] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |                         line2 = line + 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |                         stripped_content = content[line2].lstrip() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |                         # Now the each-line marker is no requirement for a | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |                         # docstring any more, just extract as long as there are | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |                         # no each-line markers any more. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |                         while (stripped_content[:len(marker_set[1])] == | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |                                marker_set[1]): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |                             docstring += stripped_content[len(marker_set[1]):] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |                             line2 += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |                             if line2 >= len(content): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |                                 # End of content reached, done with | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |                                 # doc-extraction. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |                                 break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |                             stripped_content = content[line2].lstrip() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |                         line = line2 - 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |                         line_pos = len(content[line]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |                     else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |                         end_marker_pos = content[line].find(marker_set[2], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |                                                             begin_match.end()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |                         if end_marker_pos == -1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |                             docstring = content[line][begin_match.end():] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |                             line2 = line + 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |                             if line2 >= len(content): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |                                 continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |                             end_marker_pos = content[line2].find(marker_set[2]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |                             while end_marker_pos == -1: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |                                 if marker_set[1] == "": | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |                                     # When no each-line marker is set (i.e. for | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |                                     # Python docstrings), then align the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |                                     # comment to the start-marker. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |                                     stripped_content = ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |                                         content[line2][begin_match.start():]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |                                 else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |                                     # Check whether we violate the each-line | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |                                     # marker "rule". | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |                                     current_each_line_marker = (content[line2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |                                         [begin_match.start(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |                                          begin_match.start() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |                                              + len(marker_set[1])]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |                                     if (current_each_line_marker != | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  |                                             marker_set[1]): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  |                                         # Effectively a 'continue' for the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  |                                         # outer for-loop. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  |                                         raise BreakOut | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 |  |  |                                     stripped_content = ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 129 |  |  |                                         content[line2][begin_match.start() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 130 |  |  |                                                        + len(marker_set[1]):]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 131 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 132 |  |  |                                 # TODO Test also other C style doccomments | 
            
                                                                                                            
                            
            
                                    
            
            
                | 133 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  |                                 docstring += stripped_content | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  |                                 line2 += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  |                                 if line2 >= len(content): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 |  |  |                                     # End of content reached, so there's no | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  |                                     # closing marker and that's a mismatch. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 140 |  |  |                                     raise BreakOut | 
            
                                                                                                            
                            
            
                                    
            
            
                | 141 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 142 |  |  |                                 end_marker_pos = content[line2].find( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 143 |  |  |                                     marker_set[2]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 144 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 145 |  |  |                             docstring += (content[line2] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 146 |  |  |                                 [begin_match.start():end_marker_pos]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 147 |  |  |                             line = line2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 148 |  |  |                         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 149 |  |  |                             docstring = (content[line] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 150 |  |  |                                 [begin_match.end():end_marker_pos]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 151 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 152 |  |  |                         line_pos = end_marker_pos + len(marker_set[2]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 153 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 154 |  |  |                     rng = TextRange.from_values(begin_match_line + 1, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 155 |  |  |                                                 begin_match.start() + 1, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 |  |  |                                                 line + 1, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  |                                                 line_pos + 1) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  |                     yield DocumentationComment(docstring, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  |                                                docstyle_definition, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 161 |  |  |                                                marker_set, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 162 |  |  |                                                rng) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 163 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 164 |  |  |                     break | 
            
                                                                                                            
                            
            
                                    
            
            
                | 165 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 166 |  |  |                 except BreakOut: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 167 |  |  |                     # Continues the marker_set loop. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 168 |  |  |                     pass | 
            
                                                                                                            
                            
            
                                    
            
            
                | 169 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 170 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 171 |  |  |             line += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 172 |  |  |             line_pos = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 173 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 174 |  |  |  | 
            
                                                        
            
                                    
            
            
                def extract_documentation(content, language, docstyle):
                    """
                    Extracts every documentation text found in the given
                    source-code-string, based on the coala docstyle definition files.

                    This is a convenience wrapper: it loads the docstyle definition
                    for the ``language``/``docstyle`` pair and hands the actual
                    extraction over to ``extract_documentation_with_docstyle``.

                    The documentation texts are yielded in the order in which they
                    appear in ``content``. How documentation comments are recognized
                    and extracted is described by the DocstyleDefinition.doctypes
                    enumeration.

                    :param content:            The source-code-string to extract
                                               documentation from.
                    :param language:           The programming language used.
                    :param docstyle:           The documentation style/tool used
                                               (i.e. doxygen).
                    :raises FileNotFoundError: Raised when the docstyle definition
                                               file was not found. This is a
                                               compatibility exception from the
                                               `coalib.misc.Compatability` module.
                    :raises KeyError:          Raised when the given language is not
                                               defined in the given docstyle.
                    :raises ValueError:        Raised when a docstyle definition
                                               setting has an invalid format.
                    :return:                   An iterator returning each
                                               DocumentationComment found in the
                                               content.
                    """
                    # Load the definition inline and delegate; any loading error
                    # propagates unchanged to the caller.
                    return extract_documentation_with_docstyle(
                        content,
                        DocstyleDefinition.load(language, docstyle))
            
                                                        
            
                                    
            
            
                | 202 |  |  |  |