| Total Complexity | 49 |
| Total Lines | 261 |
| Duplicated Lines | 0 % |
Complex classes like src.docmanager.Analyzer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | # |
||
class Analyzer(object):
    """Extract, filter and format the properties requested by a query format.

    Typical call order (as suggested by the method docstrings):
    ``replace_constants()`` -> ``extract_fields()`` -> ``fetch_data()`` ->
    ``format_output()``.

    Attributes:
        xmlhandler: the handler object passed to the constructor. This class
            reads ``xmlhandler.filename``, ``xmlhandler.fileutil`` and
            ``xmlhandler.dm`` from it.
        fields: set of property names found by the last ``extract_fields()``
            call (``fetch_data()`` may add the sort property to it).
        filters_matched: ``False`` if the last ``fetch_data()`` call was
            rejected by one of its filters, ``True`` otherwise.
    """

    # States of the small parser used by extract_fields().
    _STATE_TEXT = 0      # plain text, outside of any '{...}' instruction
    _STATE_CAPTURE = 1   # inside '{property}': characters are collected
    _STATE_IGNORE = 3    # inside '{{ ... }}': everything is ignored

    def __init__(self, xmlhandler):
        """Constructor for the Analyzer class

        :param XmlHandler xmlhandler: A valid XmlHandler object
        :raises DMInvalidXMLHandlerObject: if ``xmlhandler`` is ``None``
        """

        # validate the XmlHandler object
        if xmlhandler is None:
            raise DMInvalidXMLHandlerObject()

        self.xmlhandler = xmlhandler
        self.fields = set()
        self.filters_matched = True

    def replace_constants(self, queryformat):
        """Replaces the ``{os.*}`` constants in the query format string

        :param string queryformat: The query format string from parameter -qf
        :return string: the query format string in which ``{os.file}`` and
                        ``{os.lastmodify}`` are replaced by their values
        """

        # (placeholder, value) pairs
        constants = (
            ("{os.file}", self.xmlhandler.filename),
            ("{os.lastmodify}",
             self.xmlhandler.fileutil.get_mtime_format('%Y-%m-%d %H:%M:%S')),
        )

        for placeholder, value in constants:
            queryformat = queryformat.replace(placeholder, value)

        return queryformat

    def extract_fields(self, queryformat):
        """Extract requested properties from -qf (--queryformat)

        Syntax understood by the parser:

        * ``{name}``     -> ``name`` is a requested property
        * ``{{ ... }}``  -> everything between the double braces is ignored
        * ``\\x``        -> the character after a backslash is never treated
                            as an instruction (inside ``{...}`` it is added
                            literally to the property name)

        The result is also stored in ``self.fields`` because ``fetch_data()``
        reads the requested properties from there.

        :param string queryformat: The query format string from parameter -qf
        :return set: the set of all requested properties from -qf (--queryformat)
        """

        fields = set()
        state = self._STATE_TEXT
        escaped = False        # True if the previous character was a backslash
        skip = -1              # index of the second brace of '{{' or '}}'
        field = []             # characters of the property being captured
        last = len(queryformat) - 1

        for idx, char in enumerate(queryformat):
            # the character after a backslash is taken literally
            if escaped:
                escaped = False
                if state == self._STATE_CAPTURE:
                    field.append(char)
                continue

            # second brace of a '{{' or '}}' pair - already handled
            if idx == skip:
                skip = -1
                continue

            if char == '\\':
                escaped = True
                continue

            # look-ahead character (None at the end of the string)
            following = queryformat[idx + 1] if idx < last else None

            if char == '{' and state == self._STATE_TEXT:
                if following == '{':
                    # '{{' opens an 'ignore everything' sequence
                    skip = idx + 1
                    state = self._STATE_IGNORE
                else:
                    # a single '{' opens a capturing sequence
                    state = self._STATE_CAPTURE
                continue

            if char == '}':
                if state == self._STATE_CAPTURE:
                    # end of the capturing sequence: remember the property
                    # and start with an empty buffer for the next one
                    state = self._STATE_TEXT
                    fields.add("".join(field))
                    field = []
                    continue

                if state == self._STATE_IGNORE:
                    # only '}}' closes an 'ignore everything' sequence,
                    # a single '}' inside of it is ignored like any other char
                    if following == '}':
                        state = self._STATE_TEXT
                        skip = idx + 1
                    continue

            if state == self._STATE_CAPTURE:
                field.append(char)

        # make the fields public - fetch_data() has no parameter for them
        self.fields = fields
        return self.fields

    def fetch_data(self, filter=None, sort=None, default_output=None):
        """Fetches the requested properties from self.extract_fields()

        ``self.filters_matched`` is updated on every call: it is ``True`` if
        no filter was given or all filters matched, ``False`` otherwise.
        If an invalid property name or an invalid filter is detected, an
        error is logged and the program exits via ``sys.exit()``.

        :param list filter: The filter list from args.filter (can be None if
                            we don't need the filter function)
        :param string sort: The sort item for the sort function (this is a
                            property - can be None if we don't want to use
                            the sort function). It is added to ``self.fields``
                            so that its value is part of the result.
        :param string default_output: value used for properties that are
                            missing or empty (an empty string if not set)
        :return dict: a dictionary with all properties and their values or an
                      empty dictionary if one of the filters did not match
        """

        # always start from a clean state; otherwise a failed filter of a
        # previous call would still be reported after a successful call
        self.filters_matched = True

        data = self._fetch_properties(sort, default_output)

        if filter and not self._filters_match(filter):
            self.filters_matched = False
            return {}

        return data

    def _fetch_properties(self, sort, default_output):
        """Reads the values of all properties in self.fields (plus ``sort``)."""

        data = dict()

        # nothing requested -> nothing to fetch (``sort`` is ignored, too)
        if not self.fields:
            return data

        # the sort property is needed in the result even if it is not part
        # of the query format (display 'maintainer' but sort by something else)
        if sort is not None:
            self.fields.add(sort)

        # build the xpath for selecting all needed properties
        xpath = "*[{}]".format(
            " or ".join("self::dm:" + name for name in self.fields))

        # if there are invalid characters in the xpath, lxml throws an
        # exception. We have to catch that.
        try:
            data = {localname(e.tag): e.text
                    for e in self.xmlhandler.dm.xpath(xpath, namespaces=NS)}
        except etree.XPathEvalError:
            log.error("The given XML properties in --sort/-s or --queryformat/-qf are invalid.")
            sys.exit(ReturnCodes.E_INVALID_XML_PROPERTIES)

        # properties without a value become an empty string if the
        # 'default-option' was not set
        fallback = default_output if default_output else ''
        for name in self.fields:
            if not data.get(name):
                data[name] = fallback

        return data

    def _filters_match(self, filters):
        """Returns True if the XML file passes every filter in ``filters``."""

        # every filter is kept as its own (mode, property, condition) entry
        # so that several filters on the same property are all checked
        parsed = []
        for raw in filters:
            try:
                # validate the filter syntax of any given filter
                mode, prop, condition = self.validate_filter(raw)
            except DMAnalyzeInvalidFilterSyntax:
                # syntax is wrong
                log.error("Invalid syntax in filter: '{}'".format(raw))
                log.error("Look into the manual page for more information about using filters.")
                sys.exit(ReturnCodes.E_ANALYZE_FILTER_INVALID_SYNTAX)
            parsed.append((mode, prop, condition))

        xpath = "*[{}]".format(
            " or ".join("self::dm:" + prop for _, prop, _ in parsed))

        # catch the values of the filter properties. A property name with
        # invalid characters makes lxml raise an exception - report it the
        # same way as any other broken filter instead of crashing
        try:
            values = {localname(e.tag): e.text
                      for e in self.xmlhandler.dm.xpath(xpath, namespaces=NS)}
        except etree.XPathEvalError:
            log.error("Invalid property name in one of the filters: {}".format(
                ", ".join(repr(prop) for _, prop, _ in parsed)))
            log.error("Look into the manual page for more information about using filters.")
            sys.exit(ReturnCodes.E_ANALYZE_FILTER_INVALID_SYNTAX)

        for mode, prop, condition in parsed:
            # the filter property was not found in the XML file -> no match
            if prop not in values:
                return False

            equal = values[prop] == condition
            if mode == '+' and not equal:
                return False
            if mode == '-' and equal:
                return False

        return True

    def validate_filter(self, filter):
        """Validates the syntax of a filter (example: +property=value, -property=value, property=value)

        :param string filter: One single filter (not the filter list)
        :return list: the mode ('+' or '-'), the property, the filter property condition
        :raises DMAnalyzeInvalidFilterSyntax: if the filter contains no '='
                                              or no property name
        """

        # look for the operator (if no valid operator was found, use '+').
        # startswith() also works for an empty string, filter[0] does not
        if not filter.startswith(('+', '-')):
            filter = "+{}".format(filter)

        mode = filter[0]

        # split at the first occurrence of the character '='
        prop, separator, cond = filter[1:].partition("=")
        if not separator or not prop:
            raise DMAnalyzeInvalidFilterSyntax()

        return [mode, prop, cond]

    def format_output(self, source, data):
        """formats the output of the -qf (--queryformat)

        :param string source: The query format string (-qf/--queryformat) from the command line
        :param dict data: the data items from fetch_data (property -> value)
        :return string: the formatted query format string
        """

        if data:
            # replace each '{property}' with its value
            for name, value in data.items():
                source = source.replace('{' + name + '}', value)

        return source
||
| 287 |