Total Complexity | 49 |
Total Lines | 261 |
Duplicated Lines | 0 % |
Complex classes like src.docmanager.Analyzer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # |
||
class Analyzer(object):
    """Extracts, filters and formats properties for a query format string.

    The properties are read through ``self.xmlhandler.dm.xpath()`` using the
    ``dm`` namespace prefix.
    """

    # states of the query format scanner used by extract_fields()
    _STATE_TEXT = 0      # outside of any '{...}' instruction
    _STATE_CAPTURE = 1   # inside '{...}': characters belong to a property name
    _STATE_IGNORE = 3    # inside '{{...}}': everything is ignored

    def __init__(self, xmlhandler):
        """Constructor for the Analyzer class

        :param XmlHandler xmlhandler: A valid XmlHandler object
        :raises DMInvalidXMLHandlerObject: if xmlhandler is None
        """

        # validate the XmlHandler object
        if xmlhandler is None:
            raise DMInvalidXMLHandlerObject()

        self.xmlhandler = xmlhandler
        # property names requested by the last extract_fields() call
        self.fields = set()
        # False if the last fetch_data() call was rejected by a filter
        self.filters_matched = True

    def replace_constants(self, queryformat):
        """Replaces the constants {os.file} and {os.lastmodify}

        :param string queryformat: The query format string from parameter -qf
        :return string: the query format string with all constants replaced
        """

        constants = (
            ("{os.file}", self.xmlhandler.filename),
            ("{os.lastmodify}",
             self.xmlhandler.fileutil.get_mtime_format('%Y-%m-%d %H:%M:%S')),
        )

        for placeholder, value in constants:
            queryformat = queryformat.replace(placeholder, value)

        return queryformat

    def extract_fields(self, queryformat):
        """Extract requested properties from -qf (--queryformat)

        '{name}' requests the property 'name'. Everything between '{{' and
        '}}' is ignored. A backslash hides the following character from the
        scanner.

        The result is also stored in self.fields because fetch_data() reads
        the requested properties from there.

        :param string queryformat: The query format string from parameter -qf
        :return set: all requested properties from -qf (--queryformat)
        """

        fields = set()

        state = self._STATE_TEXT
        ignorenext = False
        field = ""
        skip = -1

        for idx, char in enumerate(queryformat):
            # the previous character was a backslash: the current character
            # has no special meaning. In the capturing state it still belongs
            # to the property name.
            if ignorenext:
                ignorenext = False
                if state == self._STATE_CAPTURE:
                    field += char
                continue

            # second character of a '{{' or '}}' pair - already handled
            if idx == skip:
                skip = -1
                continue

            if char == '\\':
                ignorenext = True
                continue

            if char == '{' and state == self._STATE_TEXT:
                # the slice is empty at the end of the string, so a trailing
                # '{' starts a capturing sequence like any single '{'
                if queryformat[idx + 1:idx + 2] == '{':
                    skip = idx + 1
                    state = self._STATE_IGNORE
                else:
                    state = self._STATE_CAPTURE
                continue

            if char == '}':
                if state == self._STATE_CAPTURE:
                    # end of a capturing sequence: remember the property and
                    # reset the buffer for the next one
                    state = self._STATE_TEXT
                    fields.add(field)
                    field = ""
                    continue

                # only '}}' ends an 'ignore everything' sequence
                if (state == self._STATE_IGNORE
                        and queryformat[idx + 1:idx + 2] == '}'):
                    state = self._STATE_TEXT
                    skip = idx + 1

            if state == self._STATE_CAPTURE:
                field += char

        self.fields = fields
        return self.fields

    def _query_properties(self, names):
        """Returns the text of all given properties found via xmlhandler.dm

        :param list names: the property names (without the 'dm:' prefix)
        :return dict: property name -> text for each element that was found
        """

        selectors = " or ".join("self::dm:" + name for name in names)
        xpath = "*[{}]".format(selectors)

        return {localname(e.tag): e.text
                for e in self.xmlhandler.dm.xpath(xpath, namespaces=NS)}

    def fetch_data(self, filter=None, sort=None, default_output=None):
        """Fetches the requested properties from self.extract_fields()

        Exits the program via sys.exit() if a property name is not usable in
        an XPath expression or if a filter has an invalid syntax.

        :param list filter: The filter list from args.filter (can be None if we don't need the filter function)
        :param string sort: The sort item for the sort function (this is a property - can be None if we don't
                            want to use the sort function). If properties were requested, it is added to
                            self.fields.
        :param string default_output: The value for properties which are empty or not available (an empty
                                      string is used if this is not set)
        :return dict: a dictionary with all properties and their values, or an empty dictionary if one of
                      the filters did not match (self.filters_matched is False in that case)
        """

        data = dict()

        # always start from a clean state - otherwise a mismatch from a
        # previous call would survive a call with matching filters
        self.filters_matched = True

        if self.fields:
            names = list(self.fields)
            if sort is not None:
                # the sort property has to be fetched too, even if it should
                # not be displayed
                names.append(sort)
                self.fields.add(sort)

            # if there are invalid characters in the xpath, lxml throws an
            # exception. We have to catch that.
            try:
                data = self._query_properties(names)
            except etree.XPathEvalError:
                log.error("The given XML properties in --sort/-s or --queryformat/-qf are invalid.")
                sys.exit(ReturnCodes.E_INVALID_XML_PROPERTIES)

            # properties without a value become an empty string if no default
            # output was given
            for name in self.fields:
                if not data.get(name):
                    data[name] = default_output if default_output else ''

        if filter:
            # a list (not a dict keyed by property) so that several filters
            # on the same property are all checked
            filters = []

            for f in filter:
                try:
                    mode, prop, condition = self.validate_filter(f)
                except DMAnalyzeInvalidFilterSyntax:
                    log.error("Invalid syntax in filter: '{}'".format(f))
                    log.error("Look into the manual page for more information about using filters.")
                    sys.exit(ReturnCodes.E_ANALYZE_FILTER_INVALID_SYNTAX)

                filters.append((mode, prop, condition))

            # catch the values of the filter properties
            try:
                values = self._query_properties([prop for _, prop, _ in filters])
            except etree.XPathEvalError:
                log.error("The given XML properties in the filters are invalid.")
                sys.exit(ReturnCodes.E_INVALID_XML_PROPERTIES)

            for mode, prop, condition in filters:
                # a filter on a property which is not in the XML file never
                # matches
                if prop not in values:
                    self.filters_matched = False
                    return {}

                # '+' requires equality, '-' requires inequality
                if (values[prop] == condition) != (mode == '+'):
                    self.filters_matched = False
                    return {}

        return data

    def validate_filter(self, filter):
        """Validates the syntax of a filter (example: +property=value, -property=value, property=value)

        :param string filter: One single filter (not the filter list)
        :return list: the mode ('+' or '-'), the property, the filter property condition
        :raises DMAnalyzeInvalidFilterSyntax: if the filter is empty, has no '=' or has no property name
        """

        if not filter:
            raise DMAnalyzeInvalidFilterSyntax()

        # look for the operator (if no valid operator was found, use '+')
        if filter[0] in ('+', '-'):
            mode, expression = filter[0], filter[1:]
        else:
            mode, expression = '+', filter

        # split at the first occurrence of the character '='
        prop, separator, cond = expression.partition("=")

        # an empty property name would produce an invalid XPath expression
        if not separator or not prop:
            raise DMAnalyzeInvalidFilterSyntax()

        return [mode, prop, cond]

    def format_output(self, source, data):
        """formats the output of the -qf (--queryformat)

        :param string source: The query format string (-qf/--queryformat) from the command line
        :param dict data: the data items from fetch_data (property -> value)
        :return string: the formatted query format string
        """

        if data:
            # replace each '{property}' with the value of the property
            for name, value in data.items():
                source = source.replace('{' + name + '}', value)

        return source
||
287 |