Total Complexity | 80 |
Total Lines | 434 |
Duplicated Lines | 88.71 % |
Changes | 0 |
Duplicate code is one of the most pungent code smells. A commonly used rule is to restructure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like dynamicserialize.ThriftSerializationContext often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | # |
||
2 | # A port of the Java ThriftSerializationContext, used for reading/writing |
||
3 | # DynamicSerialize objects to/from thrift. |
||
4 | # |
||
5 | # For serialization, it has no knowledge of the expected types in other |
||
6 | # languages, it is instead all based on inspecting the types of the objects |
||
7 | # passed to it. Therefore, ensure the types of python objects and primitives |
||
8 | # match what they should be in the destination language. |
||
9 | # |
||
10 | # |
||
11 | # SOFTWARE HISTORY |
||
12 | # |
||
13 | # Date Ticket# Engineer Description |
||
14 | # ------------ ---------- ----------- -------------------------- |
||
15 | # 06/09/10 njensen Initial Creation. |
||
16 | # 06/12/13 #2099 dgilling Implement readObject() and |
||
17 | # writeObject(). |
||
18 | # Apr 24, 2015 4425 nabowle Add Double support |
||
19 | # Oct 17, 2016 5919 njensen Optimized for speed |
||
20 | # Sep 06, 2018 mjames@ucar Python3 compliance |
||
21 | # |
||
22 | # |
||
23 | |||
24 | import inspect |
||
25 | import sys |
||
26 | import types |
||
27 | import six |
||
28 | import numpy |
||
29 | from thrift.Thrift import TType |
||
30 | import dynamicserialize |
||
31 | from dynamicserialize import dstypes, adapters |
||
32 | from dynamicserialize import SelfDescribingBinaryProtocol |
||
33 | |||
# Length of the package prefix that is stripped from a module's dotted name
# to form its key in dsObjTypes.
DS_LEN = len('dynamicserialize.dstypes.')

# Registry filled in by buildObjMap(): module name with the first DS_LEN
# characters removed -> the class object defined by that module.
dsObjTypes = {}


def buildObjMap(module):
    """Recursively register the classes found under *module* in dsObjTypes.

    A module that defines ``__all__`` is treated as a package: every name
    listed there is imported as a submodule of *module* and processed in
    turn.  Any other module is treated as a leaf that must define an
    attribute named after the last dotted component of its own name; that
    attribute is stored in ``dsObjTypes`` under the module name with its
    first ``DS_LEN`` characters removed.

    Args:
        module: an already-imported module object to walk.

    Raises:
        ImportError: if a name listed in ``__all__`` cannot be imported.
        KeyError: if a leaf module does not define the expected attribute.
    """
    # Function-scope import so this block does not depend on a new
    # file-level import.
    import importlib

    if '__all__' in module.__dict__:
        for i in module.__all__:
            # import_module() returns the submodule itself, so there is no
            # need for a separate sys.modules lookup after a bare __import__.
            buildObjMap(importlib.import_module(module.__name__ + '.' + i))
    else:
        fullName = module.__name__
        clzName = fullName[fullName.rfind('.') + 1:]
        dsObjTypes[fullName[DS_LEN:]] = module.__dict__[clzName]
||
51 | |||
52 | |||
# Populate dsObjTypes from the dynamicserialize.dstypes package at import time.
buildObjMap(dstypes)

# numpy types that map to the same thrift type on Python 2 and Python 3;
# merged into pythonToThriftMap below so the table is written only once.
_numpyToThriftMap = {
    numpy.float32: SelfDescribingBinaryProtocol.FLOAT,
    numpy.int32: TType.I32,
    numpy.ndarray: TType.LIST,
    numpy.object_: TType.STRING,  # making an assumption here
    numpy.float64: TType.DOUBLE,
    numpy.int16: TType.I16,
    numpy.int8: TType.BYTE,
    numpy.int64: TType.I64,
}

# Maps the exact type() of a Python value to the thrift type code used to
# serialize it.  Lookups are by exact type, not isinstance().
if six.PY2:
    pythonToThriftMap = {
        types.StringType: TType.STRING,
        types.IntType: TType.I32,
        types.LongType: TType.I64,
        types.ListType: TType.LIST,
        unicode: TType.STRING,
        types.DictionaryType: TType.MAP,
        set: TType.SET,
        # Python floats use the protocol's FLOAT code rather than TType.DOUBLE.
        types.FloatType: SelfDescribingBinaryProtocol.FLOAT,
        types.BooleanType: TType.BOOL,
        types.InstanceType: TType.STRUCT,
        types.NoneType: TType.VOID,
        numpy.string_: TType.STRING,
    }
else:
    pythonToThriftMap = {
        bytes: TType.STRING,
        # int used to be listed twice in this literal (I32, then I64); the
        # later entry silently won, so only the effective I64 mapping is kept.
        int: TType.I64,
        list: TType.LIST,
        dict: TType.MAP,
        set: TType.SET,
        # Python floats use the protocol's FLOAT code rather than TType.DOUBLE.
        float: SelfDescribingBinaryProtocol.FLOAT,
        bool: TType.BOOL,
        object: TType.STRUCT,
        str: TType.STRING,
        type(None): TType.VOID,
        # numpy.bytes_ is the same scalar type formerly also exposed as
        # numpy.string_; the latter name is gone in NumPy 2.0.
        numpy.bytes_: TType.STRING,
    }
pythonToThriftMap.update(_numpyToThriftMap)

# Thrift type codes for which the protocol offers bulk list read/write calls.
primitiveSupport = (TType.BYTE, TType.I16, TType.I32, TType.I64,
                    SelfDescribingBinaryProtocol.FLOAT, TType.DOUBLE)
||
106 | |||
107 | |||
class ThriftSerializationContext(object):
    """Reads and writes DynamicSerialize objects through a thrift protocol.

    For serialization the thrift type of every value is chosen purely from
    the Python type of the object passed in (see ``_lookupType``), so callers
    must make sure the Python types match what the destination expects.
    """

    def __init__(self, serializationManager, selfDescribingBinaryProtocol):
        """Store the collaborators and build the type dispatch tables.

        Args:
            serializationManager: kept on the instance as
                ``serializationManager``; not used by this class itself.
            selfDescribingBinaryProtocol: protocol object whose ``read*`` and
                ``write*`` methods do the actual I/O; kept as ``protocol``.
        """
        self.serializationManager = serializationManager
        self.protocol = selfDescribingBinaryProtocol
        # thrift type code -> zero-argument callable that reads one value
        self.typeDeserializationMethod = {
            TType.STRING: self.protocol.readString,
            TType.I16: self.protocol.readI16,
            TType.I32: self.protocol.readI32,
            TType.LIST: self._deserializeArray,
            TType.MAP: self._deserializeMap,
            TType.SET: self._deserializeSet,
            SelfDescribingBinaryProtocol.FLOAT: self.protocol.readFloat,
            TType.BYTE: self.protocol.readByte,
            TType.I64: self.protocol.readI64,
            TType.DOUBLE: self.protocol.readDouble,
            TType.BOOL: self.protocol.readBool,
            TType.STRUCT: self.deserializeMessage,
            TType.VOID: lambda: None
        }
        # thrift type code -> one-argument callable that writes one value
        self.typeSerializationMethod = {
            TType.STRING: self.protocol.writeString,
            TType.I16: self.protocol.writeI16,
            TType.I32: self.protocol.writeI32,
            TType.LIST: self._serializeArray,
            TType.MAP: self._serializeMap,
            TType.SET: self._serializeSet,
            SelfDescribingBinaryProtocol.FLOAT: self.protocol.writeFloat,
            TType.BYTE: self.protocol.writeByte,
            TType.I64: self.protocol.writeI64,
            TType.DOUBLE: self.protocol.writeDouble,
            TType.BOOL: self.protocol.writeBool,
            TType.STRUCT: self.serializeMessage,
            TType.VOID: lambda x: None
        }
        # primitive element type -> callable(size) that reads a whole list
        self.listDeserializationMethod = {
            TType.BYTE: self.protocol.readI8List,
            TType.I16: self.protocol.readI16List,
            TType.I32: self.protocol.readI32List,
            TType.I64: self.protocol.readI64List,
            SelfDescribingBinaryProtocol.FLOAT: self.protocol.readF32List,
            TType.DOUBLE: self.protocol.readF64List
        }
        # primitive element type -> callable(sequence) that writes a whole list
        self.listSerializationMethod = {
            TType.BYTE: self.protocol.writeI8List,
            TType.I16: self.protocol.writeI16List,
            TType.I32: self.protocol.writeI32List,
            TType.I64: self.protocol.writeI64List,
            SelfDescribingBinaryProtocol.FLOAT: self.protocol.writeF32List,
            TType.DOUBLE: self.protocol.writeF64List
        }

    def readMessageStart(self):
        """Read a message header and return its first element.

        NOTE(review): the first element is presumably the message name;
        confirm against the protocol's readMessageBegin().
        """
        msg = self.protocol.readMessageBegin()
        return msg[0]

    def readMessageEnd(self):
        """Delegate to the protocol's readMessageEnd()."""
        self.protocol.readMessageEnd()

    def deserializeMessage(self):
        """Read one struct from the protocol and return the decoded value.

        The struct name (decoded as cp437, with '_' replaced by '.') selects
        how the body is read:

        * all digits: the name is a thrift type code and the body is a single
          value of that type;
        * a key of ``adapters.classAdapterRegistry``: the registered adapter
          deserializes the body;
        * contains '$': treated as an enum of an inner class, and the enum
          value is returned as read by ``readString()``;
        * otherwise: the class registered in ``dsObjTypes`` is instantiated
          without arguments and populated field by field through its setters.

        Raises:
            dynamicserialize.SerializationException: if the enum payload
                field is missing or a type code is unsupported.
            KeyError: if the struct name is not registered in ``dsObjTypes``.
        """
        name = self.protocol.readStructBegin()
        name = name.decode('cp437')
        name = name.replace('_', '.')
        if name.isdigit():
            obj = self._deserializeType(int(name))
            return obj
        if name in adapters.classAdapterRegistry:
            return adapters.classAdapterRegistry[name].deserialize(self)
        elif '$' in name:
            # it's an inner class, we're going to hope it's an enum, treat it
            # special
            fieldName, fieldType, fieldId = self.protocol.readFieldBegin()
            # Decode once: the raw field name is bytes, and concatenating
            # bytes to the str message below would raise TypeError instead
            # of the intended SerializationException.
            fieldName = fieldName.decode('utf8')
            if fieldName != '__enumValue__':
                raise dynamicserialize.SerializationException(
                    "Expected to find enum payload. Found: " + fieldName)
            obj = self.protocol.readString()
            self.protocol.readFieldEnd()
            return obj
        else:
            clz = dsObjTypes[name]
            obj = clz()

        # _deserializeField returns False once the STOP field is reached
        while self._deserializeField(obj):
            pass

        self.protocol.readStructEnd()
        return obj

    def _deserializeType(self, b):
        """Read and return one value of thrift type code *b*.

        Raises:
            dynamicserialize.SerializationException: if *b* has no entry in
                ``typeDeserializationMethod``.  Because the call is made
                inside the same ``try``, a KeyError raised while reading the
                value is reported the same way.
        """
        try:
            return self.typeDeserializationMethod[b]()
        except KeyError:
            raise dynamicserialize.SerializationException(
                "Unsupported type value " + str(b))

    def _deserializeField(self, obj):
        """Read one struct field and store it on *obj* through its setter.

        A field named ``foo`` is passed to ``obj.setFoo(value)``.  Fields of
        type VOID are consumed but not stored.

        Returns:
            False when the STOP marker was read (no more fields), else True.

        Raises:
            dynamicserialize.SerializationException: if *obj* has no matching
                setter, or the setter raises ValueError.
        """
        fieldName, fieldType, fieldId = self.protocol.readFieldBegin()
        if fieldType == TType.STOP:
            return False
        elif fieldType != TType.VOID:
            result = self._deserializeType(fieldType)
            fn_str = bytes.decode(fieldName)
            lookingFor = "set" + fn_str[0].upper() + fn_str[1:]

            # A missing attribute makes getattr() raise AttributeError, not
            # ValueError, so that is what has to be caught for the lookup.
            try:
                setMethod = getattr(obj, lookingFor)
            except AttributeError:
                raise dynamicserialize.SerializationException(
                    "Couldn't find setter method " + lookingFor)
            # ValueError from the setter itself is still converted, as
            # before, to keep the exception contract unchanged.
            try:
                setMethod(result)
            except ValueError:
                raise dynamicserialize.SerializationException(
                    "Couldn't find setter method " + lookingFor)

        self.protocol.readFieldEnd()
        return True

    def _deserializeArray(self):
        """Read a thrift list and return its elements.

        Element types listed in ``primitiveSupport`` are read with a single
        bulk call whose result is returned as-is; all other element types are
        read one by one into a Python list.  An empty list yields ``[]``.
        """
        listType, size = self.protocol.readListBegin()
        result = []
        if size:
            if listType not in primitiveSupport:
                m = self.typeDeserializationMethod[listType]
                result = [m() for __ in range(size)]
            else:
                result = self.listDeserializationMethod[listType](size)
        self.protocol.readListEnd()
        return result

    def _deserializeMap(self):
        """Read a thrift map and return it as a dict."""
        keyType, valueType, size = self.protocol.readMapBegin()
        result = {}
        for __ in range(size):
            # can't go off the type, due to java generics limitations dynamic
            # serialize is serializing keys and values as void
            key = self.typeDeserializationMethod[TType.STRUCT]()
            value = self.typeDeserializationMethod[TType.STRUCT]()
            result[key] = value
        self.protocol.readMapEnd()
        return result

    def _deserializeSet(self):
        """Read a thrift set and return it as a Python set.

        Every element is read as a struct, regardless of the declared
        element type.
        """
        setType, setSize = self.protocol.readSetBegin()
        result = set()
        for __ in range(setSize):
            result.add(self.typeDeserializationMethod[TType.STRUCT]())
        self.protocol.readSetEnd()
        return result

    def _lookupType(self, obj):
        """Return the thrift type code used to serialize *obj*.

        The exact ``type(obj)`` is looked up in ``pythonToThriftMap``.  If it
        is not listed but is defined in a module whose name starts with
        ``dynamicserialize.dstypes``, the struct mapping is returned.

        Raises:
            dynamicserialize.SerializationException: for any other type.
        """
        pyt = type(obj)
        if pyt in pythonToThriftMap:
            return pythonToThriftMap[pyt]
        elif pyt.__module__.startswith('dynamicserialize.dstypes'):
            if six.PY2:
                return pythonToThriftMap[types.InstanceType]
            return pythonToThriftMap[object]
        raise dynamicserialize.SerializationException(
            "Don't know how to serialize object of type: " + str(pyt))

    def serializeMessage(self, obj):
        """Write *obj* to the protocol as a struct.

        * Non-struct values are wrapped in a struct whose name is the thrift
          type code as a string.
        * Struct values whose dstypes-relative module name is a key of
          ``adapters.classAdapterRegistry`` are written by that adapter.
        * Any other struct is written by calling each bound method whose name
          starts with ``get`` and emitting the result as a field named after
          the getter (``getFoo`` -> ``foo``), with field ids counting from 1.

        Raises:
            dynamicserialize.SerializationException: if *obj* or one of its
                getter results has a type with no thrift mapping.
        """
        tt = self._lookupType(obj)

        if tt == TType.STRUCT:
            fqn = obj.__module__[DS_LEN:]
            if fqn in adapters.classAdapterRegistry:
                # get proper class name when writing class name to
                # serialization stream in case we have a special inner-class
                # case
                m = sys.modules[adapters.classAdapterRegistry[fqn].__name__]
                if isinstance(m.ClassAdapter, list):
                    fqn = m.ClassAdapter[0]
                self.protocol.writeStructBegin(fqn)
                adapters.classAdapterRegistry[fqn].serialize(self, obj)
                return
            else:
                self.protocol.writeStructBegin(fqn)
                fid = 1
                for methodName, method in inspect.getmembers(
                        obj, inspect.ismethod):
                    if methodName.startswith('get'):
                        fieldname = methodName[3].lower() + methodName[4:]
                        val = method()
                        ft = self._lookupType(val)
                        # struct and non-struct fields are written the same
                        # way, so no branch on ft is needed
                        self._serializeField(fieldname, ft, fid, val)
                        fid += 1
                self.protocol.writeFieldStop()

            self.protocol.writeStructEnd()
        else:
            # basic types
            self.protocol.writeStructBegin(str(tt))
            self._serializeType(obj, tt)
            self.protocol.writeStructEnd()

    def _serializeField(self, fieldName, fieldType, fieldId, fieldValue):
        """Write one struct field: header, value, then field end."""
        self.protocol.writeFieldBegin(fieldName, fieldType, fieldId)
        self._serializeType(fieldValue, fieldType)
        self.protocol.writeFieldEnd()

    def _serializeType(self, fieldValue, fieldType):
        """Write *fieldValue* using the writer registered for *fieldType*.

        Returns:
            Whatever the registered writer returns.

        Raises:
            dynamicserialize.SerializationException: if *fieldType* has no
                entry in ``typeSerializationMethod``.
        """
        if fieldType in self.typeSerializationMethod:
            return self.typeSerializationMethod[fieldType](fieldValue)
        else:
            raise dynamicserialize.SerializationException(
                "Unsupported type value " + str(fieldType))

    def _serializeArray(self, obj):
        """Write a list or numpy array as a thrift list.

        The element type comes from the array dtype for numpy arrays and from
        the first element otherwise; an empty sequence is written as a list
        of STRUCT with size 0.  For numpy arrays the written size is
        ``obj.size`` (the total element count).
        """
        size = len(obj)
        if size:
            if isinstance(obj, numpy.ndarray):
                t = pythonToThriftMap[obj.dtype.type]
                size = obj.size
            else:
                t = self._lookupType(obj[0])
        else:
            t = TType.STRUCT
        self.protocol.writeListBegin(t, size)
        if t == TType.STRING:
            # NOTE(review): str() of a bytes value on Python 3 yields its
            # repr ("b'...'"); confirm that is what writeString should get.
            if isinstance(obj, numpy.ndarray):
                if len(obj.shape) == 1:
                    for x in obj:
                        s = str(x).strip()
                        self.typeSerializationMethod[t](s)
                else:
                    # walks exactly two levels of nesting, one element at a
                    # time
                    for x in obj:
                        for y in x:
                            s = str(y).strip()
                            self.typeSerializationMethod[t](s)
            else:
                for x in obj:
                    s = str(x)
                    self.typeSerializationMethod[t](s)
        elif t not in primitiveSupport:
            for x in obj:
                self.typeSerializationMethod[t](x)
        else:
            # primitives are handed to the protocol in one bulk call
            self.listSerializationMethod[t](obj)
        self.protocol.writeListEnd()

    def _serializeMap(self, obj):
        """Write a dict as a thrift map.

        The key and value types are declared as VOID and every key and value
        is written as a struct.
        """
        size = len(obj)
        self.protocol.writeMapBegin(TType.VOID, TType.VOID, size)
        for k, v in obj.items():
            self.typeSerializationMethod[TType.STRUCT](k)
            self.typeSerializationMethod[TType.STRUCT](v)
        self.protocol.writeMapEnd()

    def _serializeSet(self, obj):
        """Write a set as a thrift set.

        The element type is declared as VOID and every element is written as
        a struct.
        """
        size = len(obj)
        self.protocol.writeSetBegin(TType.VOID, size)
        for x in obj:
            self.typeSerializationMethod[TType.STRUCT](x)
        self.protocol.writeSetEnd()

    def writeMessageStart(self, name):
        """Write a message header for *name* with type VOID and the value 0."""
        self.protocol.writeMessageBegin(name, TType.VOID, 0)

    def writeMessageEnd(self):
        """Delegate to the protocol's writeMessageEnd()."""
        self.protocol.writeMessageEnd()

    # The read*/write* methods below are thin pass-throughs to the protocol.

    def readBool(self):
        """Return the protocol's readBool() result."""
        return self.protocol.readBool()

    def writeBool(self, b):
        """Pass *b* to the protocol's writeBool()."""
        self.protocol.writeBool(b)

    def readByte(self):
        """Return the protocol's readByte() result."""
        return self.protocol.readByte()

    def writeByte(self, b):
        """Pass *b* to the protocol's writeByte()."""
        self.protocol.writeByte(b)

    def readDouble(self):
        """Return the protocol's readDouble() result."""
        return self.protocol.readDouble()

    def writeDouble(self, d):
        """Pass *d* to the protocol's writeDouble()."""
        self.protocol.writeDouble(d)

    def readFloat(self):
        """Return the protocol's readFloat() result."""
        return self.protocol.readFloat()

    def writeFloat(self, f):
        """Pass *f* to the protocol's writeFloat()."""
        self.protocol.writeFloat(f)

    def readI16(self):
        """Return the protocol's readI16() result."""
        return self.protocol.readI16()

    def writeI16(self, i):
        """Pass *i* to the protocol's writeI16()."""
        self.protocol.writeI16(i)

    def readI32(self):
        """Return the protocol's readI32() result."""
        return self.protocol.readI32()

    def writeI32(self, i):
        """Pass *i* to the protocol's writeI32()."""
        self.protocol.writeI32(i)

    def readI64(self):
        """Return the protocol's readI64() result."""
        return self.protocol.readI64()

    def writeI64(self, i):
        """Pass *i* to the protocol's writeI64()."""
        self.protocol.writeI64(i)

    def readString(self):
        """Return the protocol's readString() result."""
        return self.protocol.readString()

    def writeString(self, s):
        """Pass *s* to the protocol's writeString()."""
        self.protocol.writeString(s)

    def readBinary(self):
        """Read an I32 count, then return that many values via readI8List()."""
        numBytes = self.protocol.readI32()
        return self.protocol.readI8List(numBytes)

    def readFloatArray(self):
        """Read an I32 count, then return that many values via readF32List()."""
        size = self.protocol.readI32()
        return self.protocol.readF32List(size)

    def writeFloatArray(self, floats):
        """Write ``len(floats)`` as an I32, then *floats* via writeF32List()."""
        self.protocol.writeI32(len(floats))
        self.protocol.writeF32List(floats)

    def readObject(self):
        """Read and return one object; same as deserializeMessage()."""
        return self.deserializeMessage()

    def writeObject(self, obj):
        """Write one object; same as serializeMessage(obj)."""
        self.serializeMessage(obj)
||
434 |