Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like SchemaReader often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use SchemaReader, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
33 | class SchemaReader |
||
34 | { |
||
35 | const XSD_NS = 'http://www.w3.org/2001/XMLSchema'; |
||
36 | |||
37 | const XML_NS = 'http://www.w3.org/XML/1998/namespace'; |
||
38 | |||
39 | private $loadedFiles = array(); |
||
40 | |||
41 | private $knownLocationSchemas = array(); |
||
42 | |||
43 | private static $globalSchemaInfo = array( |
||
44 | self::XML_NS => 'http://www.w3.org/2001/xml.xsd', |
||
45 | self::XSD_NS => 'http://www.w3.org/2001/XMLSchema.xsd', |
||
46 | ); |
||
47 | |||
48 | 54 | public function __construct() |
|
57 | |||
58 | 54 | public function addKnownSchemaLocation($remote, $local) |
|
62 | |||
63 | 45 | private function loadAttributeGroup(Schema $schema, DOMElement $node) |
|
89 | |||
/**
 * Builds an Attribute for an inline attribute declaration.
 *
 * The attribute takes its name from the node's "name" attribute, receives the
 * node's documentation and is handed to fillItem(). The optional "nillable",
 * "form" and "use" XML attributes are then copied onto it when present.
 *
 * @param Schema     $schema
 * @param DOMElement $node
 *
 * @return Attribute
 */
private function loadAttribute(Schema $schema, DOMElement $node)
{
    $result = new Attribute($schema, $node->getAttribute('name'));
    $result->setDoc($this->getDocumentation($node));
    $this->fillItem($result, $node);

    if ($node->hasAttribute('nillable')) {
        $nillable = $node->getAttribute('nillable');
        $result->setNil($nillable == 'true');
    }

    if ($node->hasAttribute('form')) {
        $form = $node->getAttribute('form');
        $result->setQualified($form == 'qualified');
    }

    if ($node->hasAttribute('use')) {
        $use = $node->getAttribute('use');
        $result->setUse($use);
    }

    return $result;
}
||
108 | |||
109 | 45 | View Code Duplication | private function loadAttributeDef(Schema $schema, DOMElement $node) |
119 | |||
/**
 * Concatenates the text of every "documentation" node found under a direct
 * "annotation" child of the given node.
 *
 * Runs of tabs and spaces in the combined text are collapsed to a single
 * space, and the result is trimmed.
 *
 * @param DOMElement $node
 *
 * @return string
 */
private function getDocumentation(DOMElement $node)
{
    $fragments = array();

    foreach ($node->childNodes as $annotation) {
        if ($annotation->localName != 'annotation') {
            continue;
        }

        foreach ($annotation->childNodes as $documentation) {
            if ($documentation->localName != 'documentation') {
                continue;
            }

            $fragments[] = $documentation->nodeValue;
        }
    }

    $collapsed = preg_replace('/[\t ]+/', ' ', implode('', $fragments));

    return trim($collapsed);
}
||
141 | |||
/**
 * Copies the attributes of a schema node onto a Schema object and dispatches
 * each recognised child node to its loader.
 *
 * The target namespace comes from the node's "targetNamespace" attribute when
 * present, otherwise from $parent when one is given. Element and attribute
 * qualification are enabled only when the matching "*FormDefault" attribute
 * equals "qualified".
 *
 * @param Schema     $schema
 * @param DOMElement $node
 * @param Schema     $parent optional schema whose target namespace is inherited
 *
 * @return array the values returned by the load* methods, in child-node order
 *               (presumably deferred callables, as the visible loaders return
 *               closures; confirm for the loaders not shown here)
 */
private function schemaNode(Schema $schema, DOMElement $node, Schema $parent = null)
{
    // The documentation depends only on $node, so it is extracted and set once.
    $schema->setDoc($this->getDocumentation($node));

    if ($node->hasAttribute('targetNamespace')) {
        $schema->setTargetNamespace($node->getAttribute('targetNamespace'));
    } elseif ($parent) {
        $schema->setTargetNamespace($parent->getTargetNamespace());
    }

    $schema->setElementsQualification($node->getAttribute('elementFormDefault') == 'qualified');
    $schema->setAttributesQualification($node->getAttribute('attributeFormDefault') == 'qualified');

    $functions = array();

    foreach ($node->childNodes as $childNode) {
        switch ($childNode->localName) {
            case 'include':
            case 'import':
                $functions[] = $this->loadImport($schema, $childNode);
                break;
            case 'element':
                $functions[] = $this->loadElementDef($schema, $childNode);
                break;
            case 'attribute':
                $functions[] = $this->loadAttributeDef($schema, $childNode);
                break;
            case 'attributeGroup':
                $functions[] = $this->loadAttributeGroup($schema, $childNode);
                break;
            case 'group':
                $functions[] = $this->loadGroup($schema, $childNode);
                break;
            case 'complexType':
                $functions[] = $this->loadComplexType($schema, $childNode);
                break;
            case 'simpleType':
                $functions[] = $this->loadSimpleType($schema, $childNode);
                break;
        }
    }

    return $functions;
}
||
192 | |||
/**
 * Builds an Element for an inline element declaration.
 *
 * The element takes its name from the node's "name" attribute, receives the
 * node's documentation and is handed to fillItem(). Occurrence bounds,
 * nillability and form qualification are then read from the node.
 *
 * @param Schema     $schema
 * @param DOMElement $node
 *
 * @return Element
 */
private function loadElement(Schema $schema, DOMElement $node)
{
    $element = new Element($schema, $node->getAttribute('name'));
    $element->setDoc($this->getDocumentation($node));

    $this->fillItem($element, $node);

    if ($node->hasAttribute('maxOccurs')) {
        // "unbounded" is represented as -1; any other value is cast to int.
        $maxOccurs = $node->getAttribute('maxOccurs');
        $element->setMax($maxOccurs === 'unbounded' ? -1 : (int) $maxOccurs);
    }
    if ($node->hasAttribute('minOccurs')) {
        $element->setMin((int) $node->getAttribute('minOccurs'));
    }

    $xp = new \DOMXPath($node->ownerDocument);
    // Use the class constant rather than repeating the namespace URI literal.
    $xp->registerNamespace('xs', self::XSD_NS);

    // Any element with an xs:choice ancestor gets a minimum of 0. This runs
    // after the minOccurs handling, so it overrides an explicit minOccurs.
    if ($xp->query('ancestor::xs:choice', $node)->length) {
        $element->setMin(0);
    }

    if ($node->hasAttribute('nillable')) {
        // xs:boolean allows both "true" and "1"; fillTypeNode() treats
        // "abstract" the same way.
        $nillable = $node->getAttribute('nillable');
        $element->setNil($nillable === 'true' || $nillable === '1');
    }
    if ($node->hasAttribute('form')) {
        $element->setQualified($node->getAttribute('form') === 'qualified');
    }

    return $element;
}
||
223 | |||
224 | 45 | private function loadGroupRef(Group $referenced, DOMElement $node) |
|
238 | |||
239 | 45 | private function loadElementRef(ElementDef $referenced, DOMElement $node) |
|
259 | |||
/**
 * Wraps a referenced attribute definition in an AttributeRef.
 *
 * The reference receives the documentation of the referencing node, plus that
 * node's "nillable", "form" and "use" XML attributes when present.
 *
 * @param AttributeDef $referencedAttribiute the definition being referenced
 * @param DOMElement   $node                 the referencing node
 *
 * @return AttributeRef
 */
private function loadAttributeRef(AttributeDef $referencedAttribiute, DOMElement $node)
{
    $reference = new AttributeRef($referencedAttribiute);
    $reference->setDoc($this->getDocumentation($node));

    if ($node->hasAttribute('nillable')) {
        $nillable = $node->getAttribute('nillable');
        $reference->setNil($nillable == 'true');
    }

    if ($node->hasAttribute('form')) {
        $form = $node->getAttribute('form');
        $reference->setQualified($form == 'qualified');
    }

    if ($node->hasAttribute('use')) {
        $use = $node->getAttribute('use');
        $reference->setUse($use);
    }

    return $reference;
}
||
277 | |||
/**
 * Walks a sequence/choice/all node and adds its elements and group references
 * to the given container.
 *
 * When $max is truthy, or this node's "maxOccurs" is "unbounded" or greater
 * than 1, every element loaded below this node gets its maximum set to 2 and
 * the same marker is passed down to nested compositors.
 *
 * @param ElementContainer $elementContainer
 * @param DOMElement       $node
 * @param int|null         $max marker inherited from an enclosing compositor
 */
private function loadSequence(ElementContainer $elementContainer, DOMElement $node, $max = null)
{
    $repeats = $max
        || $node->getAttribute('maxOccurs') == 'unbounded'
        || $node->getAttribute('maxOccurs') > 1;
    $max = $repeats ? 2 : null;

    foreach ($node->childNodes as $child) {
        $tag = $child->localName;

        if ($tag == 'choice' || $tag == 'sequence' || $tag == 'all') {
            // Nested compositor: recurse, handing down the repetition marker.
            $this->loadSequence($elementContainer, $child, $max);
        } elseif ($tag == 'element') {
            if ($child->hasAttribute('ref')) {
                $target = $this->findSomething('findElement', $elementContainer->getSchema(), $node, $child->getAttribute('ref'));
                $item = $this->loadElementRef($target, $child);
            } else {
                $item = $this->loadElement($elementContainer->getSchema(), $child);
            }

            if ($max) {
                $item->setMax($max);
            }

            $elementContainer->addElement($item);
        } elseif ($tag == 'group') {
            $target = $this->findSomething('findGroup', $elementContainer->getSchema(), $node, $child->getAttribute('ref'));
            $groupRef = $this->loadGroupRef($target, $child);
            $elementContainer->addElement($groupRef);
        }
    }
}
|
310 | |||
/**
 * Registers a named Group on the schema and returns a closure that loads its
 * content later.
 *
 * The group is created, documented, given its occurrence bounds and added to
 * the schema immediately. The returned closure, when invoked, loads every
 * sequence/choice/all child of the node into the group.
 *
 * @param Schema     $schema
 * @param DOMElement $node
 *
 * @return \Closure
 */
private function loadGroup(Schema $schema, DOMElement $node)
{
    $definition = new Group($schema, $node->getAttribute('name'));
    $definition->setDoc($this->getDocumentation($node));

    if ($node->hasAttribute('maxOccurs')) {
        // "unbounded" is represented as -1; any other value is cast to int.
        if ($node->getAttribute('maxOccurs') == 'unbounded') {
            $definition->setMax(-1);
        } else {
            $definition->setMax((int) $node->getAttribute('maxOccurs'));
        }
    }

    if ($node->hasAttribute('minOccurs')) {
        $definition->setMin((int) $node->getAttribute('minOccurs'));
    }

    $schema->addGroup($definition);

    return function () use ($definition, $node) {
        foreach ($node->childNodes as $child) {
            $tag = $child->localName;

            if ($tag == 'sequence' || $tag == 'choice' || $tag == 'all') {
                $this->loadSequence($definition, $child);
            }
        }
    };
}
||
337 | |||
/**
 * Creates a complex type for the node and returns a closure that fills it in
 * later.
 *
 * A ComplexTypeSimpleContent is created when the node has a "simpleContent"
 * child, a ComplexType otherwise. The type is added to the schema only when
 * the node's "name" attribute is truthy. The returned closure fills the type
 * via fillTypeNode(), loads its compositors, attributes, group references and
 * attribute groups, and finally passes the type to $callback when one is given.
 *
 * @param Schema        $schema
 * @param DOMElement    $node
 * @param callable|null $callback invoked with the loaded type
 *
 * @return \Closure
 */
private function loadComplexType(Schema $schema, DOMElement $node, $callback = null)
{
    $hasSimpleContent = false;
    foreach ($node->childNodes as $child) {
        if ($child->localName === 'simpleContent') {
            $hasSimpleContent = true;
            break;
        }
    }

    $name = $node->getAttribute('name');
    if ($hasSimpleContent) {
        $type = new ComplexTypeSimpleContent($schema, $name);
    } else {
        $type = new ComplexType($schema, $name);
    }

    $type->setDoc($this->getDocumentation($node));
    if ($name) {
        $schema->addType($type);
    }

    return function () use ($type, $node, $schema, $callback) {
        $this->fillTypeNode($type, $node);

        foreach ($node->childNodes as $child) {
            $tag = $child->localName;

            if ($tag == 'sequence' || $tag == 'choice' || $tag == 'all') {
                $this->loadSequence($type, $child);
            } elseif ($tag == 'attribute') {
                if ($child->hasAttribute('ref')) {
                    $target = $this->findSomething('findAttribute', $schema, $node, $child->getAttribute('ref'));
                    $item = $this->loadAttributeRef($target, $child);
                } else {
                    $item = $this->loadAttribute($schema, $child);
                }

                $type->addAttribute($item);
            } elseif ($tag == 'group') {
                $target = $this->findSomething('findGroup', $schema, $node, $child->getAttribute('ref'));
                $groupRef = $this->loadGroupRef($target, $child);
                $type->addElement($groupRef);
            } elseif ($tag == 'attributeGroup') {
                $attributeGroup = $this->findSomething('findAttributeGroup', $schema, $node, $child->getAttribute('ref'));
                $type->addAttribute($attributeGroup);
            }
        }

        if ($callback) {
            call_user_func($callback, $type);
        }
    };
}
||
393 | |||
/**
 * Creates a SimpleType for the node and returns a closure that fills it in
 * later.
 *
 * The type is documented immediately and added to the schema only when the
 * node's "name" attribute is truthy. The returned closure fills the type via
 * fillTypeNode(), loads any "union" and "list" children, and finally passes
 * the type to $callback when one is given.
 *
 * @param Schema        $schema
 * @param DOMElement    $node
 * @param callable|null $callback invoked with the loaded type
 *
 * @return \Closure
 */
private function loadSimpleType(Schema $schema, DOMElement $node, $callback = null)
{
    $name = $node->getAttribute('name');

    $type = new SimpleType($schema, $name);
    $type->setDoc($this->getDocumentation($node));

    if ($name) {
        $schema->addType($type);
    }

    return function () use ($type, $node, $callback) {
        $this->fillTypeNode($type, $node);

        foreach ($node->childNodes as $child) {
            $tag = $child->localName;

            if ($tag == 'union') {
                $this->loadUnion($type, $child);
            } elseif ($tag == 'list') {
                $this->loadList($type, $child);
            }
        }

        if ($callback) {
            call_user_func($callback, $type);
        }
    };
}
||
421 | |||
/**
 * Resolves the item type of a list node and stores it on the simple type.
 *
 * When the node has an "itemType" attribute, that type is looked up through
 * findSomething(). Otherwise each "simpleType" child is loaded immediately and
 * set as the list type.
 *
 * @param SimpleType $type
 * @param DOMElement $node
 */
private function loadList(SimpleType $type, DOMElement $node)
{
    if ($node->hasAttribute('itemType')) {
        $itemType = $this->findSomething('findType', $type->getSchema(), $node, $node->getAttribute('itemType'));
        $type->setList($itemType);

        return;
    }

    $addCallback = function ($list) use ($type) {
        $type->setList($list);
    };

    foreach ($node->childNodes as $child) {
        if ($child->localName == 'simpleType') {
            // loadSimpleType() returns a deferred loader; run it right away.
            $loader = $this->loadSimpleType($type->getSchema(), $child, $addCallback);
            call_user_func($loader);
        }
    }
}
|
440 | |||
/**
 * Collects the member types of a union node and adds them to the simple type.
 *
 * Members come from two places: the whitespace-separated names in the
 * "memberTypes" attribute, looked up through findSomething(), and any
 * "simpleType" children, which are loaded immediately.
 *
 * @param SimpleType $type
 * @param DOMElement $node
 */
private function loadUnion(SimpleType $type, DOMElement $node)
{
    if ($node->hasAttribute('memberTypes')) {
        // PREG_SPLIT_NO_EMPTY drops the empty tokens produced by an empty
        // attribute or by leading/trailing whitespace, so an empty type name
        // is never passed to findSomething().
        $types = preg_split('/\s+/', $node->getAttribute('memberTypes'), -1, PREG_SPLIT_NO_EMPTY);
        foreach ($types as $typeName) {
            $type->addUnion($this->findSomething('findType', $type->getSchema(), $node, $typeName));
        }
    }

    $addCallback = function ($unType) use ($type) {
        $type->addUnion($unType);
    };

    foreach ($node->childNodes as $childNode) {
        switch ($childNode->localName) {
            case 'simpleType':
                // loadSimpleType() returns a deferred loader; run it right away.
                call_user_func($this->loadSimpleType($type->getSchema(), $childNode, $addCallback));
                break;
        }
    }
}
|
461 | |||
/**
 * Applies the restriction/extension information found under a type node.
 *
 * When $checkAbstract is truthy, the type is flagged abstract if the node's
 * "abstract" attribute is exactly "true" or "1". "restriction" and "extension"
 * children are handed to their loaders. "simpleContent" and "complexContent"
 * children are processed recursively with the abstract check turned off.
 *
 * @param Type       $type
 * @param DOMElement $node
 * @param bool       $checkAbstract
 */
private function fillTypeNode(Type $type, DOMElement $node, $checkAbstract = true)
{
    if ($checkAbstract) {
        $isAbstract = $node->getAttribute('abstract') === 'true' || $node->getAttribute('abstract') === '1';
        $type->setAbstract($isAbstract);
    }

    foreach ($node->childNodes as $child) {
        $tag = $child->localName;

        if ($tag == 'restriction') {
            $this->loadRestriction($type, $child);
        } elseif ($tag == 'extension') {
            $this->loadExtension($type, $child);
        } elseif ($tag == 'simpleContent' || $tag == 'complexContent') {
            $this->fillTypeNode($type, $child, false);
        }
    }
}
|
483 | |||
484 | 45 | private function loadExtension(BaseComplexType $type, DOMElement $node) |
|
516 | |||
517 | 45 | private function loadRestriction(Type $type, DOMElement $node) |
|
518 | { |
||
519 | 45 | $restriction = new Restriction(); |
|
520 | 45 | $type->setRestriction($restriction); |
|
521 | 45 | View Code Duplication | if ($node->hasAttribute('base')) { |
522 | 45 | $restrictedType = $this->findSomething('findType', $type->getSchema(), $node, $node->getAttribute('base')); |
|
523 | 45 | $restriction->setBase($restrictedType); |
|
524 | 45 | } else { |
|
525 | $addCallback = function ($restType) use ($restriction) { |
||
526 | 45 | $restriction->setBase($restType); |
|
527 | 45 | }; |
|
528 | |||
561 | |||
562 | 45 | private static function splitParts(DOMElement $node, $typeName) |
|
579 | |||
580 | /** |
||
581 | * @param string $finder |
||
582 | * @param Schema $schema |
||
583 | * @param DOMElement $node |
||
584 | * @param string $typeName |
||
585 | * |
||
586 | * @throws TypeException |
||
587 | * |
||
588 | * @return ElementItem|Group|AttributeItem|AttribiuteGroup|Type |
||
589 | */ |
||
590 | 45 | private function findSomething($finder, Schema $schema, DOMElement $node, $typeName) |
|
602 | |||
603 | 45 | View Code Duplication | private function loadElementDef(Schema $schema, DOMElement $node) |
612 | |||
613 | 45 | private function fillItem(Item $element, DOMElement $node) |
|
647 | |||
648 | 45 | private function loadImport(Schema $schema, DOMElement $node) |
|
694 | |||
695 | private $globalSchema; |
||
696 | |||
697 | /** |
||
698 | * @return Schema |
||
699 | */ |
||
700 | 45 | public function getGlobalSchema() |
|
727 | |||
728 | /** |
||
729 | * @param DOMNode $node |
||
730 | * @param string $file |
||
731 | * |
||
732 | * @return Schema |
||
733 | */ |
||
734 | 45 | public function readNode(DOMNode $node, $file = 'schema.xsd') |
|
748 | |||
749 | /** |
||
750 | * It is possible that a single file contains multiple <xsd:schema/> nodes, for instance in a WSDL file. |
||
751 | * |
||
752 | * Each of these <xsd:schema/> nodes typically target a specific namespace. Append the target namespace to the |
||
753 | * file to distinguish between multiple schemas in a single file. |
||
754 | * |
||
755 | * @param string $file |
||
756 | * @param string $targetNamespace |
||
757 | * |
||
758 | * @return string |
||
759 | */ |
||
760 | 45 | private function getNamespaceSpecificFileIndex($file, $targetNamespace) |
|
764 | |||
765 | /** |
||
766 | * @param string $content |
||
767 | * @param string $file |
||
768 | * |
||
769 | * @return Schema |
||
770 | * |
||
771 | * @throws IOException |
||
772 | */ |
||
773 | 44 | public function readString($content, $file = 'schema.xsd') |
|
783 | |||
784 | /** |
||
785 | * @param string $file |
||
786 | * |
||
787 | * @return Schema |
||
788 | */ |
||
789 | 1 | public function readFile($file) |
|
795 | |||
796 | /** |
||
797 | * @param string $file |
||
798 | * |
||
799 | * @return DOMDocument |
||
800 | * |
||
801 | * @throws IOException |
||
802 | */ |
||
803 | 45 | private function getDOM($file) |
|
812 | } |
||
813 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.