Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like DocumentParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use DocumentParser, and based on these observations, apply Extract Interface, too.
| 1 | <?php | ||
| 26 | class DocumentParser | ||
| 27 | { | ||
| 28 | const PROPERTY_ANNOTATION = 'ONGR\ElasticsearchBundle\Annotation\Property'; | ||
| 29 | const EMBEDDED_ANNOTATION = 'ONGR\ElasticsearchBundle\Annotation\Embedded'; | ||
| 30 | const DOCUMENT_ANNOTATION = 'ONGR\ElasticsearchBundle\Annotation\Document'; | ||
| 31 | const OBJECT_ANNOTATION = 'ONGR\ElasticsearchBundle\Annotation\Object'; | ||
| 32 | const NESTED_ANNOTATION = 'ONGR\ElasticsearchBundle\Annotation\Nested'; | ||
| 33 | |||
| 34 | // Meta fields | ||
| 35 | const ID_ANNOTATION = 'ONGR\ElasticsearchBundle\Annotation\Id'; | ||
| 36 | const PARENT_ANNOTATION = 'ONGR\ElasticsearchBundle\Annotation\ParentDocument'; | ||
| 37 | const ROUTING_ANNOTATION = 'ONGR\ElasticsearchBundle\Annotation\Routing'; | ||
| 38 | |||
| 39 | /** | ||
| 40 | * @var Reader Used to read document annotations. | ||
| 41 | */ | ||
| 42 | private $reader; | ||
| 43 | |||
| 44 | /** | ||
| 45 | * @var DocumentFinder Used to find documents. | ||
| 46 | */ | ||
| 47 | private $finder; | ||
| 48 | |||
| 49 | /** | ||
| 50 | * @var array Contains gathered objects which are later added to documents. | ||
| 51 | */ | ||
| 52 | private $objects = []; | ||
| 53 | |||
| 54 | /** | ||
| 55 | * @var array Document properties aliases. | ||
| 56 | */ | ||
| 57 | private $aliases = []; | ||
| 58 | |||
| 59 | /** | ||
| 60 | * @var array Local cache for document properties. | ||
| 61 | */ | ||
| 62 | private $properties = []; | ||
| 63 | |||
| 64 | /** | ||
| 65 | * Analyzers used in documents. | ||
| 66 | * | ||
| 67 | * @var string[] | ||
| 68 | */ | ||
| 69 | private $analyzers = []; | ||
|  | |||
| 70 | |||
/**
 * Stores the collaborators and registers the annotation files used by the reader.
 *
 * @param Reader         $reader Used for reading annotations.
 * @param DocumentFinder $finder Used for resolving namespaces.
 */
public function __construct(Reader $reader, DocumentFinder $finder)
{
    // Registration reads neither the reader nor the finder, so it can run first.
    $this->registerAnnotations();

    $this->finder = $finder;
    $this->reader = $reader;
}
| 81 | |||
/**
 * Parses documents by used annotations and returns mapping for elasticsearch with some extra metadata.
 *
 * The returned array has the keys 'type', 'properties', 'fields', 'aliases', 'analyzers',
 * 'objects', 'namespace' and 'class'.
 *
 * @param \ReflectionClass $class Reflection of the class to parse.
 *
 * @return array
 * @throws MissingDocumentAnnotationException When the class carries no Document annotation.
 */
public function parse(\ReflectionClass $class)
{
    /** @var Document $document */
    $document = $this->reader->getClassAnnotation($class, self::DOCUMENT_ANNOTATION);

    if ($document === null) {
        throw new MissingDocumentAnnotationException(
            sprintf(
                '"%s" class cannot be parsed as document because @Document annotation is missing.',
                $class->getName()
            )
        );
    }

    // Resolved up front, in the same order the result array used to call them.
    $properties = $this->getProperties($class);

    // getAliases() fills $fields by reference with the meta field settings. It has to run
    // BEFORE $fields is merged into 'fields': array elements are evaluated left to right,
    // so merging first would always merge an empty array and drop the meta fields.
    $fields = [];
    $aliases = $this->getAliases($class, $fields);

    return [
        'type' => $document->type ?: Caser::snake($class->getShortName()),
        'properties' => $properties,
        'fields' => array_filter(
            array_merge(
                $document->dump(),
                $fields
            )
        ),
        'aliases' => $aliases,
        'analyzers' => $this->getAnalyzers($class),
        'objects' => $this->getObjects(),
        'namespace' => $class->getName(),
        'class' => $class->getShortName(),
    ];
}
| 122 | |||
/**
 * Reads the Document class annotation of the given class through the reader.
 *
 * @param \ReflectionClass $document Reflection of the class to inspect.
 *
 * @return Document|null The annotation, or null when the reader finds none.
 */
private function getDocumentAnnotationData($document)
{
    $annotation = $this->reader->getClassAnnotation($document, self::DOCUMENT_ANNOTATION);

    return $annotation;
}
| 134 | |||
/**
 * Returns property annotation data from reader.
 *
 * When the annotation does not define a name, the snake_cased property name is used.
 *
 * @param \ReflectionProperty $property
 *
 * @return Property|null
 */
private function getPropertyAnnotationData(\ReflectionProperty $property)
{
    return $this->readNamedPropertyAnnotation($property, self::PROPERTY_ANNOTATION);
}

/**
 * Returns Embedded annotation data from reader.
 *
 * When the annotation does not define a name, the snake_cased property name is used.
 *
 * @param \ReflectionProperty $property
 *
 * @return Embedded|null
 */
private function getEmbeddedAnnotationData(\ReflectionProperty $property)
{
    return $this->readNamedPropertyAnnotation($property, self::EMBEDDED_ANNOTATION);
}

/**
 * Reads a single property annotation and fills in a default name when none is set.
 *
 * Shared by the Property and Embedded lookups, which previously duplicated this logic.
 *
 * @param \ReflectionProperty $property        Property to read the annotation from.
 * @param string              $annotationClass Fully qualified annotation class name.
 *
 * @return object|null The annotation instance, or null when the property is not annotated with it.
 */
private function readNamedPropertyAnnotation(\ReflectionProperty $property, $annotationClass)
{
    $annotation = $this->reader->getPropertyAnnotation($property, $annotationClass);

    // Default the name to the snake_cased property name.
    if ($annotation !== null && $annotation->name === null) {
        $annotation->name = Caser::snake($property->getName());
    }

    return $annotation;
}
| 170 | |||
/**
 * Returns meta field annotation data from reader.
 *
 * The Id, ParentDocument and Routing annotations are tried in that order and the
 * first one found is used.
 *
 * @param \ReflectionProperty $property
 *
 * @return array|null Array with 'name' and 'settings' keys, or null when none of the
 *                    meta field annotations is present on the property.
 */
private function getMetaFieldAnnotationData($property)
{
    $candidates = [
        self::ID_ANNOTATION,
        self::PARENT_ANNOTATION,
        self::ROUTING_ANNOTATION,
    ];

    /** @var MetaField|null $annotation */
    $annotation = null;
    foreach ($candidates as $annotationClass) {
        $annotation = $this->reader->getPropertyAnnotation($property, $annotationClass);
        if ($annotation) {
            break;
        }
    }

    if ($annotation === null) {
        return null;
    }

    $name = $annotation->getName();
    $settings = $annotation->getSettings();

    // A parent annotation additionally carries the type of the document it points to.
    if ($annotation instanceof ParentDocument) {
        $settings['type'] = $this->getDocumentType($annotation->class);
    }

    return [
        'name' => $name,
        'settings' => $settings,
    ];
}
| 200 | |||
/**
 * Returns the keys of the objects gathered so far.
 *
 * @return array
 */
private function getObjects()
{
    $objectNames = array_keys($this->objects);

    return $objectNames;
}
| 210 | |||
/**
 * Finds aliases for every property used in document including parent classes.
 *
 * Each alias is keyed by the annotation name and holds 'propertyName', 'propertyType' and,
 * depending on the annotation and visibility, 'type', 'methods', 'multiple', 'aliases'
 * and 'namespace'.
 *
 * @param \ReflectionClass $reflectionClass
 * @param array            $metaFields      When given, it is filled (by reference) with the settings
 *                                          of the meta fields found, keyed by meta field name.
 *
 * @return array
 * @throws \LogicException When a non-public property lacks the required accessor methods.
 */
private function getAliases(\ReflectionClass $reflectionClass, array &$metaFields = null)
{
    $className = $reflectionClass->getName();

    // The cache is bypassed whenever meta fields are requested, because they are only
    // collected while the properties are actually walked.
    if ($metaFields === null && array_key_exists($className, $this->aliases)) {
        return $this->aliases[$className];
    }

    $aliases = [];

    /** @var \ReflectionProperty $property */
    foreach ($this->getDocumentPropertiesReflection($reflectionClass) as $name => $property) {
        $annotation = $this->getPropertyAnnotationData($property);
        if ($annotation === null) {
            $annotation = $this->getEmbeddedAnnotationData($property);
        }

        // Fall back to meta field annotations, but only when the caller asked for them.
        if ($annotation === null && $metaFields !== null) {
            $metaData = $this->getMetaFieldAnnotationData($property);
            if ($metaData !== null) {
                $metaFields[$metaData['name']] = $metaData['settings'];
                $annotation = new \stdClass();
                $annotation->name = $metaData['name'];
            }
        }

        if ($annotation === null) {
            continue;
        }

        $isProperty = $annotation instanceof Property;

        $entry = ['propertyName' => $name];
        if ($isProperty) {
            $entry['type'] = $annotation->type;
        }

        if ($property->isPublic()) {
            $visibility = 'public';
        } elseif ($property->isProtected() || $property->isPrivate()) {
            // Non-public properties are accessed through their getter/setter pair.
            $visibility = 'private';
            $entry['methods'] = $this->getMutatorMethods(
                $reflectionClass,
                $name,
                $isProperty ? $annotation->type : null
            );
        } else {
            throw new \LogicException(
                sprintf(
                    'Wrong property %s type of %s class types cannot '.
                    'be static or abstract.',
                    $name,
                    $className
                )
            );
        }
        $entry['propertyType'] = $visibility;

        if ($annotation instanceof Embedded) {
            $child = new \ReflectionClass($this->finder->getNamespace($annotation->class));
            $entry = array_merge(
                $entry,
                [
                    'type' => $this->getObjectMapping($annotation->class)['type'],
                    'multiple' => $annotation->multiple,
                    'aliases' => $this->getAliases($child),
                    'namespace' => $child->getName(),
                ]
            );
        }

        $aliases[$annotation->name] = $entry;
    }

    $this->aliases[$className] = $aliases;

    return $this->aliases[$className];
}
| 296 | |||
/**
 * Checks that the class has a setter and a getter for the property and returns their names.
 *
 * The getter is "get<Name>"; for properties of type 'boolean' an "is<Name>" method is
 * accepted when no "get<Name>" exists.
 *
 * @param \ReflectionClass $reflectionClass
 * @param string           $property        Property name.
 * @param string|null      $propertyType    Mapped property type, e.g. 'boolean'.
 *
 * @return array Array with 'getter' and 'setter' keys.
 * @throws \LogicException When the setter or a suitable getter is missing.
 */
private function getMutatorMethods(\ReflectionClass $reflectionClass, $property, $propertyType)
{
    $suffix = ucfirst(Caser::camel($property));
    $setter = 'set'.$suffix;
    $getter = 'get'.$suffix;
    $isser = 'is'.$suffix;
    $isBoolean = $propertyType === 'boolean';

    if (!$reflectionClass->hasMethod($setter)) {
        throw new \LogicException(
            sprintf(
                'Missing %s() method in %s class. Add it, or change property to public.',
                $setter,
                $reflectionClass->getName()
            )
        );
    }

    // Prefer get<Name>(); is<Name>() is only considered for boolean properties.
    $accessor = null;
    if ($reflectionClass->hasMethod($getter)) {
        $accessor = $getter;
    } elseif ($isBoolean && $reflectionClass->hasMethod($isser)) {
        $accessor = $isser;
    }

    if ($accessor !== null) {
        return [
            'getter' => $accessor,
            'setter' => $setter,
        ];
    }

    if ($isBoolean) {
        $message = sprintf(
            'Missing %s() or %s() method in %s class. Add it, or change property to public.',
            $getter,
            $isser,
            $reflectionClass->getName()
        );
    } else {
        $message = sprintf(
            'Missing %s() method in %s class. Add it, or change property to public.',
            $getter,
            $reflectionClass->getName()
        );
    }

    throw new \LogicException($message);
}
| 349 | |||
/**
 * Registers every annotation file of the bundle with the annotation registry,
 * so that the reader can use them.
 */
private function registerAnnotations()
{
    $names = ['Document', 'Property', 'Embedded', 'Object', 'Nested', 'Id', 'ParentDocument', 'Routing'];

    $count = count($names);
    for ($i = 0; $i < $count; $i++) {
        $annotation = $names[$i];
        AnnotationRegistry::registerFile(__DIR__ . "/../Annotation/{$annotation}.php");
    }
}
| 370 | |||
| 371 | /** | ||
| 372 | * Returns document type. | ||
| 373 | * | ||
| 374 | * @param string $document Format must be like AcmeBundle:Document. | ||
| 375 | * | ||
| 376 | * @return string | ||
| 377 | */ | ||
| 378 | private function getDocumentType($document) | ||
| 386 | |||
| 387 | /** | ||
| 388 | * Returns all defined properties including private from parents. | ||
| 389 | * | ||
| 390 | * @param \ReflectionClass $reflectionClass | ||
| 391 | * | ||
| 392 | * @return array | ||
| 393 | */ | ||
| 394 | private function getDocumentPropertiesReflection(\ReflectionClass $reflectionClass) | ||
| 420 | |||
| 421 | /** | ||
| 422 | * Parses analyzers list from document mapping. | ||
| 423 | * | ||
| 424 | * @param \ReflectionClass $reflectionClass | ||
| 425 | * @return array | ||
| 426 | */ | ||
| 427 | private function getAnalyzers(\ReflectionClass $reflectionClass) | ||
| 428 |     { | ||
| 429 | $analyzers = []; | ||
| 430 |         foreach ($this->getDocumentPropertiesReflection($reflectionClass) as $name => $property) { | ||
| 463 | |||
| 464 | /** | ||
| 465 | * Returns properties of reflection class. | ||
| 466 | * | ||
| 467 | * @param \ReflectionClass $reflectionClass Class to read properties from. | ||
| 468 | * @param array $properties Properties to skip. | ||
| 469 | * @param bool $flag If false excludes properties, true only includes properties. | ||
| 470 | * | ||
| 471 | * @return array | ||
| 472 | */ | ||
| 473 | private function getProperties(\ReflectionClass $reflectionClass, $properties = [], $flag = false) | ||
| 507 | |||
| 508 | /** | ||
| 509 | * Returns object mapping. | ||
| 510 | * | ||
| 511 | * Loads from cache if it's already loaded. | ||
| 512 | * | ||
| 513 | * @param string $className | ||
| 514 | * | ||
| 515 | * @return array | ||
| 516 | */ | ||
| 517 | private function getObjectMapping($className) | ||
| 550 | } | ||
| 551 | 
This check marks private properties that are never used within their class. Those properties can be removed.