Complex classes like YaEtl often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use YaEtl, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 28 | class YaEtl extends NodalFlow |
||
| 29 | { |
||
| 30 | /** |
||
| 31 | * The stats items added to NodalFlow's ones |
||
| 32 | * |
||
| 33 | * @var array |
||
| 34 | */ |
||
| 35 | protected $stats = [ |
||
| 36 | 'start' => null, |
||
| 37 | 'end' => null, |
||
| 38 | 'duration' => null, |
||
| 39 | 'mib' => null, |
||
| 40 | 'report' => '', |
||
| 41 | 'num_extract' => 0, |
||
| 42 | 'num_extractor' => 0, |
||
| 43 | 'num_join' => 0, |
||
| 44 | 'num_joiner' => 0, |
||
| 45 | 'num_merge' => 0, |
||
| 46 | 'num_records' => 0, |
||
| 47 | 'num_transform' => 0, |
||
| 48 | 'num_transformer' => 0, |
||
| 49 | 'num_branch' => 0, |
||
| 50 | 'num_load' => 0, |
||
| 51 | 'num_loader' => 0, |
||
| 52 | 'num_flush' => 0, |
||
| 53 | 'invocations' => [], |
||
| 54 | 'nodes' => [], |
||
| 55 | ]; |
||
| 56 | |||
| 57 | /** |
||
| 58 | * The reverse aggregate lookup table |
||
| 59 | * |
||
| 60 | * @var array |
||
| 61 | */ |
||
| 62 | protected $reverseAggregateTable = []; |
||
| 63 | |||
| 64 | /** |
||
| 65 | * @var bool |
||
| 66 | */ |
||
| 67 | protected $forceFlush = false; |
||
| 68 | |||
| 69 | /** |
||
| 70 | * Adds an extractor to the Flow which may be aggregated with another one |
||
| 71 | * |
||
| 72 | * @param ExtractorInterface $extractor |
||
| 73 | * @param null|ExtractorInterface $aggregateWith Use the extractor instance you want to aggregate with |
||
| 74 | * |
||
| 75 | * @return $this |
||
| 76 | */ |
||
| 77 | public function from(ExtractorInterface $extractor, ExtractorInterface $aggregateWith = null) |
||
| 90 | |||
| 91 | /** |
||
| 92 | * Override NodalFlow's add method to prohibit its direct usage |
||
| 93 | * |
||
| 94 | * @param NodeInterface $node |
||
| 95 | * |
||
| 96 | * @throws YaEtlException |
||
| 97 | */ |
||
| 98 | public function add(NodeInterface $node) |
||
| 102 | |||
| 103 | /** |
||
| 104 | * By default, branched flows will only see their |
||
| 105 | * `flush()` method called when the top most parent |
||
| 106 | * triggers its own `flush()`. |
||
| 107 | * It makes sense most of the time to do so as |
||
| 108 | * the most common use case for branches so far is |
||
| 109 | * to deal with one record at a time without generating |
||
| 110 | * records (even when left joining). In such case, |
||
| 111 | * the `flush()` method really needs to be called by the flow |
||
| 112 | * exactly when the top most flow one is. |
||
| 113 | * |
||
| 114 | * Set to true if you are generating many records in a branch |
||
| 115 | * and it makes sense to flush the branch more often |
||
| 116 | * Also note that the branch will also be flushed at the end |
||
| 117 | * of its top most parent. |
||
| 118 | * |
||
| 119 | * @param bool $forceFlush |
||
| 120 | * |
||
| 121 | * @return $this |
||
| 122 | */ |
||
| 123 | public function forceFlush($forceFlush) |
||
| 129 | |||
| 130 | /** |
||
| 131 | * Adds a Joiner to a specific Extractor in the Flow |
||
| 132 | * |
||
| 133 | * @param JoinableInterface $extractor |
||
| 134 | * @param JoinableInterface $joinFrom |
||
| 135 | * @param OnClauseInterface $onClause |
||
| 136 | * |
||
| 137 | * @return $this |
||
| 138 | */ |
||
| 139 | public function join(JoinableInterface $extractor, JoinableInterface $joinFrom, OnClauseInterface $onClause) |
||
| 151 | |||
| 152 | /** |
||
| 153 | * Adds a Transformer to the Flow |
||
| 154 | * |
||
| 155 | * @param TransformerInterface $transformer |
||
| 156 | * |
||
| 157 | * @return $this |
||
| 158 | */ |
||
| 159 | public function transform(TransformerInterface $transformer) |
||
| 167 | |||
| 168 | /** |
||
| 169 | * Adds a Loader to the Flow |
||
| 170 | * |
||
| 171 | * @param LoaderInterface $loader |
||
| 172 | * |
||
| 173 | * @return $this |
||
| 174 | */ |
||
| 175 | public function to(LoaderInterface $loader) |
||
| 183 | |||
| 184 | /** |
||
| 185 | * Adds a Branch (Flow) to the Flow |
||
| 186 | * |
||
| 187 | * @staticvar type $flowHashes |
||
| 188 | * |
||
| 189 | * @param YaEtl $flow The Branch to add in this Flow |
||
| 190 | * @param bool $isAReturningVal To indicate if this Branch Flow is a true Branch or just |
||
| 191 | * a bag of Nodes to execute at this location of the Flow |
||
| 192 | * |
||
| 193 | * @throws YaEtlException |
||
| 194 | * |
||
| 195 | * @return $this |
||
| 196 | */ |
||
| 197 | public function branch(YaEtl $flow, $isAReturningVal = false) |
||
| 218 | |||
| 219 | /** |
||
| 220 | * Triggered right after the flow stops |
||
| 221 | * |
||
| 222 | * @return $this |
||
| 223 | */ |
||
| 224 | public function flowEnd() |
||
| 232 | |||
| 233 | /** |
||
| 234 | * KISS method to expose basic stats |
||
| 235 | * |
||
| 236 | * @return array |
||
|
|
|||
| 237 | */ |
||
| 238 | public function getStats() |
||
| 273 | |||
| 274 | /** |
||
| 275 | * Tells if the flow is set to force flush |
||
| 276 | * Only used when branched (to tell the parent) |
||
| 277 | * |
||
| 278 | * @return bool |
||
| 279 | */ |
||
| 280 | public function isForceFlush() |
||
| 284 | |||
| 285 | /** |
||
| 286 | * Used internally to aggregate Extractors |
||
| 287 | * |
||
| 288 | * @param ExtractorInterface $extractor |
||
| 289 | * @param ExtractorInterface $aggregateWith |
||
| 290 | * |
||
| 291 | * @throws YaEtlException |
||
| 292 | * |
||
| 293 | * @return $this |
||
| 294 | */ |
||
| 295 | protected function aggregateTo(ExtractorInterface $extractor, ExtractorInterface $aggregateWith) |
||
| 325 | |||
| 326 | /** |
||
| 327 | * Collect Nodes stats |
||
| 328 | * |
||
| 329 | * @param array $stats |
||
| 330 | * |
||
| 331 | * @return $this |
||
| 332 | */ |
||
| 333 | protected function collectNodeStats(array &$stats) |
||
| 365 | |||
| 366 | /** |
||
| 367 | * Replaces a node with another one |
||
| 368 | * |
||
| 369 | * @param type $nodeIdx |
||
| 370 | * @param NodeInterface $node |
||
| 371 | * |
||
| 372 | * @throws YaEtlException |
||
| 373 | * |
||
| 374 | * @return $this |
||
| 375 | */ |
||
| 376 | protected function replace($nodeIdx, NodeInterface $node) |
||
| 401 | |||
| 402 | /** |
||
| 403 | * Compute final stats |
||
| 404 | * |
||
| 405 | * @param array $stats |
||
| 406 | * |
||
| 407 | * @return array |
||
| 408 | */ |
||
| 409 | protected function processStats($stats) |
||
| 429 | |||
| 430 | /** |
||
| 431 | * It could lead to really tricky situations if we were to |
||
| 432 | * allow multiple instances of the same node. It's obviously |
||
| 433 | * wrong with an Extractor, but even a Transformer could |
||
| 434 | * create dark corner cases. |
||
| 435 | * |
||
| 436 | * @param NodeInterface $node |
||
| 437 | * |
||
| 438 | * @throws YaEtlException |
||
| 439 | * |
||
| 440 | * @return $this |
||
| 441 | */ |
||
| 442 | protected function enforceNodeInstanceUnicity(NodeInterface $node) |
||
| 450 | |||
| 451 | /** |
||
| 452 | * Find a Node by its hash in a nodemap, used to enforce Node instance unicity |
||
| 453 | * |
||
| 454 | * @param string $hash |
||
| 455 | * @param array $nodeMap |
||
| 456 | * |
||
| 457 | * @return bool |
||
| 458 | */ |
||
| 459 | protected function findNodeHashInMap($hash, $nodeMap) |
||
| 476 | |||
| 477 | /** |
||
| 478 | * Calls each WorkFlow's loaders and branch flush method |
||
| 479 | * |
||
| 480 | * @return $this |
||
| 481 | */ |
||
| 482 | protected function flush(FlowStatusInterface $flowStatus = null) |
||
| 497 | |||
| 498 | /** |
||
| 499 | * Actually flush nodes |
||
| 500 | * |
||
| 501 | * @param FlowStatusInterface $flowStatus |
||
| 502 | * |
||
| 503 | * @return $this |
||
| 504 | */ |
||
| 505 | protected function flushNodes(FlowStatusInterface $flowStatus) |
||
| 523 | } |
||
| 524 |
This check looks for the generic type `array` as a return type and suggests a more specific type. This type is inferred from the actual code.