Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like FeedExporter often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use FeedExporter, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
17 | class FeedExporter |
||
18 | { |
||
19 | /** |
||
20 | * @var EventDispatcherInterface |
||
21 | */ |
||
22 | protected $dispatcher; |
||
23 | |||
24 | /** |
||
25 | * @var Filesystem |
||
26 | */ |
||
27 | protected $filesystem; |
||
28 | |||
29 | /** |
||
30 | * @var string |
||
31 | */ |
||
32 | protected $cacheDir; |
||
33 | |||
34 | /** |
||
35 | * @var string |
||
36 | */ |
||
37 | protected $exportDir; |
||
38 | |||
39 | /** |
||
40 | * @var FeedWriterFactory |
||
41 | */ |
||
42 | protected $writerFactory; |
||
43 | |||
44 | /** |
||
45 | * @var FeedTypeInterface[] |
||
46 | */ |
||
47 | protected $types = []; |
||
48 | |||
49 | /** |
||
50 | * @var array |
||
51 | */ |
||
52 | protected $templateHashes = []; |
||
53 | |||
54 | /** |
||
55 | * @var FeedWriter[] |
||
56 | */ |
||
57 | protected $writers = []; |
||
58 | |||
59 | /** |
||
60 | * @param string $cacheDir |
||
61 | * @param string $exportDir |
||
62 | * @param FeedWriterFactory $writerFactory |
||
63 | * @param Filesystem $filesystem |
||
64 | * @param EventDispatcherInterface $dispatcher |
||
65 | */ |
||
66 | 4 | public function __construct($cacheDir, $exportDir, FeedWriterFactory $writerFactory, Filesystem $filesystem, EventDispatcherInterface $dispatcher = null) |
|
74 | |||
75 | /** |
||
76 | * @param object $item |
||
77 | * @param FeedTypeInterface[] $types |
||
78 | * @param bool $force |
||
79 | * |
||
80 | * @return bool |
||
81 | */ |
||
82 | 2 | public function cacheItem($item, array $types = [], $force = false) |
|
83 | { |
||
84 | 2 | if (false === $this->supports($item)) { |
|
85 | return false; |
||
86 | } |
||
87 | |||
88 | 2 | if (empty($types)) { |
|
89 | 2 | $types = $this->getTypesForItem($item); |
|
90 | } |
||
91 | |||
92 | 2 | foreach ($types as $type) { |
|
93 | 2 | $template = $type->getTemplate(); |
|
94 | 2 | $cacheFile = $this->getItemCacheFilename($item, $type); |
|
95 | |||
96 | 2 | if (!file_exists($cacheFile) || $force) { |
|
97 | 2 | $xml = $this->getWriter($type)->renderItem($item, $template); |
|
98 | |||
99 | 2 | $this->filesystem->dumpFile($cacheFile, $xml, null); |
|
|
|||
100 | 2 | $this->filesystem->chmod($cacheFile, 0666, umask()); |
|
101 | } |
||
102 | } |
||
103 | |||
104 | 2 | return true; |
|
105 | } |
||
106 | |||
107 | /** |
||
108 | * Clears cached exports for an item. |
||
109 | * |
||
110 | * @param object $item |
||
111 | * @param FeedTypeInterface[] $types |
||
112 | */ |
||
113 | public function clearCache($item, array $types = []) |
||
114 | { |
||
115 | if (false === $this->supports($item)) { |
||
116 | return; |
||
117 | } |
||
118 | |||
119 | if (empty($types)) { |
||
120 | $types = $this->getTypesForItem($item); |
||
121 | } |
||
122 | |||
123 | foreach ($types as $type) { |
||
124 | $cacheFile = $this->getItemCacheFilename($item, $type); |
||
125 | $this->filesystem->remove($cacheFile); |
||
126 | } |
||
127 | } |
||
128 | |||
129 | /** |
||
130 | * @param FeedTypeInterface $type |
||
131 | * @param bool $force |
||
132 | * |
||
133 | * @return bool |
||
134 | */ |
||
135 | public function exportFeed(FeedTypeInterface $type, $force = false) |
||
136 | { |
||
137 | $file = $this->getFeedFilename($type, false); |
||
138 | $gzFile = $this->getFeedFilename($type, true); |
||
139 | $tmpFile = $this->getFeedCacheFilename($type); |
||
140 | $gzTmpFile = $tmpFile . '.gz'; |
||
141 | |||
142 | // check if we are up-to-date |
||
143 | if (false === $force && file_exists($file) && $this->isFresh($file, $type->getTtl())) { |
||
144 | return false; |
||
145 | } |
||
146 | |||
147 | $qb = $type->getQueryBuilder('x'); |
||
148 | $count = $this->getNumberOfResults($qb); |
||
149 | |||
150 | $this->dispatch(ExportEvents::PRE_EXPORT_FEED, new ExportFeedEvent($file, $type, $count)); |
||
151 | |||
152 | $this->filesystem->mkdir(dirname($file)); |
||
153 | |||
154 | $writer = $this->getWriter($type); |
||
155 | $writer->start($tmpFile, $this->getNamespaceAttributes($type)); |
||
156 | |||
157 | $num = 0; |
||
158 | foreach ($qb->getQuery()->iterate() as list($item)) { |
||
159 | $this->dispatch( |
||
160 | ExportEvents::PRE_EXPORT_ITEM, |
||
161 | new ExportProgressEvent($num, $count) |
||
162 | ); |
||
163 | |||
164 | $this->cacheItem($item, [$type]); |
||
165 | |||
166 | $cacheFile = $this->getItemCacheFilename($item, $type); |
||
167 | $writer->writeContent(file_get_contents($cacheFile)); |
||
168 | |||
169 | $this->dispatch(ExportEvents::POST_EXPORT_ITEM, new ExportProgressEvent($num, $count)); |
||
170 | |||
171 | if ($num++ % 2000 === 0) { |
||
172 | $this->pingDatabase($qb->getEntityManager()); |
||
173 | } |
||
174 | |||
175 | $qb->getEntityManager()->detach($item); |
||
176 | } |
||
177 | |||
178 | $writer->finish(); |
||
179 | |||
180 | $this->gzip($tmpFile, $gzTmpFile); |
||
181 | rename($tmpFile, $file); |
||
182 | rename($gzTmpFile, $gzFile); |
||
183 | |||
184 | $this->dispatch(ExportEvents::POST_EXPORT_FEED, new ExportFeedEvent($file, $type, $count)); |
||
185 | |||
186 | return true; |
||
187 | } |
||
188 | |||
189 | /** |
||
190 | * @param FeedTypeInterface $type |
||
191 | * @param string $alias |
||
192 | */ |
||
193 | 6 | public function registerType(FeedTypeInterface $type, $alias) |
|
197 | |||
198 | /** |
||
199 | * @param object $item |
||
200 | * |
||
201 | * @return bool |
||
202 | */ |
||
203 | 2 | public function supports($item) |
|
204 | { |
||
205 | 2 | foreach ($this->types as $type) { |
|
206 | 2 | if ($type->supports($item)) { |
|
207 | 2 | return true; |
|
208 | } |
||
209 | } |
||
210 | |||
211 | return false; |
||
212 | } |
||
213 | |||
214 | /** |
||
215 | * @param string $name |
||
216 | * |
||
217 | * @throws \OutOfBoundsException when the type is not registered |
||
218 | * @return FeedTypeInterface |
||
219 | * |
||
220 | */ |
||
221 | 2 | public function getType($name) |
|
222 | { |
||
223 | 2 | if (array_key_exists($name, $this->types)) { |
|
224 | 2 | return $this->types[$name]; |
|
225 | } |
||
226 | |||
227 | throw new \OutOfBoundsException( |
||
228 | sprintf( |
||
229 | 'Export type "%s" is not supported. You can add it by creating a service which implements %s, ' . |
||
230 | 'and tag it with tree_house.io.export.feed_type', |
||
231 | $name, |
||
232 | FeedTypeInterface::class |
||
233 | ) |
||
234 | ); |
||
235 | } |
||
236 | |||
237 | /** |
||
238 | * @param string $name |
||
239 | * |
||
240 | * @return bool |
||
241 | */ |
||
242 | 2 | public function hasType($name) |
|
246 | |||
247 | /** |
||
248 | * @return FeedTypeInterface[] |
||
249 | */ |
||
250 | 2 | public function getTypes() |
|
254 | |||
255 | /** |
||
256 | * @return EventDispatcherInterface |
||
257 | */ |
||
258 | public function getDispatcher() |
||
262 | |||
263 | /** |
||
264 | * Returns the location of the generated feed file. This is the location where the definitive |
||
265 | * exported feed will be cached and served from. |
||
266 | * |
||
267 | * @param FeedTypeInterface $type |
||
268 | * @param bool $gzip |
||
269 | * |
||
270 | * @return string |
||
271 | */ |
||
272 | View Code Duplication | public function getFeedFilename(FeedTypeInterface $type, $gzip = false) |
|
273 | { |
||
274 | $path = [ |
||
275 | $this->exportDir, |
||
276 | sprintf('%s.%s', $type->getName(), ($gzip ? 'xml.gz' : 'xml')), |
||
277 | ]; |
||
278 | |||
279 | return implode(DIRECTORY_SEPARATOR, $path); |
||
280 | } |
||
281 | |||
282 | /** |
||
283 | * Returns the location of the feed file to export. This is the location where the actual |
||
284 | * exporting will take place and where all the separate listing XML files are cached. |
||
285 | * |
||
286 | * @param FeedTypeInterface $type |
||
287 | * @param bool $gzip |
||
288 | * |
||
289 | * @return string |
||
290 | */ |
||
291 | View Code Duplication | public function getFeedCacheFilename(FeedTypeInterface $type, $gzip = false) |
|
292 | { |
||
293 | $path = [ |
||
294 | $this->cacheDir, |
||
295 | sprintf('%s.%s', $type->getName(), ($gzip ? 'xml.gz' : 'xml')), |
||
296 | ]; |
||
297 | |||
298 | return implode(DIRECTORY_SEPARATOR, $path); |
||
299 | } |
||
300 | |||
301 | /** |
||
302 | * @param object $item |
||
303 | * @param FeedTypeInterface $type |
||
304 | * |
||
305 | * @return string |
||
306 | */ |
||
307 | 4 | public function getItemCacheFilename($item, FeedTypeInterface $type) |
|
308 | { |
||
309 | 4 | $class = DoctrineClassUtils::getClass($item); |
|
310 | |||
311 | 4 | $hash = hash('crc32b', sprintf('%s-%d', $class, $item->getId())); |
|
312 | $path = [ |
||
313 | 4 | $this->cacheDir, |
|
314 | 4 | $hash{0}, |
|
315 | 4 | $hash{1}, |
|
316 | 4 | $hash{2}, |
|
317 | 4 | substr($hash, 3), |
|
318 | 4 | sprintf('%s.xml', $this->getTemplateHash($type)), |
|
319 | ]; |
||
320 | |||
321 | 4 | return implode(DIRECTORY_SEPARATOR, $path); |
|
322 | } |
||
323 | |||
324 | /** |
||
325 | * @param object $item |
||
326 | * |
||
327 | * @return FeedTypeInterface[] |
||
328 | */ |
||
329 | 2 | protected function getTypesForItem($item) |
|
330 | { |
||
331 | 2 | $types = []; |
|
332 | |||
333 | 2 | foreach ($this->types as $type) { |
|
334 | 2 | if ($type->supports($item)) { |
|
335 | 2 | $types[] = $type; |
|
336 | } |
||
337 | } |
||
338 | |||
339 | 2 | return $types; |
|
340 | } |
||
341 | |||
342 | /** |
||
343 | * @param string $file |
||
344 | * @param int $ttl time to life in minutes |
||
345 | * |
||
346 | * @return bool |
||
347 | */ |
||
348 | protected function isFresh($file, $ttl) |
||
358 | |||
359 | /** |
||
360 | * @param QueryBuilder $builder |
||
361 | * |
||
362 | * @return int |
||
363 | */ |
||
364 | protected function getNumberOfResults(QueryBuilder $builder) |
||
365 | { |
||
366 | $countQb = clone $builder; |
||
367 | |||
368 | // remove some parts which are not needed in the count query, but could slow it down |
||
369 | foreach (['groupBy', 'orderBy'] as $field) { |
||
370 | if ($countQb->getDQLPart($field)) { |
||
371 | $countQb->resetDQLPart($field); |
||
372 | } |
||
373 | } |
||
374 | |||
375 | if (null !== $countQb->getMaxResults()) { |
||
376 | return $countQb->getMaxResults(); |
||
377 | } |
||
378 | |||
379 | $aliases = $countQb->getRootAliases(); |
||
380 | $rootAlias = reset($aliases); |
||
381 | |||
382 | $query = $countQb->select('COUNT(' . $rootAlias . ')')->getQuery(); |
||
383 | |||
384 | return (int) $query->getSingleScalarResult(); |
||
385 | } |
||
386 | |||
387 | /** |
||
388 | * @param FeedTypeInterface $type |
||
389 | * |
||
390 | * @return null|string |
||
391 | */ |
||
392 | protected function getNamespaceAttributes(FeedTypeInterface $type) |
||
393 | { |
||
394 | $namespaces = $type->getNamespaces(); |
||
395 | |||
396 | if (empty($namespaces)) { |
||
397 | return null; |
||
398 | } |
||
399 | |||
400 | $str = ''; |
||
401 | foreach ($namespaces as $name => $schemaLocation) { |
||
402 | $str .= sprintf( |
||
403 | 'xmlns="%s" xmlns:xsi="%s" xsi:schemaLocation="%s %s" ', |
||
404 | $name, |
||
405 | 'http://www.w3.org/2001/XMLSchema-instance', |
||
406 | $name, |
||
407 | $schemaLocation |
||
408 | ); |
||
409 | } |
||
410 | |||
411 | return trim($str); |
||
412 | } |
||
413 | |||
414 | /** |
||
415 | * @param FeedTypeInterface $type |
||
416 | * |
||
417 | * @return string |
||
418 | */ |
||
419 | 4 | protected function getTemplateHash(FeedTypeInterface $type) |
|
420 | { |
||
421 | 4 | if (!array_key_exists($type->getName(), $this->templateHashes)) { |
|
422 | // concatenate all variables that can change the template in the hash |
||
423 | 4 | $hash = $type->getRootNode() . $type->getItemNode(); |
|
424 | |||
425 | // use the canonical path if we're using a template reference, otherwise just use the template name |
||
426 | 4 | $template = $type->getTemplate(); |
|
427 | 4 | if ($template instanceof TemplateReferenceInterface && file_exists($template->getPath())) { |
|
428 | 2 | $hash .= md5_file($template->getPath()); |
|
429 | } else { |
||
430 | 4 | $hash .= $template; |
|
431 | } |
||
432 | |||
433 | 4 | $this->templateHashes[$type->getName()] = md5($hash); |
|
434 | } |
||
435 | |||
436 | 4 | return $this->templateHashes[$type->getName()]; |
|
437 | } |
||
438 | |||
439 | /** |
||
440 | * Returns cached instance of a FeedWriter for a specific type. |
||
441 | * |
||
442 | * @param FeedTypeInterface $type |
||
443 | * |
||
444 | * @return FeedWriter |
||
445 | */ |
||
446 | 2 | protected function getWriter(FeedTypeInterface $type) |
|
447 | { |
||
448 | 2 | if (!array_key_exists($type->getName(), $this->writers)) { |
|
449 | 2 | $this->writers[$type->getName()] = $this->writerFactory->createWriter($type); |
|
450 | } |
||
451 | |||
452 | 2 | return $this->writers[$type->getName()]; |
|
453 | } |
||
454 | |||
455 | /** |
||
456 | * pings database to keep connection alive. |
||
457 | * |
||
458 | * @param EntityManager $manager |
||
459 | */ |
||
460 | protected function pingDatabase($manager) |
||
471 | |||
472 | /** |
||
473 | * Encodes a file using gzip compression. |
||
474 | * |
||
475 | * @param string $source The source file |
||
476 | * @param string $destination The encoded destination file |
||
477 | */ |
||
478 | protected function gzip($source, $destination) |
||
479 | { |
||
480 | $fp = fopen($source, 'r'); |
||
481 | $zp = gzopen($destination, 'wb9'); |
||
482 | |||
483 | while (!feof($fp)) { |
||
484 | gzwrite($zp, fgets($fp)); |
||
490 | |||
491 | /** |
||
492 | * @param string $eventName |
||
493 | * @param Event $event |
||
494 | */ |
||
495 | protected function dispatch($eventName, Event $event = null) |
||
501 | } |
||
502 |
This check compares calls to functions or methods with their respective definitions. If the call has more arguments than are defined, it raises an issue.
If a function is defined several times with a different number of parameters, the check may pick up the wrong definition and report false positives. One codebase where this has been known to happen is Wordpress.
In this case you can add the
@ignore
PhpDoc annotation to the duplicate definition and it will be ignored.