| Total Complexity | 48 |
| Total Lines | 275 |
| Duplicated Lines | 0 % |
| Changes | 1 | ||
| Bugs | 0 | Features | 0 |
Complex classes like PublishDateExtractor often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use PublishDateExtractor, and based on these observations, apply Extract Interface, too.
| 1 | <?php declare(strict_types=1); |
||
| 16 | class PublishDateExtractor extends AbstractModule implements ModuleInterface { |
||
| 17 | use ArticleMutatorTrait; |
||
| 18 | |||
| 19 | /** @inheritdoc */ |
||
public function run(Article $article): self {
    $this->article($article);

    // Try each metadata source in priority order. `??` short-circuits, so a
    // later extractor is only invoked when every earlier one returned null.
    $publishDate = $this->getDateFromSchemaOrg()
        ?? $this->getDateFromOpenGraph()
        ?? $this->getDateFromURL()
        ?? $this->getDateFromDublinCore()
        ?? $this->getDateFromParsely();

    // When no source produced a date, null is what gets stored.
    $article->setPublishDate($publishDate);

    return $this;
}
||
| 45 | |||
| 46 | /** |
||
| 47 | * @return \DateTime|null |
||
| 48 | */ |
||
| 49 | private function getDateFromURL(): ?\DateTime { |
||
| 65 | } |
||
| 66 | |||
| 67 | /** |
||
| 68 | * Check for and determine dates from Schema.org's datePublished property. |
||
| 69 | * |
||
| 70 | * Checks HTML tags (e.g. <meta>, <time>, etc.) and JSON-LD. |
||
| 71 | * |
||
| 72 | * @return \DateTime|null |
||
| 73 | * |
||
| 74 | * @see https://schema.org/datePublished |
||
| 75 | */ |
||
private function getDateFromSchemaOrg(): ?\DateTime {
    // Turns a raw metadata value into a DateTime, or null when it is not a
    // non-empty string or cannot be parsed. Empty strings are rejected
    // explicitly because `new \DateTime('')` silently means "now", which
    // would be reported as the publish date. \Throwable is caught on purpose:
    // the value comes from untrusted markup and extraction is best-effort.
    $parse = static function ($value): ?\DateTime {
        if (!is_string($value) || trim($value) === '') {
            return null;
        }

        try {
            return new \DateTime($value);
        } catch (\Throwable $e) {
            return null;
        }
    };

    // Reads `datePublished` from one decoded JSON-LD object. When the
    // property is a list, only its first element is considered.
    $fromJsonObject = static function ($item) use ($parse): ?\DateTime {
        if (!is_object($item) || !isset($item->datePublished)) {
            return null;
        }

        $raw = $item->datePublished;
        if (is_array($raw)) {
            $raw = $raw[0] ?? null;
        }

        return $parse($raw);
    };

    // Check for HTML tags (<meta>, <time>, etc.)
    $nodes = $this->article()->getRawDoc()->find('*[itemprop="datePublished"]');

    /** @var Element $node */
    foreach ($nodes as $node) {
        try {
            // Prefer `datetime`, then fall back to `content`. The first value
            // that parses wins.
            foreach (['datetime', 'content'] as $attribute) {
                if (!$node->hasAttribute($attribute)) {
                    continue;
                }

                $dt = $parse($node->getAttribute($attribute));
                if ($dt !== null) {
                    return $dt;
                }
            }
        } catch (\Exception $e) {
            // Do nothing here in case the node has unrecognizable date information.
        }
    }

    // Check for JSON-LD
    $nodes = $this->article()->getRawDoc()->find('script[type="application/ld+json"]');

    /** @var Element $node */
    foreach ($nodes as $node) {
        try {
            $json = json_decode($node->text());
        } catch (\Exception $e) {
            // Unreadable script node: move on to the next one.
            continue;
        }

        // A JSON-LD document may be a single object or a top-level array of
        // objects; normalise to a list. Invalid JSON decodes to null and is
        // skipped by the is_object() check below.
        $documents = is_array($json) ? $json : [$json];

        foreach ($documents as $document) {
            if (!is_object($document)) {
                continue;
            }

            // A date on the document itself takes precedence over @graph entries.
            $dt = $fromJsonObject($document);
            if ($dt !== null) {
                return $dt;
            }

            $graph = $document->{'@graph'} ?? null;
            if (!is_array($graph)) {
                continue;
            }

            // Return the first graph entry with a parsable date; an entry with
            // a malformed date no longer aborts the rest of the scan.
            foreach ($graph as $entry) {
                $dt = $fromJsonObject($entry);
                if ($dt !== null) {
                    return $dt;
                }
            }
        }
    }

    return null;
}
||
| 151 | |||
| 152 | /** |
||
| 153 | * Check for and determine dates based on Dublin Core standards. |
||
| 154 | * |
||
| 155 | * @return \DateTime|null |
||
| 156 | * |
||
| 157 | * @see http://dublincore.org/documents/dcmi-terms/#elements-date |
||
| 158 | * @see http://dublincore.org/documents/2000/07/16/usageguide/qualified-html.shtml |
||
| 159 | */ |
||
| 160 | private function getDateFromDublinCore(): ?\DateTime { |
||
| 182 | } |
||
| 183 | |||
| 184 | /** |
||
| 185 | * Check for and determine dates based on OpenGraph standards. |
||
| 186 | * |
||
| 187 | * @return \DateTime|null |
||
| 188 | * |
||
| 189 | * @see http://ogp.me/ |
||
| 190 | * @see http://ogp.me/#type_article |
||
| 191 | */ |
||
| 192 | private function getDateFromOpenGraph(): ?\DateTime { |
||
| 210 | } |
||
| 211 | |||
| 212 | /** |
||
| 213 | * Check for and determine dates based on Parsely metadata. |
||
| 214 | * |
||
| 215 | * Checks JSON-LD, <meta> tags and parsely-page. |
||
| 216 | * |
||
| 217 | * @return \DateTime|null |
||
| 218 | * |
||
| 219 | * @see https://www.parsely.com/help/integration/jsonld/ |
||
| 220 | * @see https://www.parsely.com/help/integration/metatags/ |
||
| 221 | * @see https://www.parsely.com/help/integration/ppage/ |
||
| 222 | */ |
||
| 223 | private function getDateFromParsely(): ?\DateTime { |
||
| 291 | } |
||
| 292 | } |
||
| 293 |