Total Complexity | 40 |
Total Lines | 244 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like PublishDateExtractor often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use PublishDateExtractor, and based on these observations, apply Extract Interface, too.
1 | <?php declare(strict_types=1); |
||
16 | class PublishDateExtractor extends AbstractModule implements ModuleInterface { |
||
17 | use ArticleMutatorTrait; |
||
18 | |||
/**
 * Resolve the article's publish date by consulting each extraction strategy in turn.
 *
 * Strategies are tried in this priority order: Schema.org, OpenGraph, URL,
 * Dublin Core, Parsely. The first non-null result is stored on the article;
 * if every strategy returns null, null is stored.
 *
 * @inheritdoc
 */
public function run(Article $article): self {
    $this->article($article);

    // `??` short-circuits, so a later extractor only runs while every earlier one returned null.
    $publishDate = $this->getDateFromSchemaOrg()
        ?? $this->getDateFromOpenGraph()
        ?? $this->getDateFromURL()
        ?? $this->getDateFromDublinCore()
        ?? $this->getDateFromParsely();

    $article->setPublishDate($publishDate);

    return $this;
}
||
47 | |||
/**
 * Determine the publish date from a date embedded in the article's final URL.
 *
 * Matches a `YYYY/MM/DD` or `YYYY-MM-DD` sequence (the same delimiter must be
 * used in both positions) and converts it to a date with the time set to 00:00:00.
 *
 * @return \DateTime|null The matched date at midnight, or null when the URL contains
 *                        no such sequence or the sequence is not a real calendar date.
 */
private function getDateFromURL(): ?\DateTime {
    $pattern = '@(?:[\d]{4})(?<delimiter>[/-])(?:[\d]{2})\k<delimiter>(?:[\d]{2})@U';

    if (!preg_match($pattern, $this->article()->getFinalUrl(), $matches)) {
        /** @todo Add more date detection methods */
        return null;
    }

    $format = 'Y' . $matches['delimiter'] . 'm' . $matches['delimiter'] . 'd';
    $dt = \DateTime::createFromFormat($format, $matches[0]);

    // createFromFormat() returns false on failure, so this must be checked
    // before any method is called on the result.
    if ($dt === false) {
        return null;
    }

    // Out-of-range parts (e.g. month 13 or day 00) are rolled over instead of being
    // rejected, so require the parsed date to format back to exactly the matched text.
    if ($dt->format($format) !== $matches[0]) {
        return null;
    }

    // Fields absent from the format are filled with the current time; normalise to midnight.
    $dt->setTime(0, 0, 0);

    return $dt;
}
||
68 | |||
/**
 * Check for and determine dates from Schema.org's datePublished property.
 *
 * Checks HTML tags (e.g. <meta>, <time>, etc.) first, reading the `datetime`
 * attribute and then the `content` attribute of each element marked with
 * itemprop="datePublished". If none yields a date, falls back to a top-level
 * `datePublished` property in each JSON-LD <script> block.
 *
 * Values that are empty, not strings, or not parseable as a date are skipped.
 *
 * @return \DateTime|null The first date that parses successfully, or null if none does.
 *
 * @see https://schema.org/datePublished
 */
private function getDateFromSchemaOrg(): ?\DateTime {
    // Check for HTML tags (<meta>, <time>, etc.)
    $nodes = $this->article()->getRawDoc()->find('*[itemprop="datePublished"]');

    /** @var Element $node */
    foreach ($nodes as $node) {
        foreach (['datetime', 'content'] as $attribute) {
            if (!$node->hasAttribute($attribute)) {
                continue;
            }

            $dt = $this->parseSchemaOrgDate($node->getAttribute($attribute));

            if ($dt !== null) {
                return $dt;
            }
        }
    }

    // Check for JSON-LD
    $nodes = $this->article()->getRawDoc()->find('script[type="application/ld+json"]');

    /** @var Element $node */
    foreach ($nodes as $node) {
        $json = json_decode($node->text());

        // Only a decoded object can carry a top-level datePublished property.
        if (!is_object($json) || !isset($json->datePublished)) {
            continue;
        }

        $dt = $this->parseSchemaOrgDate($json->datePublished);

        if ($dt !== null) {
            return $dt;
        }
    }

    return null;
}

/**
 * Convert a raw datePublished value into a DateTime, rejecting unusable input.
 *
 * @param mixed $value Raw attribute or JSON value.
 *
 * @return \DateTime|null Null when the value is not a string, is blank, or cannot be parsed.
 */
private function parseSchemaOrgDate($value): ?\DateTime {
    // Under strict_types a non-string argument raises a TypeError, which the
    // catch below would not intercept; a blank string would be parsed as "now".
    if (!is_string($value) || trim($value) === '') {
        return null;
    }

    try {
        return new \DateTime($value);
    }
    catch (\Exception $e) {
        // Unrecognizable date information; let the caller try the next candidate.
        return null;
    }
}
||
124 | |||
125 | /** |
||
126 | * Check for and determine dates based on Dublin Core standards. |
||
127 | * |
||
128 | * @return \DateTime|null |
||
129 | * |
||
130 | * @see http://dublincore.org/documents/dcmi-terms/#elements-date |
||
131 | * @see http://dublincore.org/documents/2000/07/16/usageguide/qualified-html.shtml |
||
132 | */ |
||
133 | private function getDateFromDublinCore(): ?\DateTime { |
||
155 | } |
||
156 | |||
157 | /** |
||
158 | * Check for and determine dates based on OpenGraph standards. |
||
159 | * |
||
160 | * @return \DateTime|null |
||
161 | * |
||
162 | * @see http://ogp.me/ |
||
163 | * @see http://ogp.me/#type_article |
||
164 | */ |
||
165 | private function getDateFromOpenGraph(): ?\DateTime { |
||
183 | } |
||
184 | |||
185 | /** |
||
186 | * Check for and determine dates based on Parsely metadata. |
||
187 | * |
||
188 | * Checks JSON-LD, <meta> tags and parsely-page. |
||
189 | * |
||
190 | * @return \DateTime|null |
||
191 | * |
||
192 | * @see https://www.parsely.com/help/integration/jsonld/ |
||
193 | * @see https://www.parsely.com/help/integration/metatags/ |
||
194 | * @see https://www.parsely.com/help/integration/ppage/ |
||
195 | */ |
||
196 | private function getDateFromParsely(): ?\DateTime { |
||
260 | } |
||
261 | } |
||
262 |