Total Complexity | 48 |
Total Lines | 275 |
Duplicated Lines | 0 % |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
Complex classes like PublishDateExtractor often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use PublishDateExtractor, and based on these observations, apply Extract Interface, too.
1 | <?php declare(strict_types=1); |
||
16 | class PublishDateExtractor extends AbstractModule implements ModuleInterface { |
||
17 | use ArticleMutatorTrait; |
||
18 | |||
19 | /** @inheritdoc */ |
||
public function run(Article $article): self {
    // Hand the article to the module first; the extractor helpers read it
    // back through $this->article() (presumably provided by
    // ArticleMutatorTrait - confirm).
    $this->article($article);

    // Sources are consulted in priority order. `??` short-circuits, so each
    // later extractor only runs when every earlier one returned null.
    $publishDate = $this->getDateFromSchemaOrg()
        ?? $this->getDateFromOpenGraph()
        ?? $this->getDateFromURL()
        ?? $this->getDateFromDublinCore()
        ?? $this->getDateFromParsely();

    // Still null at this point when none of the sources produced a date.
    $article->setPublishDate($publishDate);

    return $this;
}
||
45 | |||
46 | /** |
||
47 | * @return \DateTime|null |
||
48 | */ |
||
49 | private function getDateFromURL(): ?\DateTime { |
||
65 | } |
||
66 | |||
67 | /** |
||
68 | * Check for and determine dates from Schema.org's datePublished property. |
||
69 | * |
||
70 | * Checks HTML tags (e.g. <meta>, <time>, etc.) and JSON-LD. |
||
71 | * |
||
72 | * @return \DateTime|null |
||
73 | * |
||
74 | * @see https://schema.org/datePublished |
||
75 | */ |
||
private function getDateFromSchemaOrg(): ?\DateTime {
    // Pass 1: HTML tags carrying itemprop="datePublished" (<meta>, <time>, etc.).
    $nodes = $this->article()->getRawDoc()->find('*[itemprop="datePublished"]');

    /* @var $node Element */
    foreach ($nodes as $node) {
        try {
            // "datetime" is tried before "content"; if the first attribute is
            // missing, blank or unparseable, the second one still gets a chance.
            foreach (['datetime', 'content'] as $attribute) {
                if (!$node->hasAttribute($attribute)) {
                    continue;
                }

                $dt = $this->parseSchemaOrgDate($node->getAttribute($attribute));

                if ($dt !== null) {
                    return $dt;
                }
            }
        }
        catch (\Exception $e) {
            // Best effort: a node that cannot be inspected is skipped.
        }
    }

    // Pass 2: JSON-LD <script> blocks.
    $nodes = $this->article()->getRawDoc()->find('script[type="application/ld+json"]');

    /* @var $node Element */
    foreach ($nodes as $node) {
        try {
            $json = json_decode($node->text());
        }
        catch (\Exception $e) {
            // Best effort: a script whose text cannot be read is skipped.
            continue;
        }

        // json_decode() yields null for invalid JSON and a non-object for
        // scalar or list documents; neither exposes the properties read below.
        if (!is_object($json)) {
            continue;
        }

        // Collect every candidate value of this script. A top-level
        // datePublished is listed first so it takes precedence over
        // @graph entries of the same script.
        $candidates = [];

        if (isset($json->datePublished)) {
            $candidates[] = $json->datePublished;
        }

        if (isset($json->{'@graph'}) && is_array($json->{'@graph'})) {
            foreach ($json->{'@graph'} as $graphData) {
                $graphData = (array)$graphData;

                if (isset($graphData['datePublished'])) {
                    $candidates[] = $graphData['datePublished'];
                }
            }
        }

        // The first parseable candidate wins. A malformed candidate is skipped
        // on its own instead of aborting the rest of the script.
        foreach ($candidates as $candidate) {
            $dt = $this->parseSchemaOrgDate($candidate);

            if ($dt !== null) {
                return $dt;
            }
        }
    }

    return null;
}

/**
 * Convert a raw datePublished value into a DateTime.
 *
 * Blank strings are rejected explicitly because the DateTime constructor
 * would otherwise interpret them as "now" and report the current time as
 * the publish date.
 *
 * @param mixed $value A date string, or an array whose first element is one.
 *
 * @return \DateTime|null Null when the value is not a non-blank, parseable date string.
 */
private function parseSchemaOrgDate($value): ?\DateTime {
    if (is_array($value)) {
        // reset() returns the first element without modifying the decoded
        // document, or false for an empty array.
        $value = reset($value);
    }

    if (!is_string($value) || trim($value) === '') {
        return null;
    }

    try {
        return new \DateTime($value);
    }
    catch (\Exception $e) {
        // Unrecognizable date information.
        return null;
    }
}
||
151 | |||
152 | /** |
||
153 | * Check for and determine dates based on Dublin Core standards. |
||
154 | * |
||
155 | * @return \DateTime|null |
||
156 | * |
||
157 | * @see http://dublincore.org/documents/dcmi-terms/#elements-date |
||
158 | * @see http://dublincore.org/documents/2000/07/16/usageguide/qualified-html.shtml |
||
159 | */ |
||
160 | private function getDateFromDublinCore(): ?\DateTime { |
||
182 | } |
||
183 | |||
184 | /** |
||
185 | * Check for and determine dates based on OpenGraph standards. |
||
186 | * |
||
187 | * @return \DateTime|null |
||
188 | * |
||
189 | * @see http://ogp.me/ |
||
190 | * @see http://ogp.me/#type_article |
||
191 | */ |
||
192 | private function getDateFromOpenGraph(): ?\DateTime { |
||
210 | } |
||
211 | |||
212 | /** |
||
213 | * Check for and determine dates based on Parsely metadata. |
||
214 | * |
||
215 | * Checks JSON-LD, <meta> tags and parsely-page. |
||
216 | * |
||
217 | * @return \DateTime|null |
||
218 | * |
||
219 | * @see https://www.parsely.com/help/integration/jsonld/ |
||
220 | * @see https://www.parsely.com/help/integration/metatags/ |
||
221 | * @see https://www.parsely.com/help/integration/ppage/ |
||
222 | */ |
||
223 | private function getDateFromParsely(): ?\DateTime { |
||
291 | } |
||
292 | } |
||
293 |