Duplicate code is one of the most pungent code smells. A commonly used rule of thumb is to restructure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
1 | <?php |
||
15 | /** |
||
16 | * Consumer that extracts Open Graph data from either a URL or a HTML string. |
||
17 | */ |
||
18 | class Consumer |
||
19 | { |
||
20 | private ?ClientInterface $client; |
||
|
|||
21 | private ?RequestFactoryInterface $requestFactory; |
||
22 | |||
23 | /** |
||
24 | * When enabled, crawler will read content of title and meta description if no |
||
25 | * Open Graph data is provided by target page. |
||
26 | */ |
||
27 | public bool $useFallbackMode = false; |
||
28 | |||
29 | /** |
||
30 | * When enabled, crawler will throw exceptions for some crawling errors like unexpected |
||
31 | * Open Graph elements. |
||
32 | */ |
||
33 | public bool $debug = false; |
||
34 | |||
/**
 * Creates a consumer, optionally wired with the HTTP collaborators that loadUrl() uses
 * to build and send its request.
 *
 * @param ClientInterface|null         $client         A PSR-18 ClientInterface implementation.
 * @param RequestFactoryInterface|null $requestFactory A PSR-17 RequestFactoryInterface implementation.
 */
public function __construct(?ClientInterface $client = null, ?RequestFactoryInterface $requestFactory = null)
{
    // Both collaborators are stored as given; either may be null.
    $this->requestFactory = $requestFactory;
    $this->client         = $client;
}
|
44 | |||
/**
 * Fetches HTML content from the given URL and then crawls it for Open Graph data.
 *
 * A GET request is built with the request factory, sent through the HTTP client, and the
 * response body is handed to loadHtml() with $url as the fallback URL.
 *
 * @param string $url URL to be crawled.
 *
 * @return ObjectBase
 *
 * @throws LogicException           If the consumer was created without a client or a request factory.
 * @throws ClientExceptionInterface
 */
public function loadUrl(string $url): ObjectBase
{
    // Both collaborators are dereferenced below, so both must be present; checking only the
    // client would let a missing request factory surface as a fatal call on null.
    if ($this->client === null || $this->requestFactory === null) {
        throw new LogicException(
            "To use loadUrl() you must provide \$client and \$requestFactory when instantiating the consumer."
        );
    }

    $request = $this->requestFactory->createRequest("GET", $url);
    $response = $this->client->sendRequest($request);

    return $this->loadHtml($response->getBody()->getContents(), $url);
}
||
67 | |||
/**
 * Crawls the given HTML string for Open Graph data.
 *
 * @param string      $html        HTML string, usually whole content of crawled web resource.
 * @param string|null $fallbackUrl URL assigned to the result when fallback mode is enabled
 *                                 and the extracted data carries no URL.
 *
 * @return ObjectBase
 */
public function loadHtml(string $html, ?string $fallbackUrl = null): ObjectBase
{
    // Extract all data that can be found
    $page = $this->extractOpenGraphData($html);

    // Use the caller-supplied URL only when fallback mode is on and extraction found none.
    if ($this->useFallbackMode && $page->url === null) {
        $page->url = $fallbackUrl;
    }

    return $page;
}
||
89 | |||
90 | 16 | private function extractOpenGraphData(string $content): ObjectBase |
|
91 | { |
||
92 | 16 | $crawler = new Crawler; |
|
93 | 16 | $crawler->addHTMLContent($content, 'UTF-8'); |
|
94 | |||
95 | 16 | $properties = []; |
|
96 | 16 | foreach(['name', 'property'] as $t) |
|
97 | { |
||
98 | // Get all meta-tags starting with "og:" |
||
99 | 16 | $ogMetaTags = $crawler->filter("meta[{$t}^='og:']"); |
|
100 | // Create clean property array |
||
101 | 16 | $props = Linq::from($ogMetaTags) |
|
102 | 16 | ->select( |
|
103 | function (DOMElement $tag) use ($t) { |
||
104 | 13 | $name = strtolower(trim($tag->getAttribute($t))); |
|
105 | 13 | $value = trim($tag->getAttribute("content")); |
|
106 | 13 | return new Property($name, $value); |
|
107 | 16 | } |
|
108 | ) |
||
109 | 16 | ->toArray(); |
|
110 | 16 | $properties = array_merge($properties, $props); |
|
111 | |||
112 | } |
||
113 | |||
114 | // Create new object of the correct type |
||
115 | 16 | $typeProperty = Linq::from($properties) |
|
116 | 16 | ->firstOrNull( |
|
117 | function (Property $property) { |
||
118 | 13 | return $property->key === Property::TYPE; |
|
119 | 16 | } |
|
120 | ); |
||
121 | 16 | switch ($typeProperty !== null ? $typeProperty->value : null) { |
|
122 | default: |
||
123 | 16 | $object = new Website(); |
|
124 | 16 | break; |
|
125 | } |
||
126 | |||
127 | // Assign all properties to the object |
||
128 | 16 | $object->assignProperties($properties, $this->debug); |
|
129 | |||
130 | // Fallback for url |
||
131 | 13 | if ($this->useFallbackMode && !$object->url) { |
|
132 | 2 | $urlElement = $crawler->filter("link[rel='canonical']")->first(); |
|
133 | 2 | if ($urlElement->count() > 0) { |
|
134 | 1 | $object->url = trim($urlElement->attr("href")); |
|
135 | } |
||
136 | } |
||
137 | |||
138 | // Fallback for title |
||
139 | 13 | if ($this->useFallbackMode && !$object->title) { |
|
140 | 2 | $titleElement = $crawler->filter("title")->first(); |
|
141 | 2 | if ($titleElement->count() > 0) { |
|
157 |