Total Complexity | 42 |
Total Lines | 334 |
Duplicated Lines | 0 % |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
Complex classes like DownloadImages often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use DownloadImages, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
19 | class DownloadImages |
||
20 | { |
||
21 | const REGENERATE_PICTURES_QUALITY = 80; |
||
22 | |||
23 | private $client; |
||
24 | private $baseFolder; |
||
25 | private $logger; |
||
26 | private $mimeGuesser; |
||
27 | private $wallabagUrl; |
||
28 | |||
29 | public function __construct(HttpClient $client, $baseFolder, $wallabagUrl, LoggerInterface $logger, MessageFactory $messageFactory = null) |
||
38 | } |
||
39 | |||
/**
 * Expose the base folder this instance stores images under.
 *
 * @return string The value of the $baseFolder property
 */
public function getBaseFolder()
{
    $folder = $this->baseFolder;

    return $folder;
}
||
44 | |||
45 | /** |
||
46 | * Process the html and extract images URLs from it. |
||
47 | * |
||
48 | * @param string $html |
||
49 | * |
||
50 | * @return string[] |
||
51 | */ |
||
52 | public static function extractImagesUrlsFromHtml($html) |
||
60 | } |
||
61 | |||
/**
 * Process the html: extract the images from it, save them locally and
 * return the html with every successfully saved image url replaced by its
 * local counterpart.
 *
 * Images that can not be processed (processSingleImage() returned false)
 * are left untouched in the html.
 *
 * @param int    $entryId ID of the entry
 * @param string $html
 * @param string $url     Used as a base path for relative image and folder
 *
 * @return string The updated html
 */
public function processHtml($entryId, $html, $url)
{
    $imagesUrls = self::extractImagesUrlsFromHtml($html);

    // resolve the entry folder once instead of once per image
    $relativePath = $this->getRelativePath($entryId);

    // download and save the image to the folder
    foreach ($imagesUrls as $image) {
        $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath);

        if (false === $imagePath) {
            continue;
        }

        // if image contains "&" and we can't find it in the html it might be because it's encoded as "&amp;"
        // in the raw markup, so look for the entity-encoded form instead
        if (false !== stripos($image, '&') && false === stripos($html, $image)) {
            $image = str_replace('&', '&amp;', $image);
        }

        $html = str_replace($image, $imagePath, $html);
    }

    return $html;
}
||
95 | |||
/**
 * Process a single image:
 *   - retrieve it
 *   - re-save it (for security reasons)
 *   - return the url of the new local copy.
 *
 * @param int         $entryId      ID of the entry
 * @param string|null $imagePath    Path to the image to retrieve
 * @param string      $url          Url from where the image was found
 * @param string|null $relativePath Relative local path where the image is saved
 *                                  (computed from $entryId when null)
 *
 * @return string|false Url to access the image from the web (prefixed with the
 *                      configured wallabag url), or false when the image was skipped
 */
public function processSingleImage($entryId, $imagePath, $url, $relativePath = null)
{
    if (null === $imagePath) {
        return false;
    }

    if (null === $relativePath) {
        $relativePath = $this->getRelativePath($entryId);
    }

    $this->logger->debug('DownloadImages: working on image: ' . $imagePath);

    $folderPath = $this->baseFolder . '/' . $relativePath;

    // build image path
    $absolutePath = $this->getAbsoluteLink($url, $imagePath);
    if (false === $absolutePath) {
        $this->logger->error('DownloadImages: Can not determine the absolute path for that image, skipping.');

        return false;
    }

    try {
        $res = $this->client->get($absolutePath);
    } catch (\Exception $e) {
        $this->logger->error('DownloadImages: Can not retrieve image, skipping.', ['exception' => $e]);

        return false;
    }

    // getExtensionFromResponse() returns false when validation failed: the
    // check must be made on $ext (the original tested $res, which is never
    // false here, so invalid extensions slipped through)
    $ext = $this->getExtensionFromResponse($res, $imagePath);
    if (false === $ext) {
        return false;
    }

    $hashImage = hash('crc32', $absolutePath);
    $localPath = $folderPath . '/' . $hashImage . '.' . $ext;

    // read the body once as a string; it is reused by both GD and Imagick
    $body = (string) $res->getBody();

    try {
        $im = imagecreatefromstring($body);
    } catch (\Exception $e) {
        $im = false;
    }

    if (false === $im) {
        $this->logger->error('DownloadImages: Error while regenerating image', ['path' => $localPath]);

        return false;
    }

    switch ($ext) {
        case 'gif':
            // use Imagick if available to keep GIF animation
            if (class_exists('\\Imagick')) {
                try {
                    $imagick = new \Imagick();
                    $imagick->readImageBlob($body);
                    $imagick->setImageFormat('gif');
                    $imagick->writeImages($localPath, true);
                } catch (\Exception $e) {
                    // if Imagick fail, fallback to the default solution
                    imagegif($im, $localPath);
                }
            } else {
                imagegif($im, $localPath);
            }

            $this->logger->debug('DownloadImages: Re-creating gif');
            break;
        case 'jpeg':
        case 'jpg':
            imagejpeg($im, $localPath, self::REGENERATE_PICTURES_QUALITY);
            $this->logger->debug('DownloadImages: Re-creating jpg');
            break;
        case 'png':
            // keep the alpha channel when the png is re-encoded
            imagealphablending($im, false);
            imagesavealpha($im, true);
            // map the 0-100 quality constant onto the 0-9 scale used by imagepng()
            imagepng($im, $localPath, ceil(self::REGENERATE_PICTURES_QUALITY / 100 * 9));
            $this->logger->debug('DownloadImages: Re-creating png');
            break;
    }

    imagedestroy($im);

    return $this->wallabagUrl . '/assets/images/' . $relativePath . '/' . $hashImage . '.' . $ext;
}
||
194 | |||
/**
 * Delete every stored image of an entry, then try to remove its folder.
 *
 * Deletion is best-effort: errors from unlink() and rmdir() are suppressed.
 *
 * @param int $entryId ID of the entry
 */
public function removeImages($entryId)
{
    $entryFolder = $this->baseFolder . '/' . $this->getRelativePath($entryId);

    $entryFiles = (new Finder())
        ->files()
        ->ignoreDotFiles(true)
        ->in($entryFolder);

    foreach ($entryFiles as $entryFile) {
        @unlink($entryFile->getRealPath());
    }

    @rmdir($entryFolder);
}
||
217 | |||
/**
 * Build the relative folder used to store the images of an entry.
 *
 * The path is derived from the crc32 hash of the entry id and has the
 * shape "<1st hash char>/<2nd hash char>/<hash>".
 *
 * @param int  $entryId      ID of the entry
 * @param bool $createFolder Should we create the folder (under the base folder) when it is missing?
 *
 * @return string Path relative to the base folder
 */
public function getRelativePath($entryId, $createFolder = true)
{
    $hash = hash('crc32', $entryId);
    $relativePath = implode('/', [$hash[0], $hash[1], $hash]);
    $absoluteFolder = $this->baseFolder . '/' . $relativePath;

    if ($createFolder && !file_exists($absoluteFolder)) {
        mkdir($absoluteFolder, 0777, true);
    }

    $this->logger->debug(
        'DownloadImages: Folder used for that Entry id',
        ['folder' => $absoluteFolder, 'entryId' => $entryId]
    );

    return $relativePath;
}
||
240 | |||
/**
 * Collect the image urls declared in the srcset attribute of the given nodes.
 *
 * Urls found on later nodes are placed before the ones already collected.
 *
 * @return array An array of urls
 */
private static function getSrcsetUrls(Crawler $imagesCrawler)
{
    // A candidate is a run of characters that are neither quotes nor white
    // space, optionally followed by white space, then one or more
    // "<digits>w" / "<digits>x" descriptors
    $candidatePattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/";

    // keep only the url part of a candidate, i.e. what precedes the first space
    $keepUrlOnly = function ($candidate) {
        $parts = explode(' ', $candidate, 2);

        return trim($parts[0]);
    };

    $collected = [];

    foreach ($imagesCrawler->getIterator() as $imageNode) {
        $srcsetAttribute = $imageNode->getAttribute('srcset');

        // nothing to extract from this node
        if ('' === $srcsetAttribute) {
            continue;
        }

        preg_match_all($candidatePattern, $srcsetAttribute, $matches);

        $candidates = \call_user_func_array('array_merge', $matches);
        $collected = array_merge(array_map($keepUrlOnly, $candidates), $collected);
    }

    return $collected;
}
||
273 | |||
/**
 * Make sure the base folder, where all images are going to be saved, exists.
 */
private function setFolder()
{
    // nothing to do when the folder is already there
    if (file_exists($this->baseFolder)) {
        return;
    }

    // otherwise attempt to create it, including missing parent folders
    mkdir($this->baseFolder, 0755, true);
}
||
284 | |||
285 | /** |
||
286 | * Make an $url absolute based on the $base. |
||
287 | * |
||
288 | * @see Graby->makeAbsoluteStr |
||
289 | * |
||
290 | * @param string $base Base url |
||
291 | * @param string $url Url to make it absolute |
||
292 | * |
||
293 | * @return false|string |
||
294 | */ |
||
295 | private function getAbsoluteLink($base, $url) |
||
312 | } |
||
313 | |||
314 | /** |
||
315 | * Retrieve and validate the extension from the response of the url of the image. |
||
316 | * |
||
317 | * @param ResponseInterface $res Http Response |
||
318 | * @param string $imagePath Path from the src image from the content (used for log only) |
||
319 | * |
||
320 | * @return string|false Extension name or false if validation failed |
||
321 | */ |
||
322 | private function getExtensionFromResponse(ResponseInterface $res, $imagePath) |
||
353 | } |
||
354 | } |
||
355 |