1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Wallabag\CoreBundle\Helper; |
4
|
|
|
|
5
|
|
|
use Graby\Graby; |
6
|
|
|
use Psr\Log\LoggerInterface; |
7
|
|
|
use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; |
8
|
|
|
use Symfony\Component\Validator\Constraints\Locale as LocaleConstraint; |
9
|
|
|
use Symfony\Component\Validator\Constraints\Url as UrlConstraint; |
10
|
|
|
use Symfony\Component\Validator\Validator\ValidatorInterface; |
11
|
|
|
use Wallabag\CoreBundle\Entity\Entry; |
12
|
|
|
use Wallabag\CoreBundle\Tools\Utils; |
13
|
|
|
|
14
|
|
|
/** |
15
|
|
|
 * This kind of proxy class takes care of getting the content from a URL
 * and updating the entry with what it found.
17
|
|
|
*/ |
18
|
|
|
class ContentProxy
{
    /** @var Graby Content fetcher / HTML cleaner injected through the constructor */
    protected $graby;

    /** @var RuleBasedTagger Tagger run on every entry once it has been stocked */
    protected $tagger;

    /** @var ValidatorInterface Validator used for the locale and URL checks */
    protected $validator;

    /** @var LoggerInterface */
    protected $logger;

    /** @var MimeTypeExtensionGuesser Used to turn a content type into a file extension */
    protected $mimeGuesser;

    /** @var mixed Placeholder content stored when no html is available (presumably a string; verify against the service definition) */
    protected $fetchingErrorMessage;

    /** @var mixed Never assigned inside this class; kept because subclasses may rely on it */
    protected $eventDispatcher;

    /** @var bool Whether the headers found in the content should be saved on the entry */
    protected $storeArticleHeaders;

    /**
     * @param Graby              $graby
     * @param RuleBasedTagger    $tagger
     * @param ValidatorInterface $validator
     * @param LoggerInterface    $logger
     * @param mixed              $fetchingErrorMessage Content used in place of the html when fetching failed
     * @param bool               $storeArticleHeaders  Whether to save `all_headers` on the entry
     */
    public function __construct(Graby $graby, RuleBasedTagger $tagger, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage, $storeArticleHeaders = false)
    {
        $this->graby = $graby;
        $this->tagger = $tagger;
        $this->validator = $validator;
        $this->logger = $logger;
        $this->mimeGuesser = new MimeTypeExtensionGuesser();
        $this->fetchingErrorMessage = $fetchingErrorMessage;
        $this->storeArticleHeaders = $storeArticleHeaders;
    }

    /**
     * Update entry using either fetched or provided content.
     *
     * @param Entry  $entry                Entry to update
     * @param string $url                  Url of the content
     * @param array  $content              Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url
     * @param bool   $disableContentUpdate Whether to skip trying to fetch content using Graby
     */
    public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false)
    {
        // provided html always goes through Graby's cleanup first
        if (!empty($content['html'])) {
            $content['html'] = $this->graby->cleanupHtml($content['html'], $url);
        }

        // fetch only when nothing usable was provided and fetching is allowed
        $needsFetching = empty($content) || false === $this->validateContent($content);

        if ($needsFetching && false === $disableContentUpdate) {
            $fetchedContent = $this->graby->fetchContent($url);

            // when content is imported, we have information in $content
            // in case fetching content goes bad, we'll keep the imported information instead of overriding them
            if (empty($content) || $fetchedContent['html'] !== $this->fetchingErrorMessage) {
                $content = $fetchedContent;
            }
        }

        // be sure to keep the url in case of error
        // so we'll be able to refetch it in the future
        if (empty($content['url'])) {
            $content['url'] = $url;
        }

        $this->stockEntry($entry, $content);
    }

    /**
     * Use a Symfony validator to ensure the language is well formatted.
     * The language is saved only when validation succeeds; otherwise a warning is logged.
     *
     * @param Entry  $entry
     * @param string $value Language to validate and save
     */
    public function updateLanguage(Entry $entry, $value)
    {
        // some lang are defined as fr-FR, es-ES.
        // replacing - by _ might increase language support
        $value = str_replace('-', '_', $value);

        $errors = $this->validator->validate($value, new LocaleConstraint());

        if (0 !== count($errors)) {
            $this->logger->warning('Language validation failed. ' . (string) $errors);

            return;
        }

        $entry->setLanguage($value);
    }

    /**
     * Use a Symfony validator to ensure the preview picture is a real url.
     * The picture is saved only when validation succeeds; otherwise a warning is logged.
     *
     * @param Entry  $entry
     * @param string $value URL to validate and save
     */
    public function updatePreviewPicture(Entry $entry, $value)
    {
        $errors = $this->validator->validate($value, new UrlConstraint());

        if (0 !== count($errors)) {
            $this->logger->warning('PreviewPicture validation failed. ' . (string) $errors);

            return;
        }

        $entry->setPreviewPicture($value);
    }

    /**
     * Update the published date of the entry.
     *
     * Accepts an integer-like value (handled as a timestamp), a date string or a
     * \DateTime instance. A value that can not be converted is logged as a warning
     * and the entry is left untouched.
     *
     * @param Entry            $entry
     * @param string|\DateTime $value Date to validate and save
     */
    public function updatePublishedAt(Entry $entry, $value)
    {
        $date = $value;

        // is it a timestamp? the "@" prefix lets \DateTime parse it as one
        if (false !== filter_var($date, FILTER_VALIDATE_INT)) {
            $date = '@' . $date;
        }

        try {
            // is it already a DateTime?
            // (the conversion is inside the try/catch because parsing the time string may fail)
            if (!$date instanceof \DateTime) {
                $date = new \DateTime($date);
            }

            $entry->setPublishedAt($date);
        } catch (\Exception $e) {
            $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $entry->getUrl(), 'date' => $value]);
        }
    }

    /**
     * Helper to extract and save host from entry url.
     *
     * @param Entry $entry
     */
    public function setEntryDomainName(Entry $entry)
    {
        $domainName = parse_url($entry->getUrl(), PHP_URL_HOST);

        // only a malformed url (false) is skipped; an url without host yields null,
        // which is still handed to the entry
        if (false === $domainName) {
            return;
        }

        $entry->setDomainName($domainName);
    }

    /**
     * Helper to set a default title using:
     *   - url basename, if applicable
     *   - hostname.
     *
     * @param Entry $entry
     */
    public function setDefaultEntryTitle(Entry $entry)
    {
        // parse_url() returns false for a seriously malformed url and leaves out
        // the components that are absent, so each key is checked before being read
        $parts = parse_url($entry->getUrl());

        $title = isset($parts['path']) ? pathinfo($parts['path'], PATHINFO_BASENAME) : '';

        if (empty($title)) {
            $title = isset($parts['host']) ? $parts['host'] : null;
        }

        $entry->setTitle($title);
    }

    /**
     * Stock entry with fetched or imported content.
     * Will fall back to OpenGraph data if available.
     *
     * @param Entry $entry   Entry to stock
     * @param array $content Array with at least the url; title and html are expected but a missing html is tolerated
     */
    private function stockEntry(Entry $entry, array $content)
    {
        $entry->setUrl($content['url']);

        $this->setEntryDomainName($entry);

        if (!empty($content['title'])) {
            $entry->setTitle($content['title']);
        } elseif (!empty($content['open_graph']['og_title'])) {
            $entry->setTitle($content['open_graph']['og_title']);
        }

        // the html key may be absent (content update disabled with partial content),
        // in which case null is stored without raising an undefined index notice
        $html = isset($content['html']) ? $content['html'] : null;

        // false means no html could be obtained: store the error message instead
        if (false === $html) {
            $html = $this->fetchingErrorMessage;

            if (!empty($content['open_graph']['og_description'])) {
                $html .= '<p><i>But we found a short description: </i></p>';
                $html .= $content['open_graph']['og_description'];
            }
        }

        $entry->setContent($html);
        $entry->setReadingTime(Utils::getReadingTime($html));

        if (!empty($content['status'])) {
            $entry->setHttpStatus($content['status']);
        }

        if (!empty($content['authors']) && is_array($content['authors'])) {
            $entry->setPublishedBy($content['authors']);
        }

        if (!empty($content['all_headers']) && $this->storeArticleHeaders) {
            $entry->setHeaders($content['all_headers']);
        }

        if (!empty($content['date'])) {
            $this->updatePublishedAt($entry, $content['date']);
        }

        if (!empty($content['language'])) {
            $this->updateLanguage($entry, $content['language']);
        }

        if (!empty($content['open_graph']['og_image'])) {
            $this->updatePreviewPicture($entry, $content['open_graph']['og_image']);
        }

        if (!empty($content['content_type'])) {
            // if content is an image, define it as a preview too
            $extension = $this->mimeGuesser->guess($content['content_type']);
            if (in_array($extension, ['jpeg', 'jpg', 'gif', 'png'], true)) {
                $this->updatePreviewPicture($entry, $content['url']);
            }

            $entry->setMimetype($content['content_type']);
        }

        // a failure of the automatic tagging must not prevent the entry from being stocked
        try {
            $this->tagger->tag($entry);
        } catch (\Exception $e) {
            $this->logger->error('Error while trying to automatically tag an entry.', [
                'entry_url' => $content['url'],
                'error_msg' => $e->getMessage(),
            ]);
        }
    }

    /**
     * Validate that the given content has at least a title, an html and a url.
     *
     * @param array $content
     *
     * @return bool true if valid otherwise false
     */
    private function validateContent(array $content)
    {
        foreach (['title', 'html', 'url'] as $requiredKey) {
            if (empty($content[$requiredKey])) {
                return false;
            }
        }

        return true;
    }
}
266
|
|
|
|