|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/** |
|
4
|
|
|
* @package s9e\TextFormatter |
|
5
|
|
|
* @copyright Copyright (c) 2010-2017 The s9e Authors |
|
6
|
|
|
* @license http://www.opensource.org/licenses/mit-license.php The MIT License |
|
7
|
|
|
*/ |
|
8
|
|
|
namespace s9e\TextFormatter\Plugins\MediaEmbed; |
|
9
|
|
|
|
|
10
|
|
|
use InvalidArgumentException; |
|
11
|
|
|
use RuntimeException; |
|
12
|
|
|
use s9e\TextFormatter\Configurator\Helpers\RegexpBuilder; |
|
13
|
|
|
use s9e\TextFormatter\Configurator\Items\Attribute; |
|
14
|
|
|
use s9e\TextFormatter\Configurator\Items\AttributePreprocessor; |
|
15
|
|
|
use s9e\TextFormatter\Configurator\Items\Tag; |
|
16
|
|
|
use s9e\TextFormatter\Plugins\ConfiguratorBase; |
|
17
|
|
|
use s9e\TextFormatter\Plugins\MediaEmbed\Configurator\Collections\CachedDefinitionCollection; |
|
18
|
|
|
use s9e\TextFormatter\Plugins\MediaEmbed\Configurator\Collections\SiteCollection; |
|
19
|
|
|
use s9e\TextFormatter\Plugins\MediaEmbed\Configurator\TemplateBuilder; |
|
20
|
|
|
|
|
21
|
|
|
class Configurator extends ConfiguratorBase |
|
22
|
|
|
{ |
|
23
|
|
|
/**
 * @var array Names of the filters that attribute definitions are explicitly allowed to use
 */
public $allowedFilters = ['hexdec', 'stripslashes', 'urldecode'];

/**
 * @var string Markup appended to every template generated for a media site
 */
protected $appendTemplate = '';

/**
 * @var bool Whether plain URLs found in text should be replaced with embedded content
 */
public $captureURLs = true;

/**
 * @var SiteCollection Sites that have been added to this plugin
 */
protected $collection;

/**
 * @var bool Whether the MEDIA BBCode should be created
 */
protected $createMediaBBCode = true;

/**
 * @var bool Whether each added site should get its own BBCode
 */
public $createIndividualBBCodes = false;

/**
 * @var Configurator\Collections\SiteDefinitionCollection Definitions of the default sites
 */
public $defaultSites;

/**
 * @var string Name of the tag that handles embeddable URLs
 */
protected $tagName = 'MEDIA';

/**
 * @var TemplateBuilder Builder used to generate the template of each media site
 */
protected $templateBuilder;
|
71
|
|
|
|
|
72
|
|
|
/**
 * {@inheritdoc}
 *
 * Creates the site collection, the generic media tag with its filter chain, the optional
 * media BBCode, the default site definitions and the template builder
 */
protected function setUp()
{
	// Configured sites live in this collection, which is also exposed to the parser under
	// the "mediasites" name
	$this->collection = new SiteCollection;
	$this->configurator->registeredVars['mediasites'] = $this->collection;

	// The generic tag closes automatically and must never be nested inside of itself
	$mediaTag = $this->configurator->tags->add($this->tagName);
	$mediaTag->rules->autoClose();
	$mediaTag->rules->denyChild($this->tagName);

	// Drop the default filters, the plugin's own tag filter is the only one that should run
	$mediaTag->filterChain->clear();
	$mediaTag->filterChain
		->append([__NAMESPACE__ . '\\Parser', 'filterTag'])
		->addParameterByName('parser')
		->addParameterByName('mediasites')
		->setJS(file_get_contents(__DIR__ . '/Parser/tagFilter.js'));

	// Optionally expose the generic tag as a BBCode
	if ($this->createMediaBBCode)
	{
		$bbcodeConfig = [
			'contentAttributes' => ['url'],
			'defaultAttribute'  => 'site'
		];
		$this->configurator->BBCodes->set($this->tagName, $bbcodeConfig);
	}

	// Only create the default definitions if none were provided
	if (!isset($this->defaultSites))
	{
		$this->defaultSites = new CachedDefinitionCollection;
	}

	$this->templateBuilder = new TemplateBuilder;
}
|
117
|
|
|
|
|
118
|
|
|
/**
 * {@inheritdoc}
 *
 * @return array|null Parser config, or NULL if URLs are not captured or no site was added
 */
public function asConfig()
{
	// Without URL capture or without any site there is nothing for the parser to match
	if (!($this->captureURLs && count($this->collection)))
	{
		return;
	}

	$schemes          = $this->getSchemes();
	$hasCustomSchemes = !empty($schemes);

	// Match http(s) URLs by default, plus any custom scheme declared by the configured sites
	$prefix = 'https?:\\/\\/';
	if ($hasCustomSchemes)
	{
		$prefix = '(?>' . RegexpBuilder::fromList($schemes) . ':|' . $prefix . ')';
	}

	return [
		// Custom schemes are not necessarily followed by "//" so only the colon is tested
		'quickMatch' => ($hasCustomSchemes) ? ':' : '://',
		'regexp'     => '/\\b' . $prefix . '[^["\'\\s]+/Si',
		'tagName'    => $this->tagName
	];
}
|
141
|
|
|
|
|
142
|
|
|
//========================================================================== |
|
143
|
|
|
// Public API |
|
144
|
|
|
//========================================================================== |
|
145
|
|
|
|
|
146
|
|
|
/**
 * Add a media site
 *
 * Registers the site in the collection, then builds, checks and registers the tag used to
 * render it. If no config is given, the definition is retrieved from the default sites.
 *
 * @param  string $siteId     Site's ID
 * @param  array  $siteConfig Site's config
 * @return Tag                Tag created for this site
 *
 * @throws InvalidArgumentException if the site ID is invalid
 */
public function add($siteId, array $siteConfig = null)
{
	$siteId = $this->normalizeId($siteId);

	// Normalize the given definition, or fall back to the default definition for this ID
	if (isset($siteConfig))
	{
		$siteConfig = $this->defaultSites->normalizeValue($siteConfig);
	}
	else
	{
		$siteConfig = $this->defaultSites->get($siteId);
	}

	// Record the site in the collection
	$this->collection[$siteId] = $siteConfig;

	// The site's tag closes automatically and cannot contain itself or the generic media
	// tag. URL is allowed as a child so that it can be used as a fallback
	$tag = new Tag;
	$tag->rules->allowChild('URL');
	$tag->rules->autoClose();
	$tag->rules->denyChild($siteId);
	$tag->rules->denyChild($this->tagName);

	// Start with the default "url" attribute that holds the original URL, then merge the
	// attributes produced by the "scrape" directives without overwriting it
	$attributes  = ['url' => ['type' => 'url']];
	$attributes += $this->addScrapes($tag, $siteConfig['scrape']);

	// Each "extract" regexp becomes an attribute preprocessor, and each of its named
	// subpatterns becomes the regexp filter of the attribute of the same name
	foreach ($siteConfig['extract'] as $extractRegexp)
	{
		$preprocessor = $tag->attributePreprocessors->add('url', $extractRegexp);
		foreach ($preprocessor->getAttributes() as $attrName => $attrRegexp)
		{
			$attributes[$attrName]['regexp'] = $attrRegexp;
		}
	}

	// Explicit attribute declarations take precedence over what was inferred above
	if (isset($siteConfig['attributes']))
	{
		foreach ($siteConfig['attributes'] as $attrName => $attrConfig)
		{
			foreach ($attrConfig as $configName => $configValue)
			{
				$attributes[$attrName][$configName] = $configValue;
			}
		}
	}

	// Create the attributes and keep track of whether any of them is required
	$hasRequiredAttribute = false;
	foreach ($attributes as $attrName => $attrConfig)
	{
		$attribute = $this->addAttribute($tag, $attrName, $attrConfig);
		$hasRequiredAttribute |= $attribute->required;
	}

	// If the "id" attribute has a regexp, wrap it in a named capture and use it as an extra
	// preprocessor so that both [site]<url>[/site] and [site]<id>[/site] are supported
	if (isset($attributes['id']['regexp']))
	{
		$idRegexp = preg_replace('(\\^(.*)\\$)s', "^(?'id'$1)$", $attributes['id']['regexp']);
		$tag->attributePreprocessors->add('url', $idRegexp);
	}

	// Without a required attribute, a filter is used to invalidate the tag at parsing time
	// if no attribute value could be extracted
	if (!$hasRequiredAttribute)
	{
		$tag->filterChain
			->append([__NAMESPACE__ . '\\Parser', 'hasNonDefaultAttribute'])
			->setJS(file_get_contents(__DIR__ . '/Parser/hasNonDefaultAttribute.js'));
	}

	// Build the site's template, then normalize it and check it before registering the tag
	$tag->template = $this->templateBuilder->build($siteId, $siteConfig) . $this->appendTemplate;
	$this->configurator->templateNormalizer->normalizeTag($tag);
	$this->configurator->templateChecker->checkTag($tag);
	$this->configurator->tags->add($siteId, $tag);

	// Optionally create a BBCode named after the site
	if ($this->createIndividualBBCodes)
	{
		$bbcodeConfig = [
			'defaultAttribute'  => 'url',
			'contentAttributes' => ['url']
		];
		$this->configurator->BBCodes->add($siteId, $bbcodeConfig);
	}

	return $tag;
}
|
263
|
|
|
|
|
264
|
|
|
/**
 * Set the markup that is appended to the template of every media site
 *
 * The given template is normalized before being stored. It is used by add() when a site's
 * template is built.
 *
 * @param  string $template
 * @return void
 */
public function appendTemplate($template = '')
{
	$normalizer = $this->configurator->templateNormalizer;

	$this->appendTemplate = $normalizer->normalizeTemplate($template);
}
|
274
|
|
|
|
|
275
|
|
|
//========================================================================== |
|
276
|
|
|
// Internal methods |
|
277
|
|
|
//========================================================================== |
|
278
|
|
|
|
|
279
|
|
|
/**
 * Create an attribute on given tag and configure it
 *
 * Recognized config keys are "preFilter", "type", "regexp", "required", "postFilter" and
 * "defaultValue". If both "type" and "regexp" are set, only "type" is used.
 *
 * @param  Tag       $tag
 * @param  string    $attrName
 * @param  array     $attrConfig
 * @return Attribute
 */
protected function addAttribute(Tag $tag, $attrName, array $attrConfig)
{
	$attr = $tag->attributes->add($attrName);

	// The pre-filter goes first in the chain
	if (isset($attrConfig['preFilter']))
	{
		$this->appendFilter($attr, $attrConfig['preFilter']);
	}

	// Main filter: either the built-in filter named after the type (e.g. "#url" for "url")
	// or a regexp filter
	if (isset($attrConfig['type']))
	{
		$typeFilter = $this->configurator->attributeFilters['#' . $attrConfig['type']];
		$attr->filterChain->append($typeFilter);
	}
	elseif (isset($attrConfig['regexp']))
	{
		$attr->filterChain->append('#regexp')->setRegexp($attrConfig['regexp']);
	}

	// Unless configured otherwise, only the "id" attribute is required
	$attr->required = (isset($attrConfig['required']))
	                ? $attrConfig['required']
	                : ($attrName === 'id');

	// The post-filter goes last in the chain
	if (isset($attrConfig['postFilter']))
	{
		$this->appendFilter($attr, $attrConfig['postFilter']);
	}

	if (isset($attrConfig['defaultValue']))
	{
		$attr->defaultValue = $attrConfig['defaultValue'];
	}

	return $attr;
}
|
329
|
|
|
|
|
330
|
|
|
/**
 * Add the scrape filter to given tag and collect the attributes its scrapes can fill
 *
 * @param  Tag   $tag     Tag that receives the scrape filter
 * @param  array $scrapes Scraping definitions
 * @return array          Attributes created from scraped data, as [name => ['regexp' => ...]]
 */
protected function addScrapes(Tag $tag, array $scrapes)
{
	$scrapedAttributes = [];
	$scrapeConfig      = [];
	foreach ($scrapes as $scrape)
	{
		// Gather the names of the attributes this scrape can fill. At runtime, the link is
		// not scraped if all of those attributes already have a value
		$names = [];
		foreach ($scrape['extract'] as $extractRegexp)
		{
			// An attribute preprocessor is only instantiated to reuse its regexp routines
			$preprocessor = new AttributePreprocessor($extractRegexp);
			foreach ($preprocessor->getAttributes() as $attrName => $attrRegexp)
			{
				$names[] = $attrName;
				$scrapedAttributes[$attrName]['regexp'] = $attrRegexp;
			}
		}

		// Remove duplicates and sort the names to keep the config tidy
		$names = array_unique($names);
		sort($names);

		// The URL is an optional fourth element of the entry
		$entry = [$scrape['match'], $scrape['extract'], $names];
		if (isset($scrape['url']))
		{
			$entry[] = $scrape['url'];
		}

		$scrapeConfig[] = $entry;
	}

	// The scrape filter is inserted at a hardcoded offset so that it runs right before
	// attributes are filtered, which relies on the filterChain being in its default state.
	// Scraping cannot be done in JavaScript without a PHP proxy, so the JavaScript version
	// simply returns true in order to keep the tag valid
	$tag->filterChain->insert(1, __NAMESPACE__ . '\\Parser::scrape')
	                 ->addParameterByName('scrapeConfig')
	                 ->addParameterByName('cacheDir')
	                 ->setVar('scrapeConfig', $scrapeConfig)
	                 ->setJS('returnTrue');

	return $scrapedAttributes;
}
|
385
|
|
|
|
|
386
|
|
|
/**
 * Append a whitelisted filter to an attribute's filterChain
 *
 * @param  Attribute $attribute Target attribute
 * @param  string    $filter    Filter's name, which must be listed in $allowedFilters
 * @return void
 *
 * @throws RuntimeException if the filter is not in the list of allowed filters
 */
protected function appendFilter(Attribute $attribute, $filter)
{
	// Strict comparison: only exact string matches from the whitelist are accepted
	if (in_array($filter, $this->allowedFilters, true))
	{
		$attribute->filterChain->append($this->configurator->attributeFilters[$filter]);

		return;
	}

	throw new RuntimeException("Filter '" . $filter . "' is not allowed");
}
|
402
|
|
|
|
|
403
|
|
|
/**
 * Collect the custom schemes declared by the configured sites
 *
 * Schemes are returned in the order they are found and are not deduplicated.
 *
 * @return string[]
 */
protected function getSchemes()
{
	$collected = [];
	foreach ($this->collection as $site)
	{
		if (!isset($site['scheme']))
		{
			continue;
		}

		// A site may declare either a single scheme or a list of schemes
		foreach ((array) $site['scheme'] as $scheme)
		{
			$collected[] = $scheme;
		}
	}

	return $collected;
}
|
424
|
|
|
|
|
425
|
|
|
/**
 * Validate and normalize a site ID
 *
 * The ID is lowercased and must then consist exclusively of one or more characters in the
 * ranges a-z and 0-9.
 *
 * @param  string $siteId
 * @return string         Lowercased site ID
 *
 * @throws InvalidArgumentException if the ID is empty or contains any other character
 */
protected function normalizeId($siteId)
{
	$siteId = strtolower($siteId);

	// The D modifier (PCRE_DOLLAR_ENDONLY) is required here: without it, "$" also matches
	// right before a trailing newline and an ID such as "youtube\n" would be accepted
	if (!preg_match('(^[a-z0-9]+$)D', $siteId))
	{
		throw new InvalidArgumentException('Invalid site ID');
	}

	return $siteId;
}
|
442
|
|
|
} |