1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* @package s9e\TextFormatter |
5
|
|
|
* @copyright Copyright (c) 2010-2017 The s9e Authors |
6
|
|
|
* @license http://www.opensource.org/licenses/mit-license.php The MIT License |
7
|
|
|
*/ |
8
|
|
|
namespace s9e\TextFormatter\Plugins\MediaEmbed; |
9
|
|
|
|
10
|
|
|
use InvalidArgumentException; |
11
|
|
|
use RuntimeException; |
12
|
|
|
use s9e\TextFormatter\Configurator\Helpers\RegexpBuilder; |
13
|
|
|
use s9e\TextFormatter\Configurator\Items\Attribute; |
14
|
|
|
use s9e\TextFormatter\Configurator\Items\AttributePreprocessor; |
15
|
|
|
use s9e\TextFormatter\Configurator\Items\Tag; |
16
|
|
|
use s9e\TextFormatter\Plugins\ConfiguratorBase; |
17
|
|
|
use s9e\TextFormatter\Plugins\MediaEmbed\Configurator\Collections\CachedDefinitionCollection; |
18
|
|
|
use s9e\TextFormatter\Plugins\MediaEmbed\Configurator\Collections\SiteCollection; |
19
|
|
|
use s9e\TextFormatter\Plugins\MediaEmbed\Configurator\TemplateBuilder; |
20
|
|
|
|
21
|
|
|
class Configurator extends ConfiguratorBase
{
	/**
	* @var array List of filters that are explicitly allowed in attribute definitions.
	*            Any "preFilter" or "postFilter" that is not in this list is rejected by
	*            appendFilter()
	*/
	public $allowedFilters = [
		'hexdec',
		'stripslashes',
		'urldecode'
	];

	/**
	* @var string String to be appended to the templates used to render media sites
	*/
	protected $appendTemplate = '';

	/**
	* @var bool Whether to replace unformatted URLs in text with embedded content
	*/
	public $captureURLs = true;

	/**
	* @var SiteCollection Site collection
	*/
	protected $collection;

	/**
	* @var bool Whether to create the MEDIA BBCode
	*/
	protected $createMediaBBCode = true;

	/**
	* @var bool Whether to create a BBCode for each site
	*/
	public $createIndividualBBCodes = false;

	/**
	* @var Configurator\Collections\SiteDefinitionCollection Default sites
	*/
	public $defaultSites;

	/**
	* @var string Name of the tag used to handle embeddable URLs
	*/
	protected $tagName = 'MEDIA';

	/**
	* @var TemplateBuilder
	*/
	protected $templateBuilder;

	/**
	* {@inheritdoc}
	*
	* Creates the site collection and registers it as the "mediasites" variable, creates the
	* generic media tag (and its BBCode if applicable) and instantiates the default site
	* definitions and the template builder
	*/
	protected function setUp()
	{
		// Create a collection to store the configured sites
		$this->collection = new SiteCollection;

		// Register the collection as a variable to be used during parsing
		$this->configurator->registeredVars['mediasites'] = $this->collection;

		// Create a MEDIA tag
		$tag = $this->configurator->tags->add($this->tagName);

		// This tag should not need to be closed and should not contain itself
		$tag->rules->autoClose();
		$tag->rules->denyChild($this->tagName);

		// Empty this tag's filter chain and add our tag filter
		$tag->filterChain->clear();
		$tag->filterChain
		    ->append([__NAMESPACE__ . '\\Parser', 'filterTag'])
		    ->addParameterByName('parser')
		    ->addParameterByName('mediasites')
		    ->setJS(file_get_contents(__DIR__ . '/Parser/tagFilter.js'));

		// Create a [MEDIA] BBCode if applicable
		if ($this->createMediaBBCode)
		{
			$this->configurator->BBCodes->set(
				$this->tagName,
				[
					'contentAttributes' => ['url'],
					'defaultAttribute'  => 'site'
				]
			);
		}

		// Keep the default sites if they were provided before setUp() was called
		if (!isset($this->defaultSites))
		{
			$this->defaultSites = new CachedDefinitionCollection;
		}

		$this->templateBuilder = new TemplateBuilder;
	}

	/**
	* {@inheritdoc}
	*
	* @return array|null Config containing the "quickMatch", "regexp" and "tagName" elements, or
	*                    NULL if URLs are not captured or if no site has been added
	*/
	public function asConfig()
	{
		if (!$this->captureURLs || !count($this->collection))
		{
			return;
		}

		// By default only http:// and https:// URLs are matched. Custom schemes declared by
		// media sites are matched as "scheme:" with no slashes required
		$regexp  = 'https?:\\/\\/';
		$schemes = $this->getSchemes();
		if (!empty($schemes))
		{
			$regexp = '(?>' . RegexpBuilder::fromList($schemes) . ':|' . $regexp . ')';
		}

		return [
			// Custom schemes are not followed by "//" so the quick match is relaxed to ":"
			'quickMatch' => (empty($schemes)) ? '://' : ':',
			'regexp'     => '/\\b' . $regexp . '[^["\'\\s]+/Si',
			'tagName'    => $this->tagName
		];
	}

	//==========================================================================
	// Public API
	//==========================================================================

	/**
	* Add a media site
	*
	* The site is only recorded in the site collection once its tag has been successfully
	* created, checked and added to the configurator, so that a failure does not leave a site
	* without a matching tag in the collection.
	*
	* @param  string $siteId     Site's ID
	* @param  array  $siteConfig Site's config. If omitted, the definition is retrieved from
	*                            the default sites
	* @return Tag                Tag created for this site
	*
	* @throws InvalidArgumentException if the site ID is invalid
	* @throws RuntimeException         if an attribute uses a filter that is not allowed
	*/
	public function add($siteId, array $siteConfig = null)
	{
		// Normalize the site ID
		$siteId = $this->normalizeId($siteId);

		// Normalize or retrieve the site definition
		$siteConfig = (isset($siteConfig)) ? $this->defaultSites->normalizeValue($siteConfig) : $this->defaultSites->get($siteId);

		// Create the tag for this site
		$tag = new Tag;

		// This tag should not need to be closed and should not contain itself or the MEDIA tag.
		// We allow URL as a child to be used as fallback
		$tag->rules->allowChild('URL');
		$tag->rules->autoClose();
		$tag->rules->denyChild($siteId);
		$tag->rules->denyChild($this->tagName);

		// Store attributes' configuration, starting with a default "url" attribute to store the
		// original URL if applicable
		$attributes = [
			'url' => ['type' => 'url']
		];

		// Process the "scrape" directives. The array union keeps the "url" entry defined above
		$attributes += $this->addScrapes($tag, $siteConfig['scrape']);

		// Add each "extract" as an attribute preprocessor
		foreach ($siteConfig['extract'] as $regexp)
		{
			// Get the attributes filled by this regexp
			$attrRegexps = $tag->attributePreprocessors->add('url', $regexp)->getAttributes();

			// For each named subpattern in the regexp, ensure that an attribute exists and
			// create it otherwise, using the subpattern as regexp filter
			foreach ($attrRegexps as $attrName => $attrRegexp)
			{
				$attributes[$attrName]['regexp'] = $attrRegexp;
			}
		}

		// Overwrite attribute declarations
		if (isset($siteConfig['attributes']))
		{
			foreach ($siteConfig['attributes'] as $attrName => $attrConfig)
			{
				foreach ($attrConfig as $configName => $configValue)
				{
					$attributes[$attrName][$configName] = $configValue;
				}
			}
		}

		// Create the attributes
		$hasRequiredAttribute = false;
		foreach ($attributes as $attrName => $attrConfig)
		{
			$attribute = $this->addAttribute($tag, $attrName, $attrConfig);
			if ($attribute->required)
			{
				$hasRequiredAttribute = true;
			}
		}

		// If there is an attribute named "id" we'll append its regexp to the list of attribute
		// preprocessors in order to support both forms [site]<url>[/site] and [site]<id>[/site]
		if (isset($attributes['id']['regexp']))
		{
			// Add a named capture around the whole match
			$attrRegexp = preg_replace('(\\^(.*)\\$)s', "^(?'id'$1)$", $attributes['id']['regexp']);

			$tag->attributePreprocessors->add('url', $attrRegexp);
		}

		// If the tag definition does not have a required attribute, we use a filter to invalidate
		// the tag at parsing time if it does not have a non-default attribute. In other words, if
		// no attribute value is extracted, the tag is invalidated
		if (!$hasRequiredAttribute)
		{
			$tag->filterChain
				->append([__NAMESPACE__ . '\\Parser', 'hasNonDefaultAttribute'])
				->setJS(file_get_contents(__DIR__ . '/Parser/hasNonDefaultAttribute.js'));
		}

		// Create a template for this media site based on the preferred rendering method
		$tag->template = $this->templateBuilder->build($siteId, $siteConfig) . $this->appendTemplate;

		// Normalize the tag's template
		$this->configurator->templateNormalizer->normalizeTag($tag);

		// Check the tag's safety
		$this->configurator->templateChecker->checkTag($tag);

		// Now add the tag to the list
		$this->configurator->tags->add($siteId, $tag);

		// Add this site to the list only now that its tag exists. Doing it earlier would leave
		// the site in the collection (and in the config returned by asConfig()) if any of the
		// steps above threw an exception
		$this->collection[$siteId] = $siteConfig;

		// Create a BBCode for this site if applicable
		if ($this->createIndividualBBCodes)
		{
			$this->configurator->BBCodes->add(
				$siteId,
				[
					'defaultAttribute'  => 'url',
					'contentAttributes' => ['url']
				]
			);
		}

		return $tag;
	}

	/**
	* Set a string to be appended to the templates used to render media sites
	*
	* The template is normalized before it is stored. It only applies to sites added after this
	* method is called, as the string is concatenated to the site's template in add()
	*
	* @param  string $template
	* @return void
	*/
	public function appendTemplate($template = '')
	{
		$this->appendTemplate = $this->configurator->templateNormalizer->normalizeTemplate($template);
	}

	//==========================================================================
	// Internal methods
	//==========================================================================

	/**
	* Add an attribute to given tag
	*
	* Filters are appended in this order: the "preFilter" if any, then either the filter that
	* corresponds to the attribute's "type" or a "#regexp" filter, then the "postFilter" if any
	*
	* @param  Tag    $tag
	* @param  string $attrName
	* @param  array  $attrConfig
	* @return Attribute
	*
	* @throws RuntimeException if the "preFilter" or "postFilter" is not an allowed filter
	*/
	protected function addAttribute(Tag $tag, $attrName, array $attrConfig)
	{
		$attribute = $tag->attributes->add($attrName);
		if (isset($attrConfig['preFilter']))
		{
			$this->appendFilter($attribute, $attrConfig['preFilter']);
		}

		// Add a filter depending on the attribute's type or regexp
		if (isset($attrConfig['type']))
		{
			// If "type" is "url", get the "#url" filter
			$filter = $this->configurator->attributeFilters['#' . $attrConfig['type']];
			$attribute->filterChain->append($filter);
		}
		elseif (isset($attrConfig['regexp']))
		{
			$attribute->filterChain->append('#regexp')->setRegexp($attrConfig['regexp']);
		}

		if (isset($attrConfig['required']))
		{
			$attribute->required = $attrConfig['required'];
		}
		else
		{
			// Non-id attributes are marked as optional
			$attribute->required = ($attrName === 'id');
		}

		if (isset($attrConfig['postFilter']))
		{
			$this->appendFilter($attribute, $attrConfig['postFilter']);
		}

		if (isset($attrConfig['defaultValue']))
		{
			$attribute->defaultValue = $attrConfig['defaultValue'];
		}

		return $attribute;
	}

	/**
	* Add the defined scrapes to given tag
	*
	* The scrape filter is inserted in the tag's filter chain even if there are no scrapes
	* defined, in which case its "scrapeConfig" variable is an empty array
	*
	* @param  Tag   $tag     Tag that receives the scrape filter
	* @param  array $scrapes Scraping definitions
	* @return array          Attributes created from scraped data
	*/
	protected function addScrapes(Tag $tag, array $scrapes)
	{
		$attributes   = [];
		$scrapeConfig = [];
		foreach ($scrapes as $scrape)
		{
			// Collect the names of the attributes filled by this scrape. At runtime, we will
			// not scrape the content of the link if all of the attributes already have a value
			$attrNames = [];
			foreach ($scrape['extract'] as $extractRegexp)
			{
				// Use an attribute preprocessor so we can reuse its routines
				$attributePreprocessor = new AttributePreprocessor($extractRegexp);

				foreach ($attributePreprocessor->getAttributes() as $attrName => $attrRegexp)
				{
					$attrNames[] = $attrName;
					$attributes[$attrName]['regexp'] = $attrRegexp;
				}
			}

			// Deduplicate and sort the attribute names so that they look tidy
			$attrNames = array_unique($attrNames);
			sort($attrNames);

			// Prepare the scrape config and add the URL if applicable
			$entry = [$scrape['match'], $scrape['extract'], $attrNames];
			if (isset($scrape['url']))
			{
				$entry[] = $scrape['url'];
			}

			// Add this scrape to the config
			$scrapeConfig[] = $entry;
		}

		// Add the scrape filter to this tag, execute it right before attributes are filtered,
		// which should be after attribute preprocessors are run. The offset is hardcoded here
		// for convenience (and because we know the filterChain is in its default state) and
		// since scraping is impossible in JavaScript without a PHP proxy, we just make it
		// return true in order to keep the tag valid
		$tag->filterChain->insert(1, __NAMESPACE__ . '\\Parser::scrape')
		                 ->addParameterByName('scrapeConfig')
		                 ->addParameterByName('cacheDir')
		                 ->setVar('scrapeConfig', $scrapeConfig)
		                 ->setJS('returnTrue');

		return $attributes;
	}

	/**
	* Append a filter to an attribute's filterChain
	*
	* @param  Attribute $attribute Target attribute
	* @param  string    $filter    Filter's name
	* @return void
	*
	* @throws RuntimeException if the filter is not listed in $allowedFilters
	*/
	protected function appendFilter(Attribute $attribute, $filter)
	{
		if (!in_array($filter, $this->allowedFilters, true))
		{
			throw new RuntimeException("Filter '" . $filter . "' is not allowed");
		}

		$attribute->filterChain->append($this->configurator->attributeFilters[$filter]);
	}

	/**
	* Return the list of custom schemes supported via media sites
	*
	* Schemes are returned in the order they are declared. A scheme declared by several sites
	* appears once per declaration
	*
	* @return string[]
	*/
	protected function getSchemes()
	{
		$schemes = [];
		foreach ($this->collection as $site)
		{
			if (isset($site['scheme']))
			{
				// A site's "scheme" can be a single string or a list of strings
				foreach ((array) $site['scheme'] as $scheme)
				{
					$schemes[] = $scheme;
				}
			}
		}

		return $schemes;
	}

	/**
	* Validate and normalize a site ID
	*
	* The ID is lowercased and must consist exclusively of ASCII letters and digits
	*
	* @param  string $siteId
	* @return string
	*
	* @throws InvalidArgumentException if the ID is empty or contains any other character
	*/
	protected function normalizeId($siteId)
	{
		$siteId = strtolower($siteId);

		// The D modifier prevents "$" from matching before a trailing newline, which would
		// otherwise let an ID such as "youtube\n" through
		if (!preg_match('(^[a-z0-9]+$)D', $siteId))
		{
			throw new InvalidArgumentException('Invalid site ID');
		}

		return $siteId;
	}
}