Completed
Push — master ( ffde0c...06920f )
by Josh
15:07
created

Configurator::addScrapes()   B

Complexity

Conditions 5
Paths 7

Size

Total Lines 49
Code Lines 22

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 22
CRAP Score 5

Importance

Changes 0
Metric Value
dl 0
loc 49
ccs 22
cts 22
cp 1
rs 8.5906
c 0
b 0
f 0
cc 5
eloc 22
nc 7
nop 2
crap 5
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2017 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Plugins\MediaEmbed;
9
10
use InvalidArgumentException;
11
use RuntimeException;
12
use s9e\TextFormatter\Configurator\Helpers\RegexpBuilder;
13
use s9e\TextFormatter\Configurator\Items\Attribute;
14
use s9e\TextFormatter\Configurator\Items\AttributePreprocessor;
15
use s9e\TextFormatter\Configurator\Items\Tag;
16
use s9e\TextFormatter\Plugins\ConfiguratorBase;
17
use s9e\TextFormatter\Plugins\MediaEmbed\Configurator\Collections\CachedDefinitionCollection;
18
use s9e\TextFormatter\Plugins\MediaEmbed\Configurator\Collections\SiteCollection;
19
use s9e\TextFormatter\Plugins\MediaEmbed\Configurator\TemplateBuilder;
20
21
class Configurator extends ConfiguratorBase
22
{
23
	/**
	* @var array Names of the filters that may be used as "preFilter" or "postFilter" in
	*            attribute definitions. Any other filter is rejected by appendFilter()
	*/
	public $allowedFilters = ['hexdec', 'stripslashes', 'urldecode'];

	/**
	* @var SiteCollection Sites configured via add(), also exposed to the parser as the
	*                     "mediasites" registered variable
	*/
	protected $collection;

	/**
	* @var bool Whether setUp() should create a BBCode named after $tagName
	*/
	protected $createMediaBBCode = true;

	/**
	* @var Configurator\Collections\SiteDefinitionCollection Definitions used by add() when no
	*      site config is passed. Initialized in setUp() unless it was already set
	*/
	public $defaultSites;

	/**
	* @var string Name of the generic tag used to handle embeddable URLs
	*/
	protected $tagName = 'MEDIA';

	/**
	* @var TemplateBuilder Builder used by add() to generate each site's template
	*/
	protected $templateBuilder;
56
57
	/**
	* Set up the plugin's collection, generic tag, optional BBCode and helpers
	*
	* The site collection is registered as the "mediasites" variable, the generic tag gets
	* Parser::filterTag as its only filter, and a BBCode of the same name is created if
	* $createMediaBBCode is set.
	*
	* @return void
	*/
	protected function setUp()
	{
		$tagName = $this->tagName;

		// The collection of configured sites doubles as a variable available during parsing
		$this->collection = new SiteCollection;
		$this->configurator->registeredVars['mediasites'] = $this->collection;

		// Create the generic tag. It closes automatically and cannot be nested in itself
		$tag = $this->configurator->tags->add($tagName);
		$tag->rules->autoClose();
		$tag->rules->denyChild($tagName);

		// Replace whatever is in the tag's filter chain with our own tag filter
		$tag->filterChain->clear();
		$tagFilter = $tag->filterChain->append([__NAMESPACE__ . '\\Parser', 'filterTag']);
		$tagFilter = $tagFilter->addParameterByName('parser');
		$tagFilter = $tagFilter->addParameterByName('mediasites');
		$tagFilter->setJS(file_get_contents(__DIR__ . '/Parser/tagFilter.js'));

		// Optionally expose the tag as a BBCode whose content is used as the URL
		if ($this->createMediaBBCode)
		{
			$bbcodeConfig = [
				'contentAttributes' => ['url'],
				'defaultAttribute'  => 'site'
			];
			$this->configurator->BBCodes->set($tagName, $bbcodeConfig);
		}

		// Fall back to the cached definitions unless a collection was provided already
		if (!isset($this->defaultSites))
		{
			$this->defaultSites = new CachedDefinitionCollection;
		}

		$this->templateBuilder = new TemplateBuilder;
	}
102
103
	/**
	* Generate this plugin's config
	*
	* @return array|null Array with the keys "quickMatch", "regexp" and "tagName", or NULL if
	*                    no site has been configured
	*/
	public function asConfig()
	{
		// Nothing to match if no site has been added
		if (count($this->collection) === 0)
		{
			return null;
		}

		$schemes    = $this->getSchemes();
		$hasSchemes = !empty($schemes);

		// Match http and https URLs by default, plus any custom scheme declared by a site
		$regexp = 'https?:\\/\\/';
		if ($hasSchemes)
		{
			$regexp = '(?>' . RegexpBuilder::fromList($schemes) . ':|' . $regexp . ')';
		}

		// Custom schemes are only followed by a colon, which loosens the quick match
		$config               = [];
		$config['quickMatch'] = ($hasSchemes) ? ':' : '://';
		$config['regexp']     = '/\\b' . $regexp . '[^["\'\\s]+/Si';
		$config['tagName']    = $this->tagName;

		return $config;
	}
126
127
	//==========================================================================
128
	// Public API
129
	//==========================================================================
130
131
	/**
	* Add a media site
	*
	* Creates a tag named after the site, with one attribute per named subpattern found in the
	* site's "extract" and "scrape" regexps plus a default "url" attribute, then builds, normalizes
	* and checks the tag's template before registering the tag.
	*
	* The site is recorded in the site collection only after its tag has been successfully
	* registered, so that a call that fails does not leave behind a site with no matching tag.
	*
	* @param  string $siteId     Site's ID
	* @param  array  $siteConfig Site's config. If omitted, the definition is retrieved from
	*                            $defaultSites using the site's ID
	* @return Tag                Tag created for this site
	*
	* @throws InvalidArgumentException if the site ID is invalid
	* @throws RuntimeException         if an attribute uses a filter not in $allowedFilters
	*/
	public function add($siteId, array $siteConfig = null)
	{
		// Normalize the site ID
		$siteId = $this->normalizeId($siteId);

		// Normalize or retrieve the site definition
		$siteConfig = (isset($siteConfig)) ? $this->defaultSites->normalizeValue($siteConfig) : $this->defaultSites->get($siteId);

		// Create the tag for this site
		$tag = new Tag;

		// This tag should not need to be closed and should not contain itself or the MEDIA tag.
		// We allow URL as a child to be used as fallback
		$tag->rules->allowChild('URL');
		$tag->rules->autoClose();
		$tag->rules->denyChild($siteId);
		$tag->rules->denyChild($this->tagName);

		// Store attributes' configuration, starting with a default "url" attribute to store the
		// original URL if applicable
		$attributes = [
			'url' => ['type' => 'url']
		];

		// Process the "scrape" directives. The + operator preserves the "url" entry above
		$attributes += $this->addScrapes($tag, $siteConfig['scrape']);

		// Add each "extract" as an attribute preprocessor
		foreach ($siteConfig['extract'] as $regexp)
		{
			// Get the attributes filled by this regexp
			$attrRegexps = $tag->attributePreprocessors->add('url', $regexp)->getAttributes();

			// For each named subpattern in the regexp, ensure that an attribute exists and
			// create it otherwise, using the subpattern as regexp filter
			foreach ($attrRegexps as $attrName => $attrRegexp)
			{
				$attributes[$attrName]['regexp'] = $attrRegexp;
			}
		}

		// Overwrite attribute declarations
		if (isset($siteConfig['attributes']))
		{
			foreach ($siteConfig['attributes'] as $attrName => $attrConfig)
			{
				foreach ($attrConfig as $configName => $configValue)
				{
					$attributes[$attrName][$configName] = $configValue;
				}
			}
		}

		// Create the attributes and remember whether any of them is required
		$hasRequiredAttribute = false;
		foreach ($attributes as $attrName => $attrConfig)
		{
			$attribute = $this->addAttribute($tag, $attrName, $attrConfig);
			if ($attribute->required)
			{
				$hasRequiredAttribute = true;
			}
		}

		// If there is an attribute named "id" we'll append its regexp to the list of attribute
		// preprocessors in order to support both forms [site]<url>[/site] and [site]<id>[/site]
		if (isset($attributes['id']['regexp']))
		{
			// Add a named capture around the whole match
			$attrRegexp = preg_replace('(\\^(.*)\\$)s', "^(?'id'$1)$", $attributes['id']['regexp']);

			$tag->attributePreprocessors->add('url', $attrRegexp);
		}

		// If the tag definition does not have a required attribute, we use a filter to invalidate
		// the tag at parsing time if it does not have a non-default attribute. In other words, if
		// no attribute value is extracted, the tag is invalidated
		if (!$hasRequiredAttribute)
		{
			$tag->filterChain
				->append([__NAMESPACE__ . '\\Parser', 'hasNonDefaultAttribute'])
				->setJS(file_get_contents(__DIR__ . '/Parser/hasNonDefaultAttribute.js'));
		}

		// Create a template for this media site based on the preferred rendering method
		$tag->template = $this->templateBuilder->build($siteId, $siteConfig);

		// Normalize the tag's template
		$this->configurator->templateNormalizer->normalizeTag($tag);

		// Check the tag's safety
		$this->configurator->templateChecker->checkTag($tag);

		// Now add the tag to the list
		$this->configurator->tags->add($siteId, $tag);

		// Record the site last, once nothing can fail anymore, so that the collection never
		// contains a site whose tag could not be created
		$this->collection[$siteId] = $siteConfig;

		return $tag;
	}
236
237
	//==========================================================================
238
	// Internal methods
239
	//==========================================================================
240
241
	/**
	* Create an attribute on given tag and configure it
	*
	* Filters are appended in this order: the optional "preFilter", then either the filter
	* matching "type" or a "#regexp" filter built from "regexp", then the optional "postFilter".
	* Unless "required" is set in the config, only the attribute named "id" is required.
	*
	* @param  Tag       $tag        Tag that receives the attribute
	* @param  string    $attrName   Attribute's name
	* @param  array     $attrConfig Attribute's config. Recognized keys: "preFilter", "type",
	*                               "regexp", "required", "postFilter" and "defaultValue"
	* @return Attribute             Newly created attribute
	*/
	protected function addAttribute(Tag $tag, $attrName, array $attrConfig)
	{
		$attribute = $tag->attributes->add($attrName);

		if (isset($attrConfig['preFilter']))
		{
			$this->appendFilter($attribute, $attrConfig['preFilter']);
		}

		// The main filter is based on the attribute's type, with the regexp as a fallback
		if (isset($attrConfig['type']))
		{
			// A type such as "url" maps to the "#url" filter
			$filterName = '#' . $attrConfig['type'];
			$attribute->filterChain->append($this->configurator->attributeFilters[$filterName]);
		}
		elseif (isset($attrConfig['regexp']))
		{
			$regexpFilter = $attribute->filterChain->append('#regexp');
			$regexpFilter->setRegexp($attrConfig['regexp']);
		}

		// An explicit setting wins, otherwise every attribute but "id" is optional
		$attribute->required = (isset($attrConfig['required']))
		                     ? $attrConfig['required']
		                     : ($attrName === 'id');

		if (isset($attrConfig['postFilter']))
		{
			$this->appendFilter($attribute, $attrConfig['postFilter']);
		}

		if (isset($attrConfig['defaultValue']))
		{
			$attribute->defaultValue = $attrConfig['defaultValue'];
		}

		return $attribute;
	}
291
292
	/**
	* Add the scrape filter to given tag and collect the attributes filled by scraping
	*
	* Each scrape is stored in the filter's "scrapeConfig" variable as a list made of its "match"
	* value, its "extract" value, the sorted names of the attributes it fills and, if set, its
	* "url" value. The filter is inserted even if there are no scrapes.
	*
	* @param  Tag   $tag     Tag that receives the scrape filter
	* @param  array $scrapes Scraping definitions
	* @return array          Attributes created from scraped data, using their name as key
	*/
	protected function addScrapes(Tag $tag, array $scrapes)
	{
		$scrapedAttributes = [];
		$config            = [];
		foreach ($scrapes as $definition)
		{
			// List the attributes this scrape can fill. At runtime, the link is not scraped
			// if all of those attributes already have a value
			$names = [];
			foreach ($definition['extract'] as $extractRegexp)
			{
				// Let an attribute preprocessor parse the regexp's named subpatterns for us
				$preprocessor = new AttributePreprocessor($extractRegexp);
				$subpatterns  = $preprocessor->getAttributes();
				foreach ($subpatterns as $name => $subpattern)
				{
					$names[] = $name;
					$scrapedAttributes[$name]['regexp'] = $subpattern;
				}
			}

			// Keep each name once, in sorted order, to keep the config tidy
			$names = array_unique($names);
			sort($names);

			// The URL is only appended to the entry if the scrape defines one
			$config[] = (isset($definition['url']))
			          ? [$definition['match'], $definition['extract'], $names, $definition['url']]
			          : [$definition['match'], $definition['extract'], $names];
		}

		// The scrape filter goes at a hardcoded position in the tag's filter chain. Scraping is
		// not done in JavaScript, where the filter is replaced with returnTrue so that the tag
		// remains valid
		$scrapeFilter = $tag->filterChain->insert(1, __NAMESPACE__ . '\\Parser::scrape');
		$scrapeFilter = $scrapeFilter->addParameterByName('scrapeConfig');
		$scrapeFilter = $scrapeFilter->addParameterByName('cacheDir');
		$scrapeFilter = $scrapeFilter->setVar('scrapeConfig', $config);
		$scrapeFilter->setJS('returnTrue');

		return $scrapedAttributes;
	}
347
348
	/**
	* Append one of the allowed filters to an attribute's filterChain
	*
	* @param  Attribute $attribute Target attribute
	* @param  string    $filter    Filter's name, which must be listed in $allowedFilters
	* @return void
	*
	* @throws RuntimeException if the filter is not listed in $allowedFilters
	*/
	protected function appendFilter(Attribute $attribute, $filter)
	{
		// Strict comparison, only the exact names from the list are accepted
		$isAllowed = in_array($filter, $this->allowedFilters, true);
		if ($isAllowed === false)
		{
			throw new RuntimeException("Filter '" . $filter . "' is not allowed");
		}

		$callback = $this->configurator->attributeFilters[$filter];
		$attribute->filterChain->append($callback);
	}
364
365
	/**
	* Collect the custom schemes declared by the configured sites
	*
	* A site's "scheme" value can be a single scheme or a list of schemes. Schemes are returned
	* in the order they are found and duplicates are not removed.
	*
	* @return string[]
	*/
	protected function getSchemes()
	{
		$allSchemes = [];
		foreach ($this->collection as $siteConfig)
		{
			if (!isset($siteConfig['scheme']))
			{
				continue;
			}

			// Cast to array to support single values, discard the keys and append the values
			$siteSchemes = array_values((array) $siteConfig['scheme']);
			$allSchemes  = array_merge($allSchemes, $siteSchemes);
		}

		return $allSchemes;
	}
386
387
	/**
	* Validate and normalize a site ID
	*
	* The ID is lowercased and must then consist entirely of one or more characters in the
	* ranges a-z and 0-9.
	*
	* @param  string $siteId Original site ID
	* @return string         Lowercased site ID
	*
	* @throws InvalidArgumentException if the ID is empty or contains any other character
	*/
	protected function normalizeId($siteId)
	{
		$siteId = strtolower($siteId);

		// The D modifier prevents $ from matching before a trailing newline, otherwise an ID
		// such as "youtube\n" would be considered valid
		if (!preg_match('(^[a-z0-9]+$)D', $siteId))
		{
			throw new InvalidArgumentException('Invalid site ID');
		}

		return $siteId;
	}
404
}