Completed
Push — master ( 788152...8092ea )
by Josh
14:09
created

Configurator::convertScrapeConfig()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 7
ccs 4
cts 4
cp 1
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 4
nc 1
nop 1
crap 1
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2018 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Plugins\MediaEmbed;
9
10
use InvalidArgumentException;
11
use RuntimeException;
12
use s9e\TextFormatter\Configurator\Items\Regexp;
13
use s9e\TextFormatter\Configurator\Items\Tag;
14
use s9e\TextFormatter\Configurator\JavaScript\Dictionary;
15
use s9e\TextFormatter\Plugins\ConfiguratorBase;
16
use s9e\TextFormatter\Plugins\MediaEmbed\Configurator\Collections\CachedDefinitionCollection;
17
use s9e\TextFormatter\Plugins\MediaEmbed\Configurator\TemplateBuilder;
18
19
class Configurator extends ConfiguratorBase
20
{
21
	/**
	* @var array List of filters that are explicitly allowed in attribute definitions. Filters
	*            whose name starts with "#" are accepted by checkAttributeFilters() regardless
	*            of this list
	*/
24
	public $allowedFilters = ['stripslashes', 'urldecode'];
25
26
	/**
	* @var bool Whether to create the MEDIA BBCode. When true, setUp() registers a BBCode named
	*           after $tagName that uses "url" as its content attribute
	*/
29
	protected $createMediaBBCode = true;
30
31
	/**
	* @var Configurator\Collections\SiteDefinitionCollection Default sites. Instantiated in
	*      setUp() as a CachedDefinitionCollection; add() uses it to normalize a given site
	*      config, or to retrieve a definition by site ID when no config is given
	*/
34
	public $defaultSites;
35
36
	/**
	* {@inheritdoc}
	*
	* Exported as "quickMatch" by asConfig()
	*/
39
	protected $quickMatch = '://';
40
41
	/**
	* {@inheritdoc}
	*
	* Matches "http://" or "https://" followed by one or more characters that are not "[", a
	* quote or whitespace. Exported as "regexp" by asConfig()
	*/
44
	protected $regexp = '/\\bhttps?:\\/\\/[^["\'\\s]+/Si';
45
46
	/**
	* @var array Configured sites, as a map of [siteId => siteConfig] populated by add()
	*/
49
	protected $sites = [];
50
51
	/**
	* @var string Name of the tag used to handle embeddable URLs. Also used as the name of the
	*             optional BBCode, and denied as a child of every site's tag
	*/
54
	protected $tagName = 'MEDIA';
55
56
	/**
	* @var TemplateBuilder Instantiated in setUp(), used by add() to build each site's template
	*/
59
	protected $templateBuilder;
60
61
	/**
	* Prepare this plugin: instantiate its collaborators, create the generic tag and, if
	* enabled, the BBCode of the same name
	*
	* {@inheritdoc}
	*/
	protected function setUp()
	{
		$this->defaultSites    = new CachedDefinitionCollection;
		$this->templateBuilder = new TemplateBuilder;

		// The generic tag is always created
		$this->createMediaTag();

		// The BBCode is optional, nothing else to do if it's disabled
		if (!$this->createMediaBBCode)
		{
			return;
		}

		$bbcodeConfig = ['contentAttributes' => ['url']];
		$this->configurator->BBCodes->set($this->tagName, $bbcodeConfig);
	}
78
79
	/**
	* Return this plugin's configuration
	*
	* {@inheritdoc}
	*
	* @return array|null Array containing quickMatch, regexp and tagName, or NULL if no site
	*                    has been added
	*/
	public function asConfig()
	{
		if (!empty($this->sites))
		{
			return [
				'quickMatch' => $this->quickMatch,
				'regexp'     => $this->regexp,
				'tagName'    => $this->tagName
			];
		}

		// No configured sites, no config
		return null;
	}
95
96
	/**
	* Add a media site
	*
	* Creates a tag named after the site, adds it to the configurator's tags and records the
	* site's config in $this->sites.
	*
	* @param  string $siteId     Site's ID, validated and lowercased by normalizeId()
	* @param  array  $siteConfig Site's config. If omitted, the definition is retrieved from
	*                            $this->defaultSites using the site's ID
	* @return Tag                Tag created for this site
	*
	* @throws InvalidArgumentException if the site ID is invalid
	* @throws RuntimeException if an attribute uses a filter that is not allowed
	*/
	public function add($siteId, array $siteConfig = null)
	{
		$siteId = $this->normalizeId($siteId);

		// Use the given definition if there is one, otherwise look it up by ID
		$siteConfig = ($siteConfig !== null)
		            ? $this->defaultSites->normalizeValue($siteConfig)
		            : $this->defaultSites->get($siteId);

		// Replace the original regexps with [regexp, map] pairs
		$siteConfig['extract'] = $this->convertRegexps($siteConfig['extract']);
		$siteConfig['scrape']  = $this->convertScrapes($siteConfig['scrape']);

		// Refuse the site if any of its attributes uses a filter that is not allowed
		$this->checkAttributeFilters($siteConfig['attributes']);

		// The site's tag may contain a URL tag, but neither itself nor the generic tag
		$tagConfig = [
			'attributes' => $this->getAttributesConfig($siteConfig),
			'rules'      => [
				'allowChild' => 'URL',
				'autoClose'  => true,
				'denyChild'  => [$siteId, $this->tagName]
			],
			'template'   => $this->templateBuilder->build($siteId, $siteConfig)
		];
		$tag = new Tag($tagConfig);

		// Normalize and check the tag before it's added, record the site last
		$configurator = $this->configurator;
		$configurator->templateNormalizer->normalizeTag($tag);
		$configurator->templateChecker->checkTag($tag);
		$configurator->tags->add($siteId, $tag);
		$this->sites[$siteId] = $siteConfig;

		return $tag;
	}
139
140
	/**
	* Register the variables MediaEmbed.hosts and MediaEmbed.sites
	*
	* MediaEmbed.hosts maps each host to the ID of the site it belongs to. MediaEmbed.sites maps
	* each site ID to an [extract, scrape] pair taken from the site's config.
	*
	* {@inheritdoc}
	*/
	public function finalize()
	{
		$hostMap = [];
		$siteMap = [];
		foreach ($this->sites as $siteId => $siteConfig)
		{
			// If several sites share a host, the last one added gets it
			foreach ($siteConfig['host'] as $hostname)
			{
				$hostMap[$hostname] = $siteId;
			}

			$extract          = $siteConfig['extract'];
			$scrape           = $siteConfig['scrape'];
			$siteMap[$siteId] = [$extract, $scrape];
		}

		$hostsDictionary = new Dictionary($hostMap);
		$this->configurator->registeredVars['MediaEmbed.hosts'] = $hostsDictionary;

		$sitesDictionary = new Dictionary($siteMap);
		$this->configurator->registeredVars['MediaEmbed.sites'] = $sitesDictionary;
	}
159
160
	/**
	* Ensure that given attributes only use filters that are allowed
	*
	* A filter is accepted if its name starts with "#" or if it is listed in
	* $this->allowedFilters. Attributes with no filterChain, or an empty one, are ignored.
	*
	* @param  array $attributes Map of [attrName => attrConfig]
	* @return void
	*
	* @throws RuntimeException if a filter is not allowed
	*/
	protected function checkAttributeFilters(array $attributes)
	{
		foreach ($attributes as $attrConfig)
		{
			if (!empty($attrConfig['filterChain']))
			{
				foreach ($attrConfig['filterChain'] as $filter)
				{
					if (substr($filter, 0, 1) === '#' || in_array($filter, $this->allowedFilters, true))
					{
						continue;
					}

					throw new RuntimeException("Filter '$filter' is not allowed in media sites");
				}
			}
		}
	}
183
184
	/**
	* Wrap given regexp in a Regexp item and pair it with the names of its captures
	*
	* @param  string $regexp Original regexp
	* @return array          [Regexp instance, value returned by its getCaptureNames() method]
	*/
	protected function convertRegexp($regexp)
	{
		$item         = new Regexp($regexp);
		$captureNames = $item->getCaptureNames();

		return [$item, $captureNames];
	}
196
197
	/**
	* Convert every regexp in given list to a [regexp, map] pair
	*
	* @param  string[] $regexps Original list
	* @return array[]           Converted list, with the original keys preserved
	*/
	protected function convertRegexps(array $regexps)
	{
		$pairs = [];
		foreach ($regexps as $key => $regexp)
		{
			$pairs[$key] = $this->convertRegexp($regexp);
		}

		return $pairs;
	}
207
208
	/**
	* Convert the "extract" and "match" regexps of a scraping config to [regexp, map] pairs
	*
	* @param  array $config Original config, expected to contain the lists "extract" and "match"
	* @return array         Converted config, other elements are left untouched
	*/
	protected function convertScrapeConfig(array $config)
	{
		foreach (['extract', 'match'] as $key)
		{
			$config[$key] = $this->convertRegexps($config[$key]);
		}

		return $config;
	}
221
222
	/**
	* Pass every scraping config in given list through convertScrapeConfig()
	*
	* @param  array[] $scrapes Original list of scraping configs
	* @return array[]          Converted list, with the original keys preserved
	*/
	protected function convertScrapes(array $scrapes)
	{
		$converted = [];
		foreach ($scrapes as $key => $scrapeConfig)
		{
			$converted[$key] = $this->convertScrapeConfig($scrapeConfig);
		}

		return $converted;
	}
232
233
	/**
	* Create the generic tag named after $this->tagName
	*
	* The tag is automatically closed, cannot contain itself, and its filter chain is replaced
	* with a single call to Parser::filterTag that also carries a JavaScript implementation
	* read from Parser/tagFilter.js.
	*
	* @return void
	*/
	protected function createMediaTag()
	{
		$tagName = $this->tagName;
		$tag     = $this->configurator->tags->add($tagName);

		// This tag should not need to be closed and should not contain itself
		$tag->rules->autoClose();
		$tag->rules->denyChild($tagName);

		// Replace whatever is in the filter chain with our own tag filter
		$tag->filterChain->clear();
		$filter = $tag->filterChain->append(__NAMESPACE__ . '\\Parser::filterTag');

		// Each call operates on the return value of the previous one, as in a fluent chain
		$filter     = $filter->resetParameters();
		$paramNames = ['tag', 'parser', 'MediaEmbed.hosts', 'MediaEmbed.sites', 'cacheDir'];
		foreach ($paramNames as $paramName)
		{
			$filter = $filter->addParameterByName($paramName);
		}
		$filter->setJS(file_get_contents(__DIR__ . '/Parser/tagFilter.js'));
	}
258
259
	/**
	* Collect the names of the captures found in a list of [regexp, map] pairs
	*
	* Empty entries in a map are ignored. If a name appears in several maps, the value from the
	* first one is kept.
	*
	* @param  array[] $regexps List of [regexp, map] pairs
	* @return array            Capture names as keys, their key in the map as values
	*/
	protected function getAttributeNamesFromRegexps(array $regexps)
	{
		$names = [];
		foreach ($regexps as $pair)
		{
			// Only the map is needed, the regexp itself is ignored
			$captureNames = array_filter($pair[1]);
			$names       += array_flip($captureNames);
		}

		return $names;
	}
275
276
	/**
	* Get the attributes config for given site config
	*
	* Attributes explicitly defined in the site config come first and keep their config. Every
	* name captured by the site's "extract" regexps, or by the "extract" regexps of its scraping
	* configs, gets an empty config if it has none. Attributes are not required by default.
	*
	* @param  array $siteConfig Site's config
	* @return array             Map of [attrName => attrConfig]
	*/
	protected function getAttributesConfig(array $siteConfig)
	{
		// Gather the names captured from the URL first, then those captured from scraped content
		$capturedNames = $this->getAttributeNamesFromRegexps($siteConfig['extract']);
		foreach ($siteConfig['scrape'] as $scrapeConfig)
		{
			$capturedNames += $this->getAttributeNamesFromRegexps($scrapeConfig['extract']);
		}

		$defaultConfigs = array_fill_keys(array_keys($capturedNames), []);
		$attributes     = $siteConfig['attributes'] + $defaultConfigs;

		// Add the default value for "required" wherever it's not set
		return array_map(
			function ($attrConfig)
			{
				return $attrConfig + ['required' => false];
			},
			$attributes
		);
	}
299
300
	/**
	* Validate and normalize a site ID
	*
	* The ID is lowercased, then it must consist exclusively of one or more ASCII letters and
	* digits.
	*
	* @param  string $siteId Original site ID
	* @return string         Lowercased site ID
	*
	* @throws InvalidArgumentException if the ID is empty or contains any other character
	*/
	protected function normalizeId($siteId)
	{
		$siteId = strtolower($siteId);

		// The D modifier makes $ match only at the very end of the string. Without it, $ also
		// matches before a trailing newline and an ID such as "youtube\n" would be accepted
		if (!preg_match('(^[a-z0-9]+$)D', $siteId))
		{
			throw new InvalidArgumentException('Invalid site ID');
		}

		return $siteId;
	}
317
}