Completed
Push — master ( 788152...8092ea )
by Josh
14:09
created

Parser   A

Complexity

Total Complexity 30

Size/Duplication

Total Lines 225
Duplicated Lines 0 %

Coupling/Cohesion

Components 2
Dependencies 5

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 30
lcom 2
cbo 5
dl 0
loc 225
ccs 71
cts 71
cp 1
rs 10
c 0
b 0
f 0

10 Methods

Rating   Name   Duplication   Size   Complexity  
A parse() 0 13 2
A filterTag() 0 19 4
B addNamedCaptures() 0 17 6
A createTag() 0 19 2
A getAttributes() 0 11 2
A getHttpClient() 0 9 3
A getSiteIdFromUrl() 0 14 4
A interpolateVars() 0 11 2
A scrape() 0 15 4
A wget() 0 4 1
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2018 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Plugins\MediaEmbed;
9
10
use s9e\TextFormatter\Parser as TagStack;
11
use s9e\TextFormatter\Parser\Tag;
12
use s9e\TextFormatter\Plugins\ParserBase;
13
use s9e\TextFormatter\Utils\Http;
14
15
/**
* Parser for the MediaEmbed plugin
*
* parse() turns every matched URL into a self-closing tag that carries the URL as an attribute.
* filterTag() then invalidates that tag and, if the URL belongs to a known site and yields
* attributes, replaces it with a pair of tags named after the site.
*/
class Parser extends ParserBase
{
	/**
	* @var \s9e\TextFormatter\Utils\Http\Client Client used to perform HTTP request
	*/
	protected static $client;

	/**
	* @var string|null|bool Cache dir that getHttpClient() created self::$client for, or FALSE if
	*                       getHttpClient() has not created a client (e.g. none exists yet, or
	*                       self::$client was set by other code)
	*/
	protected static $clientCacheDir = false;

	/**
	* {@inheritdoc}
	*/
	public function parse($text, array $matches)
	{
		foreach ($matches as $m)
		{
			// $m[0] holds the matched text and its position in the original text
			$tagName = $this->config['tagName'];
			$url     = $m[0][0];
			$pos     = $m[0][1];
			$len     = strlen($url);

			// Give that tag priority over other tags such as Autolink's
			$this->parser->addSelfClosingTag($tagName, $pos, $len, -10)->setAttribute('url', $url);
		}
	}

	/**
	* Filter a MEDIA tag
	*
	* This will always invalidate the original tag, and possibly replace it with the tag that
	* corresponds to the media site
	*
	* @param  Tag         $tag      The original tag
	* @param  TagStack    $tagStack Parser instance, so that we can add the new tag to the stack
	* @param  array       $hosts    Map of [hostname => siteId]
	* @param  array       $sites    Map of [siteId => siteConfig]
	* @param  string|null $cacheDir Path to the cache directory
	* @return void
	*/
	public static function filterTag(Tag $tag, TagStack $tagStack, array $hosts, array $sites, $cacheDir)
	{
		// Always invalidate this tag
		$tag->invalidate();

		if (!$tag->hasAttribute('url'))
		{
			return;
		}

		$url    = $tag->getAttribute('url');
		$siteId = self::getSiteIdFromUrl($url, $hosts);
		if (!isset($sites[$siteId]))
		{
			// Unknown host, or a siteId that has no config
			return;
		}

		// Only create the replacement tag if at least one attribute could be extracted
		$attributes = self::getAttributes($url, $sites[$siteId], $cacheDir);
		if (!empty($attributes))
		{
			self::createTag(strtoupper($siteId), $tagStack, $tag)->setAttributes($attributes);
		}
	}

	/**
	* Add named captures from a set of regular expressions to a set of attributes
	*
	* Every regexp is tried, in order. For each one, the captures listed in its map that are
	* set and non-empty are copied to $attributes under the mapped name, overwriting any value
	* already stored under that name. Entries mapped to an empty name are skipped.
	*
	* @param  array   &$attributes Associative array of strings
	* @param  string   $string     Text to match
	* @param  array[]  $regexps    List of [regexp, map] pairs
	* @return bool                 Whether any regexp matched
	*/
	protected static function addNamedCaptures(array &$attributes, $string, array $regexps)
	{
		$matched = 0;
		foreach ($regexps as list($regexp, $map))
		{
			// preg_match() returns 1 on match and 0 otherwise, so this counts matching regexps
			$matched += preg_match($regexp, $string, $m);
			foreach ($map as $i => $name)
			{
				if (isset($m[$i]) && $m[$i] !== '' && $name !== '')
				{
					$attributes[$name] = $m[$i];
				}
			}
		}

		return (bool) $matched;
	}

	/**
	* Create a tag for a media embed
	*
	* If the reference tag is paired with an end tag, the new pair of tags reuses the position
	* and length of both. Otherwise, the new pair is made of two zero-length tags placed
	* right before and right after the reference tag.
	*
	* @param  string   $tagName  Tag's name
	* @param  TagStack $tagStack
	* @param  Tag      $tag      Reference tag
	* @return Tag                New tag
	*/
	protected static function createTag($tagName, TagStack $tagStack, Tag $tag)
	{
		$startPos = $tag->getPos();
		$endTag   = $tag->getEndTag();
		if ($endTag)
		{
			$startLen = $tag->getLen();
			$endPos   = $endTag->getPos();
			$endLen   = $endTag->getLen();
		}
		else
		{
			$startLen = 0;
			$endPos   = $tag->getPos() + $tag->getLen();
			$endLen   = 0;
		}

		return $tagStack->addTagPair($tagName, $startPos, $startLen, $endPos, $endLen, $tag->getSortPriority());
	}

	/**
	* Return a set of attributes for given URL based on a site's config
	*
	* @param  string      $url      Original URL
	* @param  array       $config   Site config: element 0 is the list of [regexp, map] pairs
	*                               applied to the URL, element 1 is the list of scraping configs
	* @param  string|null $cacheDir Path to the cache directory
	* @return array                 Associative array of attributes
	*/
	protected static function getAttributes($url, array $config, $cacheDir)
	{
		$attributes = [];
		self::addNamedCaptures($attributes, $url, $config[0]);
		foreach ($config[1] as $scrapeConfig)
		{
			self::scrape($attributes, $url, $scrapeConfig, $cacheDir);
		}

		return $attributes;
	}

	/**
	* Return a cached instance of the HTTP client
	*
	* The client is created on first use. A client created by this method is replaced if
	* this method is later called with a different $cacheDir, so that content is never cached
	* in (or read from) the wrong directory. A client that was not created by this method is
	* returned as-is.
	*
	* @param  string|null $cacheDir
	* @return \s9e\TextFormatter\Utils\Http\Client
	*/
	protected static function getHttpClient($cacheDir)
	{
		// FALSE means we have not created the current client and cannot tell what it targets
		$isStale = (self::$clientCacheDir !== false && self::$clientCacheDir !== $cacheDir);
		if (!isset(self::$client) || $isStale)
		{
			self::$client         = (isset($cacheDir)) ? Http::getCachingClient($cacheDir) : Http::getClient();
			self::$clientCacheDir = $cacheDir;
		}

		return self::$client;
	}

	/**
	* Return the siteId that corresponds to given URL
	*
	* The lowercased host is looked up first, then each of its parent domains in turn, e.g.
	* "www.example.org", then "example.org", then "org".
	*
	* @param  string  $url   Original URL
	* @param  array   $hosts Map of [hostname => siteId]
	* @return string         URL's siteId, or an empty string
	*/
	protected static function getSiteIdFromUrl($url, array $hosts)
	{
		$host = (preg_match('(^https?://([^/]+))', strtolower($url), $m)) ? $m[1] : '';
		while ($host > '')
		{
			if (isset($hosts[$host]))
			{
				return $hosts[$host];
			}

			// Remove the leftmost label and its dot. The trailing "." is deliberately left
			// unescaped: when no dot remains it matches the last character instead, so the
			// whole string is removed and the loop terminates
			$host = preg_replace('(^[^.]*.)', '', $host);
		}

		return '';
	}

	/**
	* Interpolate {@vars} in given string
	*
	* Tokens whose name does not exist in $vars are replaced with an empty string.
	*
	* @param  string $str  Original string
	* @param  array  $vars Associative array
	* @return string       Interpolated string
	*/
	protected static function interpolateVars($str, array $vars)
	{
		return preg_replace_callback(
			'(\\{@(\\w+)\\})',
			function ($m) use ($vars)
			{
				return (isset($vars[$m[1]])) ? $vars[$m[1]] : '';
			},
			$str
		);
	}

	/**
	* Scrape values and add them to current attributes
	*
	* Nothing is retrieved unless the original URL matches at least one of the regexps in
	* $config['match'].
	*
	* @param  array       &$attributes Attributes
	* @param  string       $url        Original URL
	* @param  array        $config     Scraping config
	* @param  string|null  $cacheDir   Path to the cache directory
	* @return void
	*/
	protected static function scrape(array &$attributes, $url, array $config, $cacheDir)
	{
		$vars = [];
		if (self::addNamedCaptures($vars, $url, $config['match']))
		{
			if (isset($config['url']))
			{
				// Values captured from the URL take precedence over existing attributes
				$url = self::interpolateVars($config['url'], $vars + $attributes);
			}

			// Only retrieve HTTP(S) URLs, without their fragment
			if (preg_match('(^https?://[^#]+)i', $url, $m))
			{
				self::addNamedCaptures($attributes, self::wget($m[0], $cacheDir), $config['extract']);
			}
		}
	}

	/**
	* Retrieve external content
	*
	* Errors raised while retrieving the content are silenced.
	*
	* @param  string      $url      URL
	* @param  string|null $cacheDir Path to the cache directory
	* @return string                External content
	*/
	protected static function wget($url, $cacheDir)
	{
		return @self::getHttpClient($cacheDir)->get($url, ['User-Agent: PHP (not Mozilla)']);
	}
}