Completed
Push — master ( ca198b...7b909f )
by Josh
17:30
created

NodeLocator::getElementsByRegexp()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 4
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 2
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2018 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Configurator\Helpers;
9
10
use DOMDocument;
11
use DOMXPath;
12
13
abstract class NodeLocator
{
	/**
	* Return all attributes (literal or generated) whose name matches given regexp
	*
	* Covers literal attributes, <xsl:attribute/> elements and <xsl:copy-of/> elements whose
	* "select" expression is a single attribute name
	*
	* @param  DOMDocument $dom    Document
	* @param  string      $regexp Regexp matched against the attribute's name
	* @return \DOMNode[]          List of DOMNode instances
	*/
	public static function getAttributesByRegexp(DOMDocument $dom, $regexp)
	{
		return self::getNodesByRegexp($dom, $regexp, 'attribute', '@');
	}

	/**
	* Return all DOMNodes whose content is CSS
	*
	* That is every attribute and every element named "style", case-insensitively
	*
	* @param  DOMDocument $dom Document
	* @return \DOMNode[]       List of DOMNode instances
	*/
	public static function getCSSNodes(DOMDocument $dom)
	{
		$regexp = '/^style$/i';

		return array_merge(
			self::getAttributesByRegexp($dom, $regexp),
			self::getElementsByRegexp($dom, $regexp)
		);
	}

	/**
	* Return all elements (literal or generated) whose name matches given regexp
	*
	* Covers literal elements, <xsl:element/> elements and <xsl:copy-of/> elements whose "select"
	* expression is a single element name
	*
	* @param  DOMDocument $dom    Document
	* @param  string      $regexp Regexp matched against the element's name
	* @return \DOMNode[]          List of DOMNode instances
	*/
	public static function getElementsByRegexp(DOMDocument $dom, $regexp)
	{
		return self::getNodesByRegexp($dom, $regexp, 'element', '');
	}

	/**
	* Return all DOMNodes whose content is JavaScript
	*
	* That is every attribute whose name starts with "on" or is "data-s9e-livepreview-postprocess",
	* plus every <script/> element. Names are matched case-insensitively
	*
	* @param  DOMDocument $dom Document
	* @return \DOMNode[]       List of DOMNode instances
	*/
	public static function getJSNodes(DOMDocument $dom)
	{
		$attrRegexp    = '/^(?:data-s9e-livepreview-postprocess$|on)/i';
		$elementRegexp = '/^script$/i';

		return array_merge(
			self::getAttributesByRegexp($dom, $attrRegexp),
			self::getElementsByRegexp($dom, $elementRegexp)
		);
	}

	/**
	* Return all <object/> and <embed/> parameters whose name matches given regexp
	*
	* Will return all <param/> descendants of <object/> and all attributes of <embed/> whose name
	* matches given regexp. This method will NOT catch <param/> elements whose 'name' attribute is
	* set via an <xsl:attribute/>
	*
	* The returned list is heterogeneous: it may contain attribute nodes (literal <embed/>
	* attributes), XSL elements (<xsl:attribute/> or <xsl:copy-of/> located inside an <embed/>)
	* and <param/> elements
	*
	* @param  DOMDocument $dom    Document
	* @param  string      $regexp Regexp matched against the parameter's name
	* @return \DOMNode[]          List of DOMNode instances
	*/
	public static function getObjectParamsByRegexp(DOMDocument $dom, $regexp)
	{
		$xpath = new DOMXPath($dom);
		$nodes = [];

		// Collect attributes from <embed/> elements
		foreach (self::getAttributesByRegexp($dom, $regexp) as $attribute)
		{
			if ($attribute->nodeType === XML_ATTRIBUTE_NODE)
			{
				// Literal attribute: only keep it if it belongs to an <embed/> element
				if (strtolower($attribute->parentNode->localName) === 'embed')
				{
					$nodes[] = $attribute;
				}
			}
			elseif ($xpath->evaluate('count(ancestor::embed)', $attribute))
			{
				// Assuming <xsl:attribute/> or <xsl:copy-of/>
				$nodes[] = $attribute;
			}
		}

		// Collect <param/> descendants of <object/> elements
		foreach ($xpath->query('//object//param') as $param)
		{
			if (preg_match($regexp, $param->getAttribute('name')))
			{
				$nodes[] = $param;
			}
		}

		return $nodes;
	}

	/**
	* Return all DOMNodes whose content is an URL
	*
	* NOTE: it will also return HTML4 nodes whose content is an URI
	*
	* @param  DOMDocument $dom Document
	* @return \DOMNode[]       List of DOMNode instances
	*/
	public static function getURLNodes(DOMDocument $dom)
	{
		$regexp = '/(?:^(?:action|background|c(?:ite|lassid|odebase)|data|formaction|href|icon|longdesc|manifest|p(?:ing|luginspage|oster|rofile)|usemap)|src)$/i';
		$nodes  = self::getAttributesByRegexp($dom, $regexp);

		/**
		* @link http://helpx.adobe.com/flash/kb/object-tag-syntax-flash-professional.html
		* @link http://www.sitepoint.com/control-internet-explorer/
		*/
		foreach (self::getObjectParamsByRegexp($dom, '/^(?:dataurl|movie)$/i') as $param)
		{
			if ($param->nodeType === XML_ELEMENT_NODE && $param->nodeName === 'param')
			{
				// <param name="movie" value="..."/> holds its URL in the "value" attribute
				$node = $param->getAttributeNode('value');
				if ($node)
				{
					$nodes[] = $node;
				}
			}
			else
			{
				// Attribute of an <embed/> element, or the XSL node that generates it. Those are
				// not <param/> elements and have no "value" attribute to look up (attribute nodes
				// do not even have a getAttributeNode() method) so the node itself is returned
				$nodes[] = $param;
			}
		}

		return $nodes;
	}

	/**
	* Return all nodes (literal or generated) whose name matches given regexp
	*
	* Three kinds of nodes are considered: literal nodes (matched on their own name), <xsl:element/>
	* or <xsl:attribute/> nodes (matched on their "name" attribute) and <xsl:copy-of/> nodes whose
	* "select" expression consists of a single name (matched on that name)
	*
	* @param  DOMDocument $dom    Document
	* @param  string      $regexp Regexp
	* @param  string      $type   Node type ('element' or 'attribute')
	* @param  string      $prefix Prefix used in XPath ('' or '@')
	* @return \DOMNode[]          List of DOMNode instances
	*/
	protected static function getNodesByRegexp(DOMDocument $dom, $regexp, $type, $prefix)
	{
		$xpath = new DOMXPath($dom);

		// Bind the "xsl" prefix explicitly so that the xsl:* queries below remain valid on
		// documents that do not declare it, instead of failing on an undefined prefix
		$xpath->registerNamespace('xsl', 'http://www.w3.org/1999/XSL/Transform');

		$candidates = [];

		// Get natural nodes
		foreach ($xpath->query('//' . $prefix . '*') as $node)
		{
			$candidates[] = [$node, $node->nodeName];
		}

		// Get XSL-generated nodes
		foreach ($xpath->query('//xsl:' . $type) as $node)
		{
			$candidates[] = [$node, $node->getAttribute('name')];
		}

		// Get xsl:copy-of nodes that select a single name. Hyphens are allowed so that names such
		// as "data-s9e-livepreview-postprocess" are recognized
		$selectRegexp = '/^' . preg_quote($prefix, '/') . '([-\\w]+)$/';
		foreach ($xpath->query('//xsl:copy-of') as $node)
		{
			if (preg_match($selectRegexp, trim($node->getAttribute('select')), $m))
			{
				$candidates[] = [$node, $m[1]];
			}
		}

		// Filter candidate nodes
		$nodes = [];
		foreach ($candidates as list($node, $name))
		{
			if (preg_match($regexp, $name))
			{
				$nodes[] = $node;
			}
		}

		return $nodes;
	}
}