MarkdownTestHelper::normalizeElementContent()   F
last analyzed

Complexity

Conditions 55
Paths 8820

Size

Total Lines 116
Code Lines 89

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 55
eloc 89
nc 8820
nop 2
dl 0
loc 116
rs 0
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

- Extract Method

1
<?php
2
3
use PHPUnit\Framework\TestCase;
4
5
class MarkdownTestHelper
{
	/**
	 * Collects Markdown fixtures and their expected-output files, e.g. for a unit test data provider.
	 *
	 * Recursively scans $directory for entries whose path ends in ".text". For each one the
	 * expected output is the sibling path ending in ".xhtml" when that file exists; otherwise
	 * the ".html" sibling is used (its existence is not checked).
	 *
	 * @param string $directory Input directory
	 *
	 * @return array List of rows: [input path, expected output path, bool: expected output is XHTML]
	 */
	public static function getInputOutputPaths($directory) {
		// Default RegexIterator mode keeps the SplFileInfo entries whose path matches the pattern.
		$textFiles = new RegexIterator(
			new RecursiveIteratorIterator(new RecursiveDirectoryIterator($directory)),
			'/^.+\.text$/'
		);

		$dataValues = [];

		/** @var SplFileInfo $inputFile */
		foreach ($textFiles as $inputFile) {
			$inputMarkdownPath = $inputFile->getPathname();

			// Strip the trailing "text" but keep the dot, so an extension can be appended directly.
			$basePath = substr($inputMarkdownPath, 0, -4);

			$xhtml = file_exists($basePath . 'xhtml');
			$expectedHtmlPath = $basePath . ($xhtml ? 'xhtml' : 'html');

			$dataValues[] = [$inputMarkdownPath, $expectedHtmlPath, $xhtml];
		}

		return $dataValues;
	}

	/**
	 * Applies PHPUnit's assertSame after normalizing both strings (e.g. ignoring whitespace differences).
	 * Uses logic found originally in MDTest.
	 *
	 * Each string is parsed into its own DOM document, normalized, serialized again and reduced
	 * to the content of its <body>. A string that cannot be parsed is replaced by a marker that
	 * differs between the two sides, so a parse failure always makes the assertion fail.
	 *
	 * @param string $string1 Expected result
	 * @param string $string2 Actual output
	 * @param string $message Positive message to print when test fails (e.g. "String1 matches String2")
	 * @param bool $xhtml Parse as XML (true) or as HTML (false)
	 */
	public static function assertSameNormalized($string1, $string2, $message, $xhtml = true) {
		$c_result = self::normalizeMarkup($string1, $xhtml, 'Expected Result');
		$c_output = self::normalizeMarkup($string2, $xhtml, 'Output');

		// This will throw a test exception if the strings don't exactly match
		TestCase::assertSame($c_result, $c_output, $message);
	}

	/**
	 * Parses one markup fragment, normalizes it and returns the trimmed <body> content plus "\n".
	 *
	 * @param string $markup Markup fragment to normalize
	 * @param bool $xhtml Parse as XML (true) or as HTML (false)
	 * @param string $label Side name used in the parse-error marker ("Expected Result" or "Output")
	 *
	 * @return string
	 */
	private static function normalizeMarkup($markup, $xhtml, $label) {
		$document = new DOMDocument();

		if ($xhtml) {
			$loaded = $document->loadXML('<!DOCTYPE html>' .
				"<html xmlns='http://www.w3.org/1999/xhtml'>" .
				"<body>$markup</body></html>");
		} else {
			// '@' suppressor used because some tests have invalid HTML (multiple elements with the same id attribute)
			// Perhaps isolate to a separate test and remove this?
			//
			// Empty input is rejected up front: on PHP 8 loadHTML() throws a ValueError for it,
			// which '@' cannot suppress, and the parse-error marker below would never be reached.
			$loaded = (string) $markup !== '' && @$document->loadHTML($markup);
		}

		if ($loaded) {
			// A document can load successfully and still have no root element; nothing to normalize then.
			if ($document->documentElement !== null) {
				static::normalizeElementContent($document->documentElement, false);
			}
			$normalized = $xhtml ? $document->saveXML() : $document->saveHTML();
		} else {
			$normalized = '--- ' . $label . ': ' . ($xhtml ? 'XML' : 'HTML') . ' Parse Error ---';
		}

		// Keep only what sits between <body> and </body>.
		$normalized = preg_replace('{^.*?<body>|</body>.*?$}is', '', $normalized);

		return trim($normalized) . "\n";
	}

	/**
	 * Normalize content of HTML DOM $element.
	 *
	 * Child elements are normalized recursively and get their attributes sorted. Unless whitespace
	 * is preserved, runs of whitespace inside text are collapsed to a single space and
	 * whitespace-only text nodes are replaced by the separator currently in effect.
	 *
	 * @param DOMElement $element Modifies this element by reference
	 * @param bool $whitespace_preserve Preserve Whitespace: whitespace is significant and shouldn't be
	 *                                  normalized; used for the content of certain elements like
	 *                                  <pre> or <script>.
	 * @return void
	 */
	protected static function normalizeElementContent($element, $whitespace_preserve) {
		$elementName = strtolower($element->nodeName);

		if (self::preservesWhitespace($elementName)) {
			$whitespace_preserve = true;
		}

		// Separator in effect until the first child element is seen.
		$whitespace = self::containerWhitespace($elementName);

		foreach ($element->childNodes as $node) {
			if ($node->nodeType === XML_ELEMENT_NODE) {
				static::normalizeElementContent($node, $whitespace_preserve);
				static::normalizeElementAttributes($node);

				// Intentionally replaces the running separator: whitespace-only text that follows
				// this child, and the edge handling after the loop, use the value chosen here.
				$whitespace = self::siblingWhitespace(strtolower($node->nodeName));

				$next = $node->nextSibling;
				if ($whitespace !== "" && $next && $next->nodeType !== XML_TEXT_NODE) {
					$element->insertBefore(new DOMText($whitespace), $next);
				}
			} elseif ($node->nodeType === XML_TEXT_NODE && !$whitespace_preserve) {
				/** @var DOMText $node */
				$node->data = trim($node->data) === ""
					? $whitespace
					: preg_replace('{\s+}', ' ', $node->data);
			}
		}

		if ($whitespace_preserve || $whitespace === "") {
			return;
		}

		// Make the element's content start and end with a single "\n".
		$first = $element->firstChild;
		if ($first) {
			if ($first->nodeType === XML_TEXT_NODE) {
				/** @var DOMText $first */
				$first->data = preg_replace('{^\s+}', "\n", $first->data);
			} else {
				$element->insertBefore(new DOMText("\n"), $first);
			}
		}

		$last = $element->lastChild;
		if ($last) {
			if ($last->nodeType === XML_TEXT_NODE) {
				/** @var DOMText $last */
				$last->data = preg_replace('{\s+$}', "\n", $last->data);
			} else {
				// A null reference node appends at the end of the child list.
				$element->insertBefore(new DOMText("\n"), null);
			}
		}
	}

	/**
	 * Tells whether the content of the named element must keep its whitespace untouched.
	 *
	 * @param string $name Lower-cased element name
	 *
	 * @return bool
	 */
	private static function preservesWhitespace($name) {
		return in_array($name, ['pre', 'script', 'style', 'title'], true);
	}

	/**
	 * Returns the initial separator for whitespace-only text directly inside the named element.
	 *
	 * @param string $name Lower-cased element name
	 *
	 * @return string "\n\n", "\n" or ""
	 */
	private static function containerWhitespace($name) {
		switch ($name) {
			case 'body':
			case 'div':
			case 'blockquote':
			case 'ul':
			case 'ol':
			case 'dl':
			case 'h1':
			case 'h2':
			case 'h3':
			case 'h4':
			case 'h5':
			case 'h6':
				return "\n\n";

			case 'table':
				return "\n";

			default:
				return "";
		}
	}

	/**
	 * Returns the separator that follows a child element with the given name.
	 *
	 * @param string $name Lower-cased element name
	 *
	 * @return string "\n\n", "\n" or ""
	 */
	private static function siblingWhitespace($name) {
		switch ($name) {
			case 'p':
			case 'div':
			case 'hr':
			case 'blockquote':
			case 'ul':
			case 'ol':
			case 'dl':
			case 'li':
			case 'address':
			case 'table':
			case 'dd':
			case 'pre':
			case 'h1':
			case 'h2':
			case 'h3':
			case 'h4':
			case 'h5':
			case 'h6':
				return "\n\n";

			case 'tr':
			case 'td':
			case 'dt':
				return "\n";

			default:
				return "";
		}
	}

	/**
	 * Sort attributes by name.
	 *
	 * @param DOMElement $element Modifies this element by reference
	 */
	protected static function normalizeElementAttributes (DOMElement $element)
	{
		// Gather the list of attributes as an array, indexed by attribute name.
		$attr_list = [];
		foreach ($element->attributes as $attr_node) {
			$attr_list[$attr_node->name] = $attr_node;
		}

		// Sort attribute list by name.
		ksort($attr_list);

		// Remove then put back each attribute following sort order.
		foreach ($attr_list as $attr_node) {
			$element->removeAttributeNode($attr_node);
			$element->setAttributeNode($attr_node);
		}
	}
}
268