Parser::setGroupOptions()   F
last analyzed

Complexity

Conditions 18
Paths 6562

Size

Total Lines 55
Code Lines 36

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
eloc 36
c 2
b 0
f 0
dl 0
loc 55
rs 0.7
cc 18
nc 6562
nop 2

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

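Commonly applied refactorings include Extract Method: move a cohesive fragment of the long method into a new, well-named method and call it from the original.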

1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * Parser class.
7
 *
8
 * @package   YetiForcePDF\Html
9
 *
10
 * @copyright YetiForce Sp. z o.o
11
 * @license   MIT
12
 * @author    Rafal Pospiech <[email protected]>
13
 */
14
15
namespace YetiForcePDF\Html;
16
17
use YetiForcePDF\Base;
18
use YetiForcePDF\Layout\PageGroupBox;
19
20
/**
21
 * Class Parser.
22
 */
23
class Parser extends Base
24
{
25
	/**
26
	 * @var \DOMDocument
27
	 */
28
	protected $domDocument;
29
30
	/**
31
	 * @var string
32
	 */
33
	protected $html = '';
34
35
	/**
36
	 * @var array page groups with html content divided
37
	 */
38
	protected $htmlPageGroups = [];
39
40
	/**
41
	 * @var array
42
	 */
43
	protected $pageGroups = [];
44
45
	/**
	 * Normalise line endings in the given markup.
	 *
	 * Every CRLF pair and every remaining LF is replaced by a single
	 * carriage return ("\r"); all other characters are left untouched.
	 *
	 * @param string $html markup to normalise
	 *
	 * @return string markup with CRLF and LF replaced by "\r"
	 */
	protected function cleanUpHtml(string $html)
	{
		// The search terms are applied in order, so CRLF collapses to one "\r"
		// before any stand-alone LF is converted.
		return str_replace(["\r\n", "\n"], "\r", $html);
	}
58
59
	/**
	 * Load html string.
	 *
	 * The markup is passed through htmlspecialchars_decode(), its line
	 * endings are normalised by cleanUpHtml() and every code point from
	 * 0x80 upwards is rewritten as a numeric entity. The result is stored
	 * in $this->html.
	 *
	 * @param string $html         markup to load
	 * @param string $fromEncoding character encoding of $html, handed to mb_encode_numericentity()
	 *
	 * @return Parser
	 */
	public function loadHtml(string $html, string $fromEncoding): self
	{
		// Chain the steps: the decoded markup (not the raw input) must be the
		// one that gets cleaned, otherwise the decoding result is thrown away.
		$decoded = htmlspecialchars_decode($html, ENT_HTML5);
		$normalised = $this->cleanUpHtml($decoded);

		// Conversion map: [first code point, last code point, offset, mask]
		// 0x80 - first code point outside of ASCII
		// 0x10FFFF - end of unicode range
		// 0 - offset added to the code point (none, do not omit any char)
		// ~0 - mask with every bit set (so the code point is kept as is)
		$this->html = mb_encode_numericentity($normalised, [0x80, 0x10FFFF, 0, ~0], $fromEncoding);

		return $this;
	}
80
81
	/**
	 * Return the markup currently held by the parser.
	 *
	 * @return string value of the $html property (an empty string until something is loaded)
	 */
	public function getHtml(): string
	{
		return $this->html;
	}
90
91
	/**
	 * Remove comment blocks.
	 *
	 * Each "<!-- ... -->" sequence is removed. Matching is non-greedy, so
	 * separate comments are stripped one by one and the text between them
	 * is kept. A "<!--" that is immediately followed by "-->" does not
	 * start a match.
	 *
	 * @param string $html markup to strip
	 *
	 * @return string markup without comment blocks, or $html unchanged when the pattern cannot be applied
	 */
	public function removeComments(string $html)
	{
		$stripped = preg_replace('/<!--((?!-->))[\w\W]+-->/uUi', '', $html);

		// preg_replace() yields null on a PCRE failure (for instance when the
		// subject is not valid UTF-8). Hand the input back unchanged in that
		// case so callers always receive a string.
		return $stripped ?? $html;
	}
102
103
	/**
	 * Divide html into page groups.
	 *
	 * A group starts at every "<div data-page-group" opening (matched case
	 * insensitively) and runs up to the next such opening or, for the last
	 * one, to the end of the markup. Anything in front of the first opening
	 * is not part of any group. When no opening is found, the whole markup
	 * is returned as the only group.
	 *
	 * @param string $html markup to split
	 *
	 * @return array list of html fragments, one per page group
	 */
	public function getHtmlPageGroups(string $html)
	{
		$found = [];
		preg_match_all('/\<div\s+data-page-group\s?/ui', $html, $found, PREG_OFFSET_CAPTURE);

		// With PREG_OFFSET_CAPTURE every hit is [matched text, byte offset];
		// only the offsets are needed to slice the markup.
		$offsets = [];
		foreach ($found[0] as $hit) {
			$offsets[] = $hit[1];
		}
		if ([] === $offsets) {
			return [$html];
		}

		$fragments = [];
		foreach ($offsets as $position => $begin) {
			// Byte-wise substr() on purpose: the offsets above are byte offsets.
			$fragments[] = isset($offsets[$position + 1])
				? substr($html, $begin, $offsets[$position + 1] - $begin)
				: substr($html, $begin);
		}

		return $fragments;
	}
132
133
	/**
	 * Set page group options.
	 *
	 * Reads the data-* attributes of the first child of the document
	 * element and copies them onto the page group box. Only attributes that
	 * are present are applied; a present attribute whose value is falsy
	 * (empty, "0", or - for the numeric ones - anything that casts to 0.0)
	 * is replaced by the built-in fallback listed below.
	 *
	 * @param PageGroupBox $root        box that receives the options
	 * @param \DOMDocument $domDocument document whose first child element carries the attributes
	 *
	 * @return $this
	 */
	public function setGroupOptions(PageGroupBox $root, \DOMDocument $domDocument)
	{
		// A document without a root element has nothing to read options from.
		$wrapper = $domDocument->documentElement;
		$groupElement = null === $wrapper ? null : $wrapper->firstChild;
		if (!$groupElement instanceof \DOMElement) {
			return $this;
		}

		// attribute name => [PageGroupBox property, fallback for a falsy value]
		$textOptions = [
			'data-format' => ['format', 'A4'],
			'data-orientation' => ['orientation', 'P'],
		];
		// attribute name => [PageGroupBox property, fallback when the value casts to 0.0]
		$numericOptions = [
			'data-margin-left' => ['marginLeft', 30],
			'data-margin-right' => ['marginRight', 30],
			'data-margin-top' => ['marginTop', 40],
			'data-margin-bottom' => ['marginBottom', 40],
			'data-header-top' => ['headerTop', 10],
			'data-footer-bottom' => ['footerBottom', 10],
		];

		// The short ternary is a deliberate truthiness test, not a null check.
		foreach ($textOptions as $attribute => $option) {
			if ($groupElement->hasAttribute($attribute)) {
				$root->{$option[0]} = $groupElement->getAttribute($attribute) ?: $option[1];
			}
		}
		// NOTE(review): an explicit 0 cannot be configured through these
		// attributes because it is replaced by the fallback - confirm that
		// this is intended.
		foreach ($numericOptions as $attribute => $option) {
			if ($groupElement->hasAttribute($attribute)) {
				$root->{$option[0]} = ((float) $groupElement->getAttribute($attribute)) ?: $option[1];
			}
		}

		return $this;
	}
197
198
	/**
	 * Convert loaded html to pdf objects.
	 *
	 * Comments are stripped and the markup is split into page groups. Each
	 * group is wrapped in a <div id="yetiforcepdf"> element, loaded into its
	 * own DOMDocument, laid out inside a root PageGroupBox and finally the
	 * instructions of its boxes are appended to the content streams of the
	 * group's pages. Nothing happens when no html is loaded.
	 *
	 * @return null
	 */
	public function parse()
	{
		if ('' === $this->html) {
			return null;
		}
		$this->html = $this->removeComments($this->html);
		$this->htmlPageGroups = $this->getHtmlPageGroups($this->html);
		$libxmlOptions = LIBXML_HTML_NOIMPLIED | LIBXML_NOWARNING | LIBXML_NOBLANKS | LIBXML_NOERROR;

		foreach ($this->htmlPageGroups as $groupIndex => $groupHtml) {
			// One DOM tree per page group.
			$dom = new \DOMDocument();
			$dom->encoding = 'UTF-8';
			$dom->strictErrorChecking = false;
			$dom->substituteEntities = false;
			$dom->recover = false;
			$dom->loadHTML('<div id="yetiforcepdf">' . $groupHtml . '</div>', $libxmlOptions);

			// Start from the document defaults, then let the group's data-* attributes override them.
			$groupBox = (new PageGroupBox())
				->setDocument($this->document)
				->setRoot(true)
				->init();
			$groupBox->format = $this->document->getDefaultFormat();
			$defaultMargins = $this->document->getDefaultMargins();
			$groupBox->marginLeft = $defaultMargins['left'];
			$groupBox->marginTop = $defaultMargins['top'];
			$groupBox->marginRight = $defaultMargins['right'];
			$groupBox->marginBottom = $defaultMargins['bottom'];
			$groupBox->orientation = $this->document->getDefaultOrientation();
			$this->setGroupOptions($groupBox, $dom);

			// First page of the group.
			$page = $this->document->addPage($groupBox->format, $groupBox->orientation);
			$page->setPageNumber(1);
			$page->setGroup($groupIndex);
			$page->setMargins(
				$groupBox->marginLeft,
				$groupBox->marginTop,
				$groupBox->marginRight,
				$groupBox->marginBottom
			);

			$rootElement = (new \YetiForcePDF\Html\Element())
				->setDocument($this->document)
				->setDOMElement($dom->documentElement);
			// root element must be defined before initialisation
			$rootElement->init();
			$groupBox->setElement($rootElement);
			$groupBox->setStyle($rootElement->parseStyle());

			$groupBox->buildTree();
			$groupBox->fixTables();
			$groupBox->getStyle()->fixDomTree();
			$groupBox->layout();
			$page->setBox($groupBox);

			// The page list is fetched again for every pass and the passes run
			// strictly one after another.
			// NOTE(review): presumably an earlier pass can add pages to the group - confirm.
			foreach ($this->document->getPages($groupIndex) as $page) {
				$page->getBox()->breakPageAfter();
			}
			foreach ($this->document->getPages($groupIndex) as $page) {
				$page->getBox()->spanAllRows();
			}
			foreach ($this->document->getPages($groupIndex) as $page) {
				$page->breakOverflow();
			}
			// $page is whatever page the loops above visited last.
			$page->getBox()->getStyle()->fixDomTree();
			$this->document->fixPageNumbers();

			foreach ($this->document->getPages($groupIndex) as $page) {
				$this->document->setCurrentPage($page);
				$page->setUpAbsoluteBoxes();
				$page->getBox()->replacePageNumbers();
				$boxes = [];
				$page->getBox()->getAllChildren($boxes);
				foreach ($boxes as $box) {
					// Line boxes and non-renderable boxes contribute no instructions.
					if ($box instanceof \YetiForcePDF\Layout\LineBox || !$box->isRenderable()) {
						continue;
					}
					$page->getContentStream()->addRawContent($box->getInstructions());
				}
			}
		}
	}
270
}
271