1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Groundskeeper\Tokens; |
4
|
|
|
|
5
|
|
|
use Kevintweber\HtmlTokenizer\HtmlTokenizer; |
6
|
|
|
use Kevintweber\HtmlTokenizer\Tokens\Element as BasicElement; |
7
|
|
|
use Kevintweber\HtmlTokenizer\Tokens\Token as BasicToken; |
8
|
|
|
|
9
|
|
|
/**
 * Converts an HTML string into a list of Groundskeeper token objects.
 *
 * The HTML is first parsed by HtmlTokenizer; each resulting basic token is
 * then wrapped in the matching token class from this namespace (CData,
 * Comment, DocType, Text, or an element class).
 */
class Tokenizer
{
    /** @var array Options supplied at construction (reads 'throw-on-error'). */
    private $options;

    /**
     * Constructor
     *
     * @param array $options Tokenizer options. The only key read by this
     *                       class is 'throw-on-error'.
     */
    public function __construct(array $options = array())
    {
        $this->options = $options;
    }

    /**
     * Parses the given HTML and returns the top-level tokens.
     *
     * @param string $html The HTML to tokenize.
     *
     * @return array List of token objects created by createToken().
     *
     * @throws \InvalidArgumentException When $html is not a string.
     * @throws \RuntimeException         When the parser yields an unknown
     *                                   token type.
     */
    public function tokenize($html)
    {
        if (!is_string($html)) {
            throw new \InvalidArgumentException('Html must be a string.');
        }

        // The option is optional: fall back to false rather than reading an
        // undefined index when the tokenizer is built with default options.
        $throwOnError = isset($this->options['throw-on-error'])
            ? $this->options['throw-on-error']
            : false;

        $tokenizer = new HtmlTokenizer($throwOnError);
        $basicTokenCollection = $tokenizer->parse($html);

        $cleanableTokens = array();
        foreach ($basicTokenCollection as $basicToken) {
            $cleanableTokens[] = $this->createToken($basicToken);
        }

        return $cleanableTokens;
    }

    /**
     * Builds the token object that corresponds to a basic parser token.
     *
     * @param BasicToken $basicToken Token produced by HtmlTokenizer.
     *
     * @return mixed A CData, Comment, DocType, Text or element instance.
     *
     * @throws \RuntimeException When the token type is not recognised.
     */
    private function createToken(BasicToken $basicToken)
    {
        switch ($basicToken->getType()) {
            case 'cdata':
                return new CData(
                    $basicToken->getParent(),
                    $basicToken->getValue()
                );

            case 'comment':
                return new Comment(
                    $basicToken->getParent(),
                    $basicToken->getValue()
                );

            case 'doctype':
                return new DocType(
                    $basicToken->getParent(),
                    $basicToken->getValue()
                );

            case 'element':
                // Instance call: createElement() is a private, non-static
                // method, so it must not be dispatched through static::.
                return $this->createElement($basicToken);

            case 'text':
                return new Text(
                    $basicToken->getParent(),
                    $basicToken->getValue()
                );
        }

        throw new \RuntimeException(
            'Invalid token type: ' . $basicToken->getType()
        );
    }

    /**
     * Builds an element token, including all of its descendants.
     *
     * A class named after the tag (first letter upper-cased, rest
     * lower-cased) in Groundskeeper\Tokens\Elements is used when it exists;
     * otherwise the generic Element class is used.
     *
     * @param BasicElement $basicElement Element produced by HtmlTokenizer.
     *
     * @return mixed The created element with its children attached.
     *
     * @throws \RuntimeException When a descendant has an unknown token type.
     */
    private function createElement(BasicElement $basicElement)
    {
        $elementClassName = 'Groundskeeper\\Tokens\\Elements\\' .
            ucfirst(strtolower($basicElement->getName()));
        if (!class_exists($elementClassName)) {
            $elementClassName = 'Groundskeeper\\Tokens\\Elements\\Element';
        }

        $cleanableElement = new $elementClassName(
            $basicElement->getName(),
            $basicElement->getAttributes(),
            $basicElement->getParent()
        );

        // Children are converted recursively through createToken(), which
        // dispatches back here for nested elements.
        foreach ($basicElement->getChildren() as $basicChild) {
            $cleanableElement->addChild(
                $this->createToken($basicChild)
            );
        }

        return $cleanableElement;
    }
}
98
|
|
|
|
This check looks at variables that are passed out again to other methods.
If the outgoing method call has stricter type requirements than the method itself, an issue is raised.
An additional type check may prevent trouble.