|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace Groundskeeper; |
|
4
|
|
|
|
|
5
|
|
|
use Groundskeeper\Tokens\Tokenizer; |
|
6
|
|
|
use Psr\Log\LoggerAwareInterface; |
|
7
|
|
|
use Psr\Log\LoggerInterface; |
|
8
|
|
|
use Psr\Log\NullLogger; |
|
9
|
|
|
|
|
10
|
|
|
/**
 * Entry point that tokenizes an HTML string, runs the token container's
 * remove/clean passes over it and renders the result back to a string.
 */
class Groundskeeper implements LoggerAwareInterface
{
    /** @var Configuration Settings given to the tokenizer and used to pick the output class. */
    private $configuration;

    /** @var LoggerInterface Logger handed to the token container's remove() and clean() calls. */
    private $logger;

    /**
     * Builds the instance from an options array or an existing Configuration.
     *
     * The logger is initialised to a NullLogger; call setLogger() to replace it.
     *
     * @param array|Configuration $options Raw options (wrapped in a new
     *                                     Configuration) or a Configuration
     *                                     that is stored as-is.
     *
     * @throws \InvalidArgumentException When $options is neither an array nor
     *                                   a Configuration.
     */
    public function __construct($options = array())
    {
        $this->logger = new NullLogger();

        if ($options instanceof Configuration) {
            $resolved = $options;
        } elseif (is_array($options)) {
            $resolved = new Configuration($options);
        } else {
            throw new \InvalidArgumentException('Invalid option type.');
        }

        $this->configuration = $resolved;
    }

    /**
     * Cleans the given HTML and returns the rendered result.
     *
     * The input is tokenized once and remove() is applied to that first token
     * container. Cleaning is then repeated, re-tokenizing the previous pass's
     * output each time, until a pass leaves the rendered output unchanged or
     * five passes have been made.
     *
     * @param string $html Markup to clean.
     *
     * @return string Output rendered after the final cleaning pass.
     */
    public function clean(string $html) : string
    {
        $tokenizer = new Tokenizer($this->configuration);
        $tokens = $tokenizer->tokenize($html);
        $tokens->remove($this->logger);
        $render = $this->getOutputGenerator();

        $result = '';
        for ($pass = 1; $pass <= 5; ++$pass) {
            // Every pass after the first starts from the previous pass's output.
            if ($pass > 1) {
                $tokens = $tokenizer->tokenize($result);
            }

            $before = $render($tokens);
            $tokens->clean($this->logger);
            $result = $render($tokens);

            // A pass that changed nothing means the output has stabilised.
            if ($before === $result) {
                break;
            }
        }

        return $result;
    }

    /**
     * Returns the Configuration this instance was built with.
     *
     * @return Configuration
     */
    public function getConfiguration() : Configuration
    {
        return $this->configuration;
    }

    /**
     * Replaces the logger passed to the token container operations.
     *
     * @param LoggerInterface $logger
     */
    public function setLogger(LoggerInterface $logger)
    {
        $this->logger = $logger;
    }

    /**
     * Instantiates the output generator named by the 'output' configuration
     * value. The value is upper-cased on its first character and resolved
     * inside the Groundskeeper\Output namespace; clean() invokes the returned
     * object as a callable.
     *
     * @return object New instance of the resolved output class.
     */
    private function getOutputGenerator()
    {
        $format = ucfirst($this->configuration->get('output'));
        $generatorClass = 'Groundskeeper\\Output\\' . $format;

        return new $generatorClass();
    }
}
|
80
|
|
|
|