1 | <?php |
||
23 | class StreamTokeniser implements TokeniserInterface |
||
24 | { |
||
25 | use StateBuilder; |
||
26 | |||
27 | const BUFFER_SIZE = 128; |
||
28 | |||
29 | /** @var int */ |
||
30 | private $minLength; |
||
31 | /** @var resource */ |
||
32 | private $stream; |
||
33 | /** @var State */ |
||
34 | private $state; |
||
35 | /** @var TokenStore */ |
||
36 | private $tokenStore; |
||
37 | /** @var StreamBuffer */ |
||
38 | private $buffer; |
||
39 | |||
40 | /** |
||
41 | * Tokeniser constructor. |
||
42 | * |
||
43 | * @param CsvConfigurationInterface $config |
||
44 | * @param resource $stream |
||
45 | */ |
||
46 | 30 | public function __construct(CsvConfigurationInterface $config, $stream) |
|
54 | |||
/**
 * Tokenise the stream from its beginning, yielding one token at a time.
 *
 * The stream is rewound and read through a StreamBuffer. The current state matches tokens against the
 * buffer; every matched token selects the next state and the buffer is then moved on by the value held at
 * index 3 of the token. Consecutive T_CONTENT tokens are concatenated into a single token before being
 * yielded, and the content of a T_BOM token is handed to changeEncoding().
 *
 * The stream is closed as soon as tokenising stops: when the buffer reports EOF, when an exception
 * interrupts the loop, or when the generator is destroyed before it has been fully consumed.
 *
 * @return Iterator
 */
public function getTokens()
{
    try {
        fseek($this->stream, 0);
        $this->buffer = new StreamBuffer($this->stream, static::BUFFER_SIZE, $this->minLength);
        $this->buffer->read();

        // Pending T_CONTENT token that following content is appended to; null when there is none.
        // NOTE(review): tokens are indexed like arrays here (0 = type, 1 = content, 3 = length) - confirm
        // against State::match().
        $last = null;

        while (!$this->buffer->isEof()) {
            foreach ($this->state->match($this->buffer) as $token) {
                if ($token[0] == Token::T_BOM) {
                    $this->changeEncoding($token[1]);
                }

                $this->state = $this->state->getNextState($token[0]);

                // merge tokens together to condense T_CONTENT tokens
                if ($token[0] == Token::T_CONTENT) {
                    if (!is_null($last)) {
                        $last[1] .= $token[1];
                        $last[3] = strlen($last[1]);
                    } else {
                        $last = $token;
                    }
                } else {
                    // a non-content token ends the current run of content, so flush it first
                    if (!is_null($last)) {
                        yield $last;
                        $last = null;
                    }
                    yield $token;
                }

                // advance by the length of the token just matched (not the merged one) and refill
                $this->buffer->move($token[3]);
                $this->buffer->read();
            }
        }

        // trailing content that was never followed by a non-content token
        if (!is_null($last)) {
            yield $last;
        }
    } finally {
        // Runs on normal completion, on exceptions and when an unfinished generator is destroyed, so the
        // stream is never left open. The guard avoids closing a stream the caller has already closed.
        if (is_resource($this->stream)) {
            fclose($this->stream);
        }
    }
}
|
105 | |||
106 | /** |
||
107 | * @param string $content |
||
108 | */ |
||
109 | 3 | private function changeEncoding($content) |
|
116 | } |
||
117 |