1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace Antlr\Antlr4\Runtime; |
||
6 | |||
7 | use Antlr\Antlr4\Runtime\Comparison\Equality; |
||
8 | use Antlr\Antlr4\Runtime\Comparison\Equatable; |
||
9 | use Antlr\Antlr4\Runtime\Utils\StringUtils; |
||
10 | |||
11 | /** |
||
12 | * This class implements the {@see IntSet} backed by a sorted array of |
||
13 | * non-overlapping intervals. It is particularly efficient for representing |
||
14 | * large collections of numbers, where the majority of elements appear as part |
||
15 | * of a sequential range of numbers that are all part of the set. For example, |
||
16 | * the set { 1, 2, 3, 4, 7, 8 } may be represented as { [1, 4], [7, 8] }. |
||
17 | * |
||
18 | * This class is able to represent sets containing any combination of values in |
||
19 | * the range {@see Integer::MIN_VALUE} to {@see Integer::MAX_VALUE} |
||
20 | * (inclusive). |
||
21 | */ |
||
22 | final class IntervalSet implements Equatable |
||
23 | { |
||
24 | /** @var array<Interval> */ |
||
25 | protected $intervals = []; |
||
26 | |||
27 | /** @var bool */ |
||
28 | protected $readOnly = false; |
||
29 | |||
30 | /** |
||
31 | * Create a set with a single element, el. |
||
32 | */ |
||
33 | 2 | public static function fromInt(int $number) : self |
|
34 | { |
||
35 | 2 | $set = new self(); |
|
36 | |||
37 | 2 | $set->addOne($number); |
|
38 | |||
39 | 2 | return $set; |
|
40 | } |
||
41 | |||
42 | /** |
||
43 | * Create a set with all ints within range [start..end] (inclusive). |
||
44 | */ |
||
45 | public static function fromRange(int $start, int $end) : self |
||
46 | { |
||
47 | $set = new self(); |
||
48 | |||
49 | $set->addRange($start, $end); |
||
50 | |||
51 | return $set; |
||
52 | } |
||
53 | |||
54 | public function complement(IntervalSet $set) : ?self |
||
55 | { |
||
56 | if ($set->isNull()) { |
||
57 | return null; |
||
58 | } |
||
59 | |||
60 | return $set->subtract($this); |
||
61 | } |
||
62 | |||
63 | public function subtract(IntervalSet $set) : self |
||
64 | { |
||
65 | if ($set->isNull()) { |
||
66 | return $this; |
||
67 | } |
||
68 | |||
69 | return self::subtractSets($this, $set); |
||
70 | } |
||
71 | |||
72 | public function orSet(IntervalSet $other) : self |
||
73 | { |
||
74 | $result = new self(); |
||
75 | |||
76 | $result->addSet($this); |
||
77 | $result->addSet($other); |
||
78 | |||
79 | return $result; |
||
80 | } |
||
81 | |||
82 | /** |
||
83 | * Compute the set difference between two interval sets. The specific |
||
84 | * operation is `left - right`. If either of the input sets is `null`, |
||
85 | * it is treated as though it was an empty set. |
||
86 | */ |
||
87 | public static function subtractSets(IntervalSet $left, IntervalSet $right) : self |
||
88 | { |
||
89 | if ($left->isNull()) { |
||
90 | return new self(); |
||
91 | } |
||
92 | |||
93 | if ($right->isNull()) { |
||
94 | // right set has no elements; just return the copy of the current set |
||
95 | return $left; |
||
96 | } |
||
97 | |||
98 | $result = $left; |
||
99 | $resultI = 0; |
||
100 | $rightI = 0; |
||
101 | while ($resultI < \count($result->intervals) && $rightI < \count($right->intervals)) { |
||
102 | $resultInterval = $result->intervals[$resultI]; |
||
103 | $rightInterval = $right->intervals[$rightI]; |
||
104 | |||
105 | // operation: (resultInterval - rightInterval) and update indexes |
||
106 | |||
107 | if ($rightInterval->stop < $resultInterval->start) { |
||
108 | $rightI++; |
||
109 | |||
110 | continue; |
||
111 | } |
||
112 | |||
113 | if ($rightInterval->start > $resultInterval->stop) { |
||
114 | $resultI++; |
||
115 | |||
116 | continue; |
||
117 | } |
||
118 | |||
119 | $beforeCurrent = null; |
||
120 | $afterCurrent = null; |
||
121 | if ($rightInterval->start > $resultInterval->start) { |
||
122 | $beforeCurrent = new Interval($resultInterval->start, $rightInterval->start - 1); |
||
123 | } |
||
124 | |||
125 | if ($rightInterval->stop < $resultInterval->stop) { |
||
126 | $afterCurrent = new Interval($rightInterval->stop + 1, $resultInterval->stop); |
||
127 | } |
||
128 | |||
129 | if ($beforeCurrent !== null) { |
||
130 | if ($afterCurrent !== null) { |
||
131 | // split the current interval into two |
||
132 | $result->intervals[$resultI] = $beforeCurrent; |
||
133 | $result->intervals[$resultI + 1] = $afterCurrent; |
||
134 | $resultI++; |
||
135 | $rightI++; |
||
136 | |||
137 | continue; |
||
138 | } |
||
139 | |||
140 | // replace the current interval |
||
141 | $result->intervals[$resultI] = $beforeCurrent; |
||
142 | $resultI++; |
||
143 | continue; |
||
144 | } |
||
145 | |||
146 | if ($afterCurrent !== null) { |
||
147 | // replace the current interval |
||
148 | $result->intervals[$resultI] = $afterCurrent; |
||
149 | $rightI++; |
||
150 | |||
151 | continue; |
||
152 | } |
||
153 | |||
154 | // remove the current interval (thus no need to increment resultI) |
||
155 | \array_splice($result->intervals, $resultI, 1); |
||
156 | |||
157 | continue; |
||
158 | } |
||
159 | |||
160 | // If rightI reached right.intervals.size(), no more intervals to subtract from result. |
||
161 | // If resultI reached result.intervals.size(), we would be subtracting from an empty set. |
||
162 | // Either way, we are done. |
||
163 | return $result; |
||
164 | } |
||
165 | |||
166 | public function isReadOnly() : bool |
||
167 | { |
||
168 | return $this->readOnly; |
||
169 | } |
||
170 | |||
171 | 2 | public function setReadOnly(bool $readOnly) : void |
|
172 | { |
||
173 | 2 | $this->readOnly = $readOnly; |
|
174 | 2 | } |
|
175 | |||
176 | public function first() : int |
||
177 | { |
||
178 | if ($this->intervals === null || \count($this->intervals) === 0) { |
||
179 | return Token::INVALID_TYPE; |
||
180 | } |
||
181 | |||
182 | return $this->intervals[0]->start; |
||
183 | } |
||
184 | |||
185 | 7 | public function addOne(int $value) : void |
|
186 | { |
||
187 | 7 | $this->addInterval(new Interval($value, $value)); |
|
188 | 7 | } |
|
189 | |||
190 | 1 | public function addRange(int $left, int $right) : void |
|
191 | { |
||
192 | 1 | $this->addInterval(new Interval($left, $right)); |
|
193 | 1 | } |
|
194 | |||
195 | 7 | protected function addInterval(Interval $addition) : void |
|
196 | { |
||
197 | 7 | if ($this->readOnly) { |
|
198 | throw new \InvalidArgumentException('Can\'t alter readonly IntervalSet.'); |
||
199 | } |
||
200 | |||
201 | 7 | if ($addition->stop < $addition->start) { |
|
202 | return; |
||
203 | } |
||
204 | |||
205 | // find position in list |
||
206 | // Use iterators as we modify list in place |
||
207 | 7 | for ($i = 0, $count = \count($this->intervals); $i < $count; $i++) { |
|
208 | /** @var Interval $resilt */ |
||
209 | 4 | $resilt = $this->intervals[$i]; |
|
210 | |||
211 | 4 | if ($addition->equals($resilt)) { |
|
212 | 4 | return; |
|
213 | } |
||
214 | |||
215 | 2 | if ($addition->adjacent($resilt) || !$addition->disjoint($resilt)) { |
|
216 | // next to each other, make a single larger interval |
||
217 | 2 | $bigger = $addition->union($resilt); |
|
218 | 2 | $this->intervals[$i] = $bigger; |
|
219 | |||
220 | // make sure we didn't just create an interval that |
||
221 | // should be merged with next interval in list |
||
222 | 2 | $i++; |
|
223 | 2 | while ($i < \count($this->intervals)) { |
|
224 | 2 | $next = $this->intervals[$i]; |
|
225 | |||
226 | 2 | if (!$bigger->adjacent($next) && $bigger->disjoint($next)) { |
|
227 | 2 | break; |
|
228 | } |
||
229 | |||
230 | // if we bump up against or overlap next, merge |
||
231 | \array_splice($this->intervals, $i, 1); // remove this one |
||
232 | |||
233 | $i--; // move backwards to what we just set |
||
234 | |||
235 | $this->intervals[$i] = $bigger->union($next); // set to 3 merged ones |
||
236 | |||
237 | $i++; // first call to next after previous duplicates the result |
||
238 | } |
||
239 | |||
240 | 2 | return; |
|
241 | } |
||
242 | |||
243 | 2 | if ($addition->startsBeforeDisjoint($resilt)) { |
|
244 | // insert before r |
||
245 | 2 | \array_splice($this->intervals, $i, 0, [$addition]); |
|
246 | |||
247 | 2 | return; |
|
248 | } |
||
249 | |||
250 | // if disjoint and after r, a future iteration will handle it |
||
251 | } |
||
252 | |||
253 | // ok, must be after last interval (and disjoint from last interval) just add it |
||
254 | 7 | $this->intervals[] = $addition; |
|
255 | 7 | } |
|
256 | |||
257 | 2 | public function addSet(IntervalSet $other) : self |
|
258 | { |
||
259 | 2 | if ($other->intervals !== null) { |
|
260 | 2 | foreach ($other->intervals as $i) { |
|
261 | 2 | $this->addInterval(new Interval($i->start, $i->stop)); |
|
262 | } |
||
263 | } |
||
264 | |||
265 | 2 | return $this; |
|
266 | } |
||
267 | |||
268 | 7 | public function contains(int $item) : bool |
|
269 | { |
||
270 | 7 | $count = \count($this->intervals); |
|
271 | 7 | $left = 0; |
|
272 | 7 | $right = $count - 1; |
|
273 | // Binary search for the element in the (sorted, disjoint) array of intervals. |
||
274 | |||
275 | 7 | while ($left <= $right) { |
|
276 | 7 | $m = \intval($left + $right, 2); |
|
277 | |||
278 | 7 | $interval = $this->intervals[$m]; |
|
279 | 7 | $start = $interval->start; |
|
280 | 7 | $stop = $interval->stop; |
|
281 | |||
282 | 7 | if ($stop < $item) { |
|
283 | 5 | $left = $m + 1; |
|
284 | 7 | } elseif ($start > $item) { |
|
285 | 7 | $right = $m - 1; |
|
286 | } else { // item >= start && item <= stop |
||
287 | 5 | return true; |
|
288 | } |
||
289 | } |
||
290 | |||
291 | 7 | return false; |
|
292 | } |
||
293 | |||
294 | 7 | public function length() : int |
|
295 | { |
||
296 | 7 | $length = 0; |
|
297 | |||
298 | 7 | foreach ($this->intervals as $i) { |
|
299 | 7 | $length += $i->getLength(); |
|
300 | } |
||
301 | |||
302 | 7 | return $length; |
|
303 | } |
||
304 | |||
305 | 4 | public function removeOne(int $v) : void |
|
306 | { |
||
307 | 4 | foreach ($this->intervals as $k => $i) { |
|
308 | // intervals is ordered |
||
309 | 1 | if ($v < $i->start) { |
|
310 | 1 | return; |
|
311 | } |
||
312 | |||
313 | // check for single value range |
||
314 | 1 | if ($v === $i->start && $v === $i->stop) { |
|
315 | 1 | \array_splice($this->intervals, $k, 1); |
|
316 | |||
317 | 1 | return; |
|
318 | } |
||
319 | |||
320 | // check for lower boundary |
||
321 | if ($v === $i->start) { |
||
322 | $this->intervals[$k] = new Interval($i->start + 1, $i->stop); |
||
323 | |||
324 | return; |
||
325 | } |
||
326 | |||
327 | // check for upper boundary |
||
328 | if ($v === $i->stop - 1) { |
||
329 | $this->intervals[$k] = new Interval($i->start, $i->stop - 1); |
||
330 | |||
331 | return; |
||
332 | } |
||
333 | |||
334 | // split existing range |
||
335 | if ($v < $i->stop - 1) { |
||
336 | $x = new Interval($i->start, $v); |
||
337 | |||
338 | $i->start = $v + 1; |
||
339 | |||
340 | \array_splice($this->intervals, $k, 0, [$x]); |
||
341 | |||
342 | return; |
||
343 | } |
||
344 | } |
||
345 | 4 | } |
|
346 | |||
347 | 4 | public function isNull() : bool |
|
348 | { |
||
349 | 4 | return \count($this->intervals) === 0; |
|
350 | } |
||
351 | |||
352 | /** |
||
353 | * Returns the maximum value contained in the set if not isNull(). |
||
354 | * |
||
355 | * @return int The maximum value contained in the set. |
||
356 | * |
||
357 | * @throws \RuntimeException If set is empty. |
||
358 | */ |
||
359 | public function getMaxElement() : int |
||
360 | { |
||
361 | if ($this->isNull()) { |
||
362 | throw new \RuntimeException('The set is empty.'); |
||
363 | } |
||
364 | |||
365 | return $this->intervals[\count($this->intervals)-1]->stop; |
||
366 | } |
||
367 | |||
368 | /** |
||
369 | * Returns the minimum value contained in the set if not isNull(). |
||
370 | * |
||
371 | * @return int The minimum value contained in the set. |
||
372 | * |
||
373 | * @throws \RuntimeException If set is empty. |
||
374 | */ |
||
375 | 4 | public function getMinElement() : int |
|
376 | { |
||
377 | 4 | if ($this->isNull()) { |
|
378 | throw new \RuntimeException('The set is empty.'); |
||
379 | } |
||
380 | |||
381 | 4 | return $this->intervals[0]->start; |
|
382 | } |
||
383 | |||
384 | public function toStringChars(bool $elemAreChar) : string |
||
385 | { |
||
386 | if (!$this->intervals) { |
||
0 ignored issues
–
show
|
|||
387 | return '{}'; |
||
388 | } |
||
389 | |||
390 | $buf = ''; |
||
391 | |||
392 | if ($this->length() > 1) { |
||
393 | $buf .= '{'; |
||
394 | } |
||
395 | |||
396 | $iter = new \ArrayIterator($this->intervals); |
||
397 | |||
398 | while ($iter->valid()) { |
||
399 | $interval = $iter->current(); |
||
400 | $iter->next(); |
||
401 | $start = $interval->start; |
||
402 | $stop = $interval->stop; |
||
403 | |||
404 | if ($start === $stop) { |
||
405 | if ($start === Token::EOF) { |
||
406 | $buf .= '<EOF>'; |
||
407 | } elseif ($elemAreChar) { |
||
408 | $buf .= '\'' . StringUtils::char($start) . '\''; |
||
409 | } else { |
||
410 | $buf .= $start; |
||
411 | } |
||
412 | } else { |
||
413 | if ($elemAreChar) { |
||
414 | $buf .= \sprintf( |
||
415 | '\'%s\'..\'%s\'', |
||
416 | StringUtils::char($start), |
||
417 | StringUtils::char($stop) |
||
418 | ); |
||
419 | } else { |
||
420 | $buf .= \sprintf('%s..%s', $start, $stop); |
||
421 | } |
||
422 | } |
||
423 | |||
424 | if ($iter->valid()) { |
||
425 | $buf .= ', '; |
||
426 | } |
||
427 | } |
||
428 | |||
429 | if ($this->length() > 1) { |
||
430 | $buf .= '}'; |
||
431 | } |
||
432 | |||
433 | return $buf; |
||
434 | } |
||
435 | |||
436 | 4 | public function toStringVocabulary(Vocabulary $vocabulary) : string |
|
437 | { |
||
438 | 4 | if (!$this->intervals) { |
|
0 ignored issues
–
show
The expression
$this->intervals of type Antlr\Antlr4\Runtime\Interval[] is implicitly converted to a boolean; are you sure this is intended? If so, consider using empty($expr) instead to make it clear that you intend to check for an array without elements.
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent. Consider making the comparison explicit by using ![]() |
|||
439 | return '{}'; |
||
440 | } |
||
441 | |||
442 | 4 | $buf = ''; |
|
443 | 4 | if ($this->length() > 1) { |
|
444 | 4 | $buf .= '{'; |
|
445 | } |
||
446 | |||
447 | 4 | $iterator = new \ArrayIterator($this->intervals); |
|
448 | |||
449 | 4 | while ($iterator->valid()) { |
|
450 | 4 | $interval = $iterator->current(); |
|
451 | 4 | $iterator->next(); |
|
452 | 4 | $start = $interval->start; |
|
453 | 4 | $stop = $interval->stop; |
|
454 | |||
455 | 4 | if ($start === $stop) { |
|
456 | 4 | $buf .= $this->elementName($vocabulary, $start); |
|
457 | } else { |
||
458 | 4 | for ($i = $start; $i <= $stop; $i++) { |
|
459 | 4 | if ($i > $start) { |
|
460 | 4 | $buf .= ', '; |
|
461 | } |
||
462 | |||
463 | 4 | $buf .= $this->elementName($vocabulary, $i); |
|
464 | } |
||
465 | } |
||
466 | |||
467 | 4 | if ($iterator->valid()) { |
|
468 | 4 | $buf .= ', '; |
|
469 | } |
||
470 | } |
||
471 | |||
472 | 4 | if ($this->length() > 1) { |
|
473 | 4 | $buf .= '}'; |
|
474 | } |
||
475 | |||
476 | 4 | return $buf; |
|
477 | } |
||
478 | |||
479 | 4 | protected function elementName(Vocabulary $vocabulary, int $a) : string |
|
480 | { |
||
481 | 4 | if ($a === Token::EOF) { |
|
482 | return '<EOF>'; |
||
483 | } |
||
484 | |||
485 | 4 | if ($a === Token::EPSILON) { |
|
486 | return '<EPSILON>'; |
||
487 | } |
||
488 | |||
489 | 4 | return $vocabulary->getDisplayName($a); |
|
490 | } |
||
491 | |||
492 | public function equals(object $other) : bool |
||
493 | { |
||
494 | if ($this === $other) { |
||
495 | return true; |
||
496 | } |
||
497 | |||
498 | if (!$other instanceof self) { |
||
499 | return false; |
||
500 | } |
||
501 | |||
502 | return $this->readOnly === $other->readOnly |
||
503 | && Equality::equals($this->intervals, $other->intervals); |
||
504 | } |
||
505 | |||
506 | /** |
||
507 | * @return array<int> |
||
508 | */ |
||
509 | public function toArray() : array |
||
510 | { |
||
511 | $values = []; |
||
512 | foreach ($this->intervals as $interval) { |
||
513 | $start = $interval->start; |
||
514 | $stop = $interval->stop; |
||
515 | |||
516 | for ($value = $start; $value <= $stop; $value++) { |
||
517 | $values[] = $value; |
||
518 | } |
||
519 | } |
||
520 | |||
521 | return $values; |
||
522 | } |
||
523 | |||
524 | public function __toString() : string |
||
525 | { |
||
526 | return $this->toStringChars(false); |
||
527 | } |
||
528 | } |
||
529 |
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using
empty(..)
or! empty(...)
instead.