Issues (67)

src/IntervalSet.php (2 issues)

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Antlr\Antlr4\Runtime;
6
7
use Antlr\Antlr4\Runtime\Comparison\Equality;
8
use Antlr\Antlr4\Runtime\Comparison\Equatable;
9
use Antlr\Antlr4\Runtime\Utils\StringUtils;
10
11
/**
12
 * This class implements the {@see IntSet} backed by a sorted array of
13
 * non-overlapping intervals. It is particularly efficient for representing
14
 * large collections of numbers, where the majority of elements appear as part
15
 * of a sequential range of numbers that are all part of the set. For example,
16
 * the set { 1, 2, 3, 4, 7, 8 } may be represented as { [1, 4], [7, 8] }.
17
 *
18
 * This class is able to represent sets containing any combination of values in
19
 * the range {@see Integer::MIN_VALUE} to {@see Integer::MAX_VALUE}
20
 * (inclusive).
21
 */
22
final class IntervalSet implements Equatable
23
{
24
    /** @var array<Interval> */
25
    protected $intervals = [];
26
27
    /** @var bool */
28
    protected $readOnly = false;
29
30
    /**
31
     * Create a set with a single element, el.
32
     */
33 2
    public static function fromInt(int $number) : self
34
    {
35 2
        $set = new self();
36
37 2
        $set->addOne($number);
38
39 2
        return $set;
40
    }
41
42
    /**
43
     * Create a set with all ints within range [start..end] (inclusive).
44
     */
45
    public static function fromRange(int $start, int $end) : self
46
    {
47
        $set = new self();
48
49
        $set->addRange($start, $end);
50
51
        return $set;
52
    }
53
54
    public function complement(IntervalSet $set) : ?self
55
    {
56
        if ($set->isNull()) {
57
            return null;
58
        }
59
60
        return $set->subtract($this);
61
    }
62
63
    public function subtract(IntervalSet $set) : self
64
    {
65
        if ($set->isNull()) {
66
            return $this;
67
        }
68
69
        return self::subtractSets($this, $set);
70
    }
71
72
    public function orSet(IntervalSet $other) : self
73
    {
74
        $result = new self();
75
76
        $result->addSet($this);
77
        $result->addSet($other);
78
79
        return $result;
80
    }
81
82
    /**
83
     * Compute the set difference between two interval sets. The specific
84
     * operation is `left - right`. If either of the input sets is `null`,
85
     * it is treated as though it was an empty set.
86
     */
87
    public static function subtractSets(IntervalSet $left, IntervalSet $right) : self
88
    {
89
        if ($left->isNull()) {
90
            return new self();
91
        }
92
93
        if ($right->isNull()) {
94
            // right set has no elements; just return the copy of the current set
95
            return $left;
96
        }
97
98
        $result = $left;
99
        $resultI = 0;
100
        $rightI = 0;
101
        while ($resultI < \count($result->intervals) && $rightI < \count($right->intervals)) {
102
            $resultInterval = $result->intervals[$resultI];
103
            $rightInterval = $right->intervals[$rightI];
104
105
            // operation: (resultInterval - rightInterval) and update indexes
106
107
            if ($rightInterval->stop < $resultInterval->start) {
108
                $rightI++;
109
110
                continue;
111
            }
112
113
            if ($rightInterval->start > $resultInterval->stop) {
114
                $resultI++;
115
116
                continue;
117
            }
118
119
            $beforeCurrent = null;
120
            $afterCurrent = null;
121
            if ($rightInterval->start > $resultInterval->start) {
122
                $beforeCurrent = new Interval($resultInterval->start, $rightInterval->start - 1);
123
            }
124
125
            if ($rightInterval->stop < $resultInterval->stop) {
126
                $afterCurrent = new Interval($rightInterval->stop + 1, $resultInterval->stop);
127
            }
128
129
            if ($beforeCurrent !== null) {
130
                if ($afterCurrent !== null) {
131
                    // split the current interval into two
132
                    $result->intervals[$resultI] = $beforeCurrent;
133
                    $result->intervals[$resultI + 1] = $afterCurrent;
134
                    $resultI++;
135
                    $rightI++;
136
137
                    continue;
138
                }
139
140
                // replace the current interval
141
                $result->intervals[$resultI] = $beforeCurrent;
142
                $resultI++;
143
                continue;
144
            }
145
146
            if ($afterCurrent !== null) {
147
                // replace the current interval
148
                $result->intervals[$resultI] = $afterCurrent;
149
                $rightI++;
150
151
                continue;
152
            }
153
154
            // remove the current interval (thus no need to increment resultI)
155
            \array_splice($result->intervals, $resultI, 1);
156
157
            continue;
158
        }
159
160
        // If rightI reached right.intervals.size(), no more intervals to subtract from result.
161
        // If resultI reached result.intervals.size(), we would be subtracting from an empty set.
162
        // Either way, we are done.
163
        return $result;
164
    }
165
166
    public function isReadOnly() : bool
167
    {
168
        return $this->readOnly;
169
    }
170
171 2
    public function setReadOnly(bool $readOnly) : void
172
    {
173 2
        $this->readOnly = $readOnly;
174 2
    }
175
176
    public function first() : int
177
    {
178
        if ($this->intervals === null || \count($this->intervals) === 0) {
179
            return Token::INVALID_TYPE;
180
        }
181
182
        return $this->intervals[0]->start;
183
    }
184
185 7
    public function addOne(int $value) : void
186
    {
187 7
        $this->addInterval(new Interval($value, $value));
188 7
    }
189
190 1
    public function addRange(int $left, int $right) : void
191
    {
192 1
        $this->addInterval(new Interval($left, $right));
193 1
    }
194
195 7
    protected function addInterval(Interval $addition) : void
196
    {
197 7
        if ($this->readOnly) {
198
            throw new \InvalidArgumentException('Can\'t alter readonly IntervalSet.');
199
        }
200
201 7
        if ($addition->stop < $addition->start) {
202
            return;
203
        }
204
205
        // find position in list
206
        // Use iterators as we modify list in place
207 7
        for ($i = 0, $count = \count($this->intervals); $i < $count; $i++) {
208
            /** @var Interval $resilt */
209 4
            $resilt = $this->intervals[$i];
210
211 4
            if ($addition->equals($resilt)) {
212 4
                return;
213
            }
214
215 2
            if ($addition->adjacent($resilt) || !$addition->disjoint($resilt)) {
216
                // next to each other, make a single larger interval
217 2
                $bigger = $addition->union($resilt);
218 2
                $this->intervals[$i] = $bigger;
219
220
                // make sure we didn't just create an interval that
221
                // should be merged with next interval in list
222 2
                $i++;
223 2
                while ($i < \count($this->intervals)) {
224 2
                    $next = $this->intervals[$i];
225
226 2
                    if (!$bigger->adjacent($next) && $bigger->disjoint($next)) {
227 2
                        break;
228
                    }
229
230
                    // if we bump up against or overlap next, merge
231
                    \array_splice($this->intervals, $i, 1); // remove this one
232
233
                    $i--; // move backwards to what we just set
234
235
                    $this->intervals[$i] = $bigger->union($next); // set to 3 merged ones
236
237
                    $i++; // first call to next after previous duplicates the result
238
                }
239
240 2
                return;
241
            }
242
243 2
            if ($addition->startsBeforeDisjoint($resilt)) {
244
                // insert before r
245 2
                \array_splice($this->intervals, $i, 0, [$addition]);
246
247 2
                return;
248
            }
249
250
            // if disjoint and after r, a future iteration will handle it
251
        }
252
253
        // ok, must be after last interval (and disjoint from last interval) just add it
254 7
        $this->intervals[] = $addition;
255 7
    }
256
257 2
    public function addSet(IntervalSet $other) : self
258
    {
259 2
        if ($other->intervals !== null) {
260 2
            foreach ($other->intervals as $i) {
261 2
                $this->addInterval(new Interval($i->start, $i->stop));
262
            }
263
        }
264
265 2
        return $this;
266
    }
267
268 7
    public function contains(int $item) : bool
269
    {
270 7
        $count = \count($this->intervals);
271 7
        $left = 0;
272 7
        $right = $count - 1;
273
        // Binary search for the element in the (sorted, disjoint) array of intervals.
274
275 7
        while ($left <= $right) {
276 7
            $m = \intval($left + $right, 2);
277
278 7
            $interval = $this->intervals[$m];
279 7
            $start = $interval->start;
280 7
            $stop = $interval->stop;
281
282 7
            if ($stop < $item) {
283 5
                $left = $m + 1;
284 7
            } elseif ($start > $item) {
285 7
                $right = $m - 1;
286
            } else { // item >= start && item <= stop
287 5
                return true;
288
            }
289
        }
290
291 7
        return false;
292
    }
293
294 7
    public function length() : int
295
    {
296 7
        $length = 0;
297
298 7
        foreach ($this->intervals as $i) {
299 7
            $length += $i->getLength();
300
        }
301
302 7
        return $length;
303
    }
304
305 4
    public function removeOne(int $v) : void
306
    {
307 4
        foreach ($this->intervals as $k => $i) {
308
            // intervals is ordered
309 1
            if ($v < $i->start) {
310 1
                return;
311
            }
312
313
            // check for single value range
314 1
            if ($v === $i->start && $v === $i->stop) {
315 1
                \array_splice($this->intervals, $k, 1);
316
317 1
                return;
318
            }
319
320
            // check for lower boundary
321
            if ($v === $i->start) {
322
                $this->intervals[$k] = new Interval($i->start + 1, $i->stop);
323
324
                return;
325
            }
326
327
            // check for upper boundary
328
            if ($v === $i->stop - 1) {
329
                $this->intervals[$k] = new Interval($i->start, $i->stop - 1);
330
331
                return;
332
            }
333
334
            // split existing range
335
            if ($v < $i->stop - 1) {
336
                $x = new Interval($i->start, $v);
337
338
                $i->start = $v + 1;
339
340
                \array_splice($this->intervals, $k, 0, [$x]);
341
342
                return;
343
            }
344
        }
345 4
    }
346
347 4
    public function isNull() : bool
348
    {
349 4
        return \count($this->intervals) === 0;
350
    }
351
352
    /**
353
     * Returns the maximum value contained in the set if not isNull().
354
     *
355
     * @return int The maximum value contained in the set.
356
     *
357
     * @throws \RuntimeException If set is empty.
358
     */
359
    public function getMaxElement() : int
360
    {
361
        if ($this->isNull()) {
362
            throw new \RuntimeException('The set is empty.');
363
        }
364
365
        return $this->intervals[\count($this->intervals)-1]->stop;
366
    }
367
368
    /**
369
     * Returns the minimum value contained in the set if not isNull().
370
     *
371
     * @return int The minimum value contained in the set.
372
     *
373
     * @throws \RuntimeException If set is empty.
374
     */
375 4
    public function getMinElement() : int
376
    {
377 4
        if ($this->isNull()) {
378
            throw new \RuntimeException('The set is empty.');
379
        }
380
381 4
        return $this->intervals[0]->start;
382
    }
383
384
    public function toStringChars(bool $elemAreChar) : string
385
    {
386
        if (!$this->intervals) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->intervals of type Antlr\Antlr4\Runtime\Interval[] is implicitly converted to a boolean; are you sure this is intended? If so, consider using empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
387
            return '{}';
388
        }
389
390
        $buf = '';
391
392
        if ($this->length() > 1) {
393
            $buf .= '{';
394
        }
395
396
        $iter = new \ArrayIterator($this->intervals);
397
398
        while ($iter->valid()) {
399
            $interval = $iter->current();
400
            $iter->next();
401
            $start = $interval->start;
402
            $stop = $interval->stop;
403
404
            if ($start === $stop) {
405
                if ($start === Token::EOF) {
406
                    $buf .= '<EOF>';
407
                } elseif ($elemAreChar) {
408
                    $buf .= '\'' . StringUtils::char($start) . '\'';
409
                } else {
410
                    $buf .= $start;
411
                }
412
            } else {
413
                if ($elemAreChar) {
414
                    $buf .= \sprintf(
415
                        '\'%s\'..\'%s\'',
416
                        StringUtils::char($start),
417
                        StringUtils::char($stop)
418
                    );
419
                } else {
420
                    $buf .= \sprintf('%s..%s', $start, $stop);
421
                }
422
            }
423
424
            if ($iter->valid()) {
425
                $buf .= ', ';
426
            }
427
        }
428
429
        if ($this->length() > 1) {
430
            $buf .= '}';
431
        }
432
433
        return $buf;
434
    }
435
436 4
    public function toStringVocabulary(Vocabulary $vocabulary) : string
437
    {
438 4
        if (!$this->intervals) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->intervals of type Antlr\Antlr4\Runtime\Interval[] is implicitly converted to a boolean; are you sure this is intended? If so, consider using empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
439
            return '{}';
440
        }
441
442 4
        $buf = '';
443 4
        if ($this->length() > 1) {
444 4
            $buf .= '{';
445
        }
446
447 4
        $iterator = new \ArrayIterator($this->intervals);
448
449 4
        while ($iterator->valid()) {
450 4
            $interval = $iterator->current();
451 4
            $iterator->next();
452 4
            $start = $interval->start;
453 4
            $stop = $interval->stop;
454
455 4
            if ($start === $stop) {
456 4
                $buf .= $this->elementName($vocabulary, $start);
457
            } else {
458 4
                for ($i = $start; $i <= $stop; $i++) {
459 4
                    if ($i > $start) {
460 4
                        $buf .= ', ';
461
                    }
462
463 4
                    $buf .= $this->elementName($vocabulary, $i);
464
                }
465
            }
466
467 4
            if ($iterator->valid()) {
468 4
                $buf .= ', ';
469
            }
470
        }
471
472 4
        if ($this->length() > 1) {
473 4
            $buf .= '}';
474
        }
475
476 4
        return $buf;
477
    }
478
479 4
    protected function elementName(Vocabulary $vocabulary, int $a) : string
480
    {
481 4
        if ($a === Token::EOF) {
482
            return '<EOF>';
483
        }
484
485 4
        if ($a === Token::EPSILON) {
486
            return '<EPSILON>';
487
        }
488
489 4
        return $vocabulary->getDisplayName($a);
490
    }
491
492
    public function equals(object $other) : bool
493
    {
494
        if ($this === $other) {
495
            return true;
496
        }
497
498
        if (!$other instanceof self) {
499
            return false;
500
        }
501
502
        return $this->readOnly === $other->readOnly
503
            && Equality::equals($this->intervals, $other->intervals);
504
    }
505
506
    /**
507
     * @return array<int>
508
     */
509
    public function toArray() : array
510
    {
511
        $values = [];
512
        foreach ($this->intervals as $interval) {
513
            $start = $interval->start;
514
            $stop = $interval->stop;
515
516
            for ($value = $start; $value <= $stop; $value++) {
517
                $values[] = $value;
518
            }
519
        }
520
521
        return $values;
522
    }
523
524
    public function __toString() : string
525
    {
526
        return $this->toStringChars(false);
527
    }
528
}
529