RightToLeftSupport::spanLtrRtl()   F
last analyzed

Complexity

Conditions 82
Paths > 20000

Size

Total Lines 400
Code Lines 253

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 82
eloc 253
c 0
b 0
f 0
nc 22632961
nop 1
dl 0
loc 400
rs 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: moving a commented part of a long method into its own well-named method.

1
<?php
2
3
/**
4
 * webtrees: online genealogy
5
 * Copyright (C) 2023 webtrees development team
6
 * This program is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 * You should have received a copy of the GNU General Public License
15
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
 */
17
18
declare(strict_types=1);
19
20
namespace Fisharebest\Webtrees\Report;
21
22
use Fisharebest\Webtrees\I18N;
23
24
use function ord;
25
use function preg_replace;
26
use function str_contains;
27
use function str_pad;
28
use function str_replace;
29
use function strlen;
30
use function strpos;
31
use function strrpos;
32
use function strtolower;
33
use function strtoupper;
34
use function substr;
35
36
use const STR_PAD_LEFT;
37
use const STR_PAD_RIGHT;
38
39
/**
40
 * RTL Functions for use in the PDF reports
41
 */
42
class RightToLeftSupport
43
{
44
    // Unicode bidirectional control characters, written as raw UTF-8 byte sequences (three bytes each).
    private const UTF8_LRM = "\xE2\x80\x8E"; // U+200E (Left to Right mark:  zero-width character with LTR directionality)
45
    private const UTF8_RLM = "\xE2\x80\x8F"; // U+200F (Right to Left mark:  zero-width character with RTL directionality)
46
    private const UTF8_LRO = "\xE2\x80\xAD"; // U+202D (Left to Right override: force everything following to LTR mode)
47
    private const UTF8_RLO = "\xE2\x80\xAE"; // U+202E (Right to Left override: force everything following to RTL mode)
48
    private const UTF8_LRE = "\xE2\x80\xAA"; // U+202A (Left to Right embedding: treat everything following as LTR text)
49
    private const UTF8_RLE = "\xE2\x80\xAB"; // U+202B (Right to Left embedding: treat everything following as RTL text)
50
    private const UTF8_PDF = "\xE2\x80\xAC"; // U+202C (Pop directional formatting: restore state prior to last LRO, RLO, LRE, RLE)
51
52
    private const OPEN_PARENTHESES = '([{'; // Opening brackets; the position of a bracket here pairs it with CLOSE_PARENTHESES
53
54
    private const CLOSE_PARENTHESES = ')]}'; // Closing brackets, in the same order as OPEN_PARENTHESES
55
56
    private const NUMBERS = '0123456789'; // Characters that start or continue a numeric string
57
58
    private const NUMBER_PREFIX = '+-'; // Treat these like numbers when at beginning or end of numeric strings
59
60
    private const NUMBER_PUNCTUATION = '- ,.:/'; // Treat these like numbers when inside numeric strings
61
62
    private const PUNCTUATION = ',.:;?!'; // When one of these follows an RTL letter, that letter is not treated as solitary
63
64
    // Temporary markup written while scanning; replaced by <span dir="..."> and </span> at the very end
65
    private const START_LTR    = '<LTR>';
66
    private const END_LTR      = '</LTR>';
67
    private const START_RTL    = '<RTL>';
68
    private const END_RTL      = '</RTL>';
69
    private const LENGTH_START = 5; // Byte length of START_LTR / START_RTL
70
    private const LENGTH_END   = 6; // Byte length of END_LTR / END_RTL
71
72
    /* Direction ('LTR', 'RTL' or '') that was being processed before the most recent direction change. */
73
    private static string $previousState;
74
75
    /* Direction ('LTR', 'RTL' or '') of the span currently being built. */
76
    private static string $currentState;
77
78
    /* Text whose direction is not yet known; it is held back until following text settles it. */
79
    private static string $waitingText;
80
81
    /* Byte offset into the result string at which the text of the current span starts. */
82
    private static int $posSpanStart;
83
84
    /**
     * Remove every directional marker from a string.
     *
     * Deleted are the raw UTF-8 control codes (LRM, RLM, LRO, RLO, LRE, RLE, PDF) and the
     * HTML entities &lrm; / &rlm; in their all-lowercase and all-uppercase spellings.
     *
     * @param string $inputText Text that may contain directional markers
     *
     * @return string The text with all of those markers removed
     */
    private static function stripLrmRlm(string $inputText): string
    {
        // The markers are removed one after the other, in exactly this order.
        $directionalMarkers = [
            self::UTF8_LRM,
            self::UTF8_RLM,
            self::UTF8_LRO,
            self::UTF8_RLO,
            self::UTF8_LRE,
            self::UTF8_RLE,
            self::UTF8_PDF,
            '&lrm;',
            '&rlm;',
            '&LRM;',
            '&RLM;',
        ];

        return str_replace($directionalMarkers, '', $inputText);
    }
109
110
    /**
     * Split a text into runs of left-to-right and right-to-left content and wrap each
     * run in <span dir="ltr"> ... </span> or <span dir="rtl"> ... </span>.
     *
     * Processing steps:
     *  - line feeds become <br>, and existing directional control codes are stripped;
     *  - the text is scanned one UTF-8 character at a time; HTML elements, HTML entities
     *    and TCPDF directives ({{...}}) are copied without being analysed;
     *  - while the current direction is RTL, numeric strings are enclosed in LRE ... PDF;
     *  - characters whose direction is not yet known are held in self::$waitingText until
     *    a later character settles it;
     *  - the temporary <LTR>/<RTL> markup is tidied and converted to <span> elements.
     *
     * The scan uses the class's static state ($previousState, $currentState, $waitingText,
     * $posSpanStart), so this method overwrites that state on every call.
     *
     * @param string $inputText Raw input
     *
     * @return string The string with all texts encapsulated as required
     */
    public static function spanLtrRtl(string $inputText): string
    {
        if ($inputText === '') {
            // Nothing to do
            return '';
        }

        $workingText = str_replace("\n", '<br>', $inputText);
        $workingText = str_replace([
            '<span class="starredname"><br>',
            '<span<br>class="starredname">',
        ], '<br><span class="starredname">', $workingText); // Reposition some incorrectly placed line breaks
        $workingText = self::stripLrmRlm($workingText); // Get rid of any existing UTF8 control codes

        self::$previousState = '';
        self::$currentState  = strtoupper(I18N::direction());
        $numberState         = false; // Set when we're inside a numeric string
        $result              = '';
        self::$waitingText   = '';
        $openParDirection    = [];

        self::beginCurrentSpan($result);

        // NOTE: the statement order inside this loop is significant; it is intentionally left untouched.
        while ($workingText !== '') {
            $charArray     = self::getChar($workingText, 0); // Get the next ASCII or UTF-8 character
            $currentLetter = $charArray['letter'];
            $currentLen    = $charArray['length'];

            $openParIndex  = strpos(self::OPEN_PARENTHESES, $currentLetter); // Which opening parenthesis is this?
            $closeParIndex = strpos(self::CLOSE_PARENTHESES, $currentLetter); // Which closing parenthesis is this?

            switch ($currentLetter) {
                case '<':
                    // Assume this '<' starts an HTML element
                    $endPos = strpos($workingText, '>'); // look for the terminating '>'
                    if ($endPos === false) {
                        $endPos = 0;
                    }
                    $currentLen += $endPos;
                    $element    = substr($workingText, 0, $currentLen);
                    $temp       = strtolower(substr($element, 0, 3));
                    if (strlen($element) < 7 && $temp === '<br') {
                        if ($numberState) {
                            $numberState = false;
                            if (self::$currentState === 'RTL') {
                                self::$waitingText .= self::UTF8_PDF;
                            }
                        }
                        self::breakCurrentSpan($result);
                    } elseif (self::$waitingText === '') {
                        $result .= $element;
                    } else {
                        self::$waitingText .= $element;
                    }
                    $workingText = substr($workingText, $currentLen);
                    break;
                case '&':
                    // Assume this '&' starts an HTML entity
                    $endPos = strpos($workingText, ';'); // look for the terminating ';'
                    if ($endPos === false) {
                        $endPos = 0;
                    }
                    $currentLen += $endPos;
                    $entity     = substr($workingText, 0, $currentLen);
                    if (strtolower($entity) === '&nbsp;') {
                        $entity = '&nbsp;'; // Ensure consistent case for this entity
                    }
                    if (self::$waitingText === '') {
                        $result .= $entity;
                    } else {
                        self::$waitingText .= $entity;
                    }
                    $workingText = substr($workingText, $currentLen);
                    break;
                case '{':
                    if (substr($workingText, 1, 1) === '{') {
                        // Assume this '{{' starts a TCPDF directive
                        $endPos = strpos($workingText, '}}'); // look for the terminating '}}'
                        if ($endPos === false) {
                            $endPos = 0;
                        }
                        $currentLen        = $endPos + 2;
                        $directive         = substr($workingText, 0, $currentLen);
                        $workingText       = substr($workingText, $currentLen);
                        $result            .= self::$waitingText . $directive;
                        self::$waitingText = '';
                        break;
                    }
                    // no break
                default:
                    // Look for strings of numbers with optional leading or trailing + or -
                    // and with optional embedded numeric punctuation
                    if ($numberState) {
                        // If we're inside a numeric string, look for reasons to end it
                        $offset    = 0; // Be sure to look at the current character first
                        $charArray = self::getChar($workingText . "\n", $offset);
                        if (!str_contains(self::NUMBERS, $charArray['letter'])) {
                            // This is not a digit. Is it numeric punctuation?
                            if (substr($workingText . "\n", $offset, 6) === '&nbsp;') {
                                $offset += 6; // This could be numeric punctuation
                            } elseif (str_contains(self::NUMBER_PUNCTUATION, $charArray['letter'])) {
                                $offset += $charArray['length']; // This could be numeric punctuation
                            }
                            // If the next character is a digit, the current character is numeric punctuation
                            $charArray = self::getChar($workingText . "\n", $offset);
                            if (!str_contains(self::NUMBERS, $charArray['letter'])) {
                                // This is not a digit. End the run of digits and punctuation.
                                $numberState = false;
                                if (self::$currentState === 'RTL') {
                                    if (!str_contains(self::NUMBER_PREFIX, $currentLetter)) {
                                        $currentLetter = self::UTF8_PDF . $currentLetter;
                                    } else {
                                        $currentLetter .= self::UTF8_PDF; // Include a trailing + or - in the run
                                    }
                                }
                            }
                        }
                    } elseif (str_contains(self::NUMBER_PREFIX, $currentLetter)) {
                        // If we're outside a numeric string, look for reasons to start it
                        // This might be a number lead-in
                        $offset   = $currentLen;
                        $nextChar = substr($workingText . "\n", $offset, 1);
                        if (str_contains(self::NUMBERS, $nextChar)) {
                            $numberState = true; // We found a digit: the lead-in is therefore numeric
                            if (self::$currentState === 'RTL') {
                                $currentLetter = self::UTF8_LRE . $currentLetter;
                            }
                        }
                    } elseif (str_contains(self::NUMBERS, $currentLetter)) {
                        $numberState = true; // The current letter is a digit
                        if (self::$currentState === 'RTL') {
                            $currentLetter = self::UTF8_LRE . $currentLetter;
                        }
                    }

                    // Determine the directionality of the current UTF-8 character
                    $newState = self::$currentState;

                    while (true) {
                        if (I18N::scriptDirection(I18N::textScript($currentLetter)) === 'rtl') {
                            if (self::$currentState === '') {
                                $newState = 'RTL';
                                break;
                            }

                            if (self::$currentState === 'RTL') {
                                break;
                            }
                            // Switch to RTL only if this isn't a solitary RTL letter
                            $tempText = substr($workingText, $currentLen);
                            while ($tempText !== '') {
                                $nextCharArray = self::getChar($tempText, 0);
                                $nextLetter    = $nextCharArray['letter'];
                                $nextLen       = $nextCharArray['length'];
                                $tempText      = substr($tempText, $nextLen);

                                if (I18N::scriptDirection(I18N::textScript($nextLetter)) === 'rtl') {
                                    $newState = 'RTL';
                                    break 2;
                                }

                                if (str_contains(self::PUNCTUATION, $nextLetter) || str_contains(self::OPEN_PARENTHESES, $nextLetter)) {
                                    $newState = 'RTL';
                                    break 2;
                                }

                                if ($nextLetter === ' ') {
                                    break;
                                }
                                $nextLetter .= substr($tempText . "\n", 0, 5);
                                if ($nextLetter === '&nbsp;') {
                                    break;
                                }
                            }
                            // This is a solitary RTL letter : wrap it in UTF8 control codes to force LTR directionality
                            $currentLetter = self::UTF8_LRO . $currentLetter . self::UTF8_PDF;
                            $newState      = 'LTR';
                            break;
                        }
                        if ($currentLen !== 1 || ($currentLetter >= 'A' && $currentLetter <= 'Z') || ($currentLetter >= 'a' && $currentLetter <= 'z')) {
                            // Since it’s neither Hebrew nor Arabic, this UTF-8 character or ASCII letter must be LTR
                            $newState = 'LTR';
                            break;
                        }
                        if ($closeParIndex !== false) {
                            // This closing parenthesis has to inherit the matching opening parenthesis' directionality
                            if (!empty($openParDirection[$closeParIndex]) && $openParDirection[$closeParIndex] !== '?') {
                                $newState = $openParDirection[$closeParIndex];
                            }
                            $openParDirection[$closeParIndex] = '';
                            break;
                        }
                        self::$waitingText .= $currentLetter;
                        $workingText       = substr($workingText, $currentLen);
                        if ($openParIndex !== false) {
                            // Opening parentheses always inherit the following directionality
                            while (true) {
                                if ($workingText === '') {
                                    break;
                                }
                                if (str_starts_with($workingText, ' ')) {
                                    // Spaces following this left parenthesis inherit the following directionality too
                                    self::$waitingText .= ' ';
                                    $workingText       = substr($workingText, 1);
                                    continue;
                                }
                                if (str_starts_with($workingText, '&nbsp;')) {
                                    // Spaces following this left parenthesis inherit the following directionality too
                                    self::$waitingText .= '&nbsp;';
                                    $workingText       = substr($workingText, 6);
                                    continue;
                                }
                                break;
                            }
                            $openParDirection[$openParIndex] = '?';
                            break 2; // double break because we're waiting for more information
                        }

                        // We have a digit or a "normal" special character.
                        //
                        // When this character is not at the start of the input string, it inherits the preceding directionality;
                        // at the start of the input string, it assumes the following directionality.
                        //
                        // Exceptions to this rule will be handled later during final clean-up.
                        //
                        if (self::$currentState !== '') {
                            $result            .= self::$waitingText;
                            self::$waitingText = '';
                        }
                        break 2; // double break because we're waiting for more information
                    }
                    if ($newState !== self::$currentState) {
                        // A direction change has occurred
                        self::finishCurrentSpan($result);
                        self::$previousState = self::$currentState;
                        self::$currentState  = $newState;
                        self::beginCurrentSpan($result);
                    }
                    self::$waitingText .= $currentLetter;
                    $workingText       = substr($workingText, $currentLen);
                    $result            .= self::$waitingText;
                    self::$waitingText = '';

                    foreach ($openParDirection as $index => $value) {
                        // Since we now know the proper direction, remember it for all waiting opening parentheses
                        if ($value === '?') {
                            $openParDirection[$index] = self::$currentState;
                        }
                    }

                    break;
            }
        }

        // We're done. An RTL numeric string that is still open must be closed with a PDF.
        if ($numberState && self::$currentState === 'RTL') {
            if (self::$waitingText === '') {
                $result .= self::UTF8_PDF;
            } else {
                self::$waitingText .= self::UTF8_PDF;
            }
        }
        self::finishCurrentSpan($result, true);

        // Get rid of any waiting text
        if (self::$waitingText !== '') {
            if (I18N::direction() === 'rtl' && self::$currentState === 'LTR') {
                $result .= self::START_RTL . self::$waitingText . self::END_RTL;
            } else {
                $result .= self::START_LTR . self::$waitingText . self::END_LTR;
            }
            self::$waitingText = '';
        }

        // Lastly, do some more cleanups
        return self::tidySpanMarkup($result);
    }

    /**
     * Final clean-up of the temporary <LTR>/<RTL> markup, ending with its conversion to <span> elements.
     *
     * The steps run in a fixed order; later steps rely on the output of earlier ones.
     *
     * @param string $result Scanned text containing <LTR>, </LTR>, <RTL>, </RTL>, <LTRbr> and <RTLbr> markers
     *
     * @return string Text in which every run is wrapped in <span dir="ltr"> or <span dir="rtl">
     */
    private static function tidySpanMarkup(string $result): string
    {
        $result = self::moveLeadingRtlNumberIntoLtr($result);

        // On RTL pages, put trailing "." in RTL numeric strings into its own RTL span
        if (I18N::direction() === 'rtl') {
            $result = str_replace(self::UTF8_PDF . '.' . self::END_RTL, self::UTF8_PDF . self::END_RTL . self::START_RTL . '.' . self::END_RTL, $result);
        }

        // Trim trailing blanks preceding <br> in LTR text
        if (self::$previousState !== 'RTL') {
            $result = self::trimBlanksBefore($result, '<LTRbr>');
            $result = self::trimBlanksBefore($result, '<br>');
        }

        // Trim trailing blanks preceding <br> in RTL text
        $result = self::trimBlanksBefore($result, '<RTLbr>');

        // Convert '<LTRbr>' and '<RTLbr>': close the span, break the line, and reopen the span
        $result = str_replace([
            '<LTRbr>',
            '<RTLbr>',
        ], [
            self::END_LTR . '<br>' . self::START_LTR,
            self::END_RTL . '<br>' . self::START_RTL,
        ], $result);

        $result = self::absorbLeadingNeutralText($result);

        // Include solitary "-" and "+" in surrounding RTL text
        $result = str_replace([
            self::END_RTL . self::START_LTR . '-' . self::END_LTR . self::START_RTL,
            self::END_RTL . self::START_LTR . '+' . self::END_LTR . self::START_RTL,
        ], [
            '-',
            '+',
        ], $result);

        // Remove empty spans
        $result = str_replace([
            self::START_LTR . self::END_LTR,
            self::START_RTL . self::END_RTL,
        ], '', $result);

        // Finally, correct '<LTR>', '</LTR>', '<RTL>', and '</RTL>'
        // LTR text: <span dir="ltr"> text </span>
        // RTL text: <span dir="rtl"> text </span>
        return str_replace([
            self::START_LTR,
            self::END_LTR,
            self::START_RTL,
            self::END_RTL,
        ], [
            '<span dir="ltr">',
            '</span>',
            '<span dir="rtl">',
            '</span>',
        ], $result);
    }

    /**
     * Move a leading RTL numeric string into the LTR span that directly follows it.
     *
     * This happens when the page direction is RTL and the original text begins with a number
     * and is followed by LTR text. Nothing is changed when the text does not start with an RTL
     * span holding an LRE, when no LTR span follows, or when the leading span contains RTL script.
     *
     * @param string $result Text with temporary <LTR>/<RTL> markup
     *
     * @return string The text, with the leading numeric string merged into the following LTR span
     */
    private static function moveLeadingRtlNumberIntoLtr(string $result): string
    {
        if (!str_starts_with($result, self::START_RTL . self::UTF8_LRE)) {
            return $result;
        }

        $spanEnd = strpos($result, self::END_RTL . self::START_LTR);
        if ($spanEnd === false) {
            return $result;
        }

        // The "+ 3" / "- 3" skip the three bytes of the LRE that opens the numeric string.
        $textSpan = self::stripLrmRlm(substr($result, self::LENGTH_START + 3, $spanEnd - self::LENGTH_START - 3));
        if (I18N::scriptDirection(I18N::textScript($textSpan)) === 'rtl') {
            return $result;
        }

        return self::START_LTR
            . substr($result, self::LENGTH_START, $spanEnd - self::LENGTH_START)
            . substr($result, $spanEnd + self::LENGTH_START + self::LENGTH_END);
    }

    /**
     * Remove all blanks (' ' and '&nbsp;') that directly precede a line-break marker.
     *
     * @param string $text  Text to clean
     * @param string $break The marker to look for: '<br>', '<LTRbr>' or '<RTLbr>'
     *
     * @return string The text without blanks in front of any occurrence of the marker
     */
    private static function trimBlanksBefore(string $text, string $break): string
    {
        // One pass removes a single blank per marker, so repeat until nothing changes.
        do {
            $before = $text;
            $text   = str_replace([' ' . $break, '&nbsp;' . $break], $break, $text);
        } while ($text !== $before);

        return $text;
    }

    /**
     * Include leading text of indeterminate direction in the first span that follows it.
     *
     * Text that already starts with a span, or with '<br>', is returned unchanged, as is text
     * that contains no span at all.
     *
     * @param string $result Text with temporary <LTR>/<RTL> markup
     *
     * @return string The text with any leading unwrapped part moved inside the first span
     */
    private static function absorbLeadingNeutralText(string $result): string
    {
        if (str_starts_with($result, self::START_LTR) || str_starts_with($result, self::START_RTL) || str_starts_with($result, '<br>')) {
            return $result;
        }

        // Locate whichever span start comes first.
        $spanStart = strpos($result, self::START_LTR);
        $rtlStart  = strpos($result, self::START_RTL);
        if ($spanStart === false || ($rtlStart !== false && $rtlStart < $spanStart)) {
            $spanStart = $rtlStart;
        }

        if ($spanStart === false) {
            // No span anywhere: there is nothing to attach the text to
            return $result;
        }

        return substr($result, $spanStart, self::LENGTH_START)
            . substr($result, 0, $spanStart)
            . substr($result, $spanStart + self::LENGTH_START);
    }
519
520
    /**
     * Underline starred (preferred) names, or drop the star markers when underlining is unsafe.
     *
     * A name is marked either by an asterisk suffix on the word or by being enclosed in
     * <span class="starredname"> ... </span>.
     *
     * When the span direction equals the page direction, each marked name is wrapped in
     * <u> and </u>. Otherwise the asterisks and the starredname spans are removed and the
     * names are left plain, to avoid a TCPDF bug that mixes up the word order.
     *
     * @param string $textSpan  Text of a single directional span
     * @param string $direction Direction of that span, compared with the upper-cased page direction
     *
     * @return string The span text with the star markers converted or removed
     */
    private static function starredName(string $textSpan, string $direction): string
    {
        if ($direction !== strtoupper(I18N::direction())) {
            // Text and page directions differ: remove every asterisk and every starredname span
            $textSpan = str_replace('*', '', $textSpan);

            // Lazy match, so that each span is unwrapped on its own; keep the text if PCRE fails
            return preg_replace('~<span class="starredname">(.*?)</span>~', '\1', $textSpan) ?? $textSpan;
        }

        // Replace each asterisk by underlining the word it is attached to
        $starPos = strpos($textSpan, '*');
        while ($starPos !== false) {
            $trailingText = substr($textSpan, $starPos + 1);
            $beforeStar   = substr($textSpan, 0, $starPos);

            // The word starts after the last blank preceding the asterisk, or at the start of the text
            $lastBlank  = strrpos($beforeStar, ' ');
            $wordOffset = $lastBlank === false ? 0 : $lastBlank + 1;

            $textSpan = substr($beforeStar, 0, $wordOffset) . '<u>' . substr($beforeStar, $wordOffset) . '</u>' . $trailingText;
            $starPos  = strpos($textSpan, '*');
        }

        // Lazy match, so that several starred names in one span are underlined separately
        $textSpan = preg_replace('~<span class="starredname">(.*?)</span>~', '<u>\1</u>', $textSpan) ?? $textSpan;

        // The &nbsp; is a work-around for a TCPDF bug eating blanks.
        return str_replace([
            ' <u>',
            '</u> ',
        ], [
            '&nbsp;<u>',
            '</u>&nbsp;',
        ], $textSpan);
    }
568
569
    /**
     * Read one character from a byte string, starting at a given byte offset.
     *
     * The byte at $offset decides how many bytes are read: 2, 3 or 4 when it has the bit
     * pattern of a UTF-8 lead byte of that length, and 1 for anything else. The reported
     * length is that count, even if fewer bytes remain in the text.
     *
     * @param string $text
     * @param int    $offset
     *
     * @return array{letter:string,length:int}
     */
    private static function getChar(string $text, int $offset): array
    {
        if ($text === '') {
            return ['letter' => '', 'length' => 0];
        }

        $leadByte = ord(substr($text, $offset, 1));

        // The three bit masks are mutually exclusive, so at most one arm can match.
        $byteCount = match (true) {
            ($leadByte & 0xE0) === 0xC0 => 2,
            ($leadByte & 0xF0) === 0xE0 => 3,
            ($leadByte & 0xF8) === 0xF0 => 4,
            default                     => 1,
        };

        return [
            'letter' => substr($text, $offset, $byteCount),
            'length' => $byteCount,
        ];
    }
604
605
    /**
     * Append a line-break marker for the current direction to the result.
     *
     * Any waiting text is flushed to the result first. The marker written is '<' followed by
     * the current state and 'br>' (for example '<LTRbr>' or '<RTLbr>').
     *
     * @param string $result Output built so far; modified in place
     *
     * @return void
     */
    private static function breakCurrentSpan(string &$result): void
    {
        // Flush the pending text, then add the direction-specific break marker
        $result .= self::$waitingText . '<' . self::$currentState . 'br>';

        self::$waitingText = '';
    }
621
622
    /**
     * Open a span for the current direction and record where its text starts.
     *
     * Appends '<LTR>' or '<RTL>' according to self::$currentState (nothing for any other
     * state), then stores the resulting length of $result in self::$posSpanStart.
     *
     * @param string $result Output built so far; modified in place
     *
     * @return void
     */
    private static function beginCurrentSpan(string &$result): void
    {
        $result .= match (self::$currentState) {
            'LTR'   => self::START_LTR,
            'RTL'   => self::START_RTL,
            default => '',
        };

        self::$posSpanStart = strlen($result);
    }
640
641
    /**
     * Finish current span
     *
     * Cuts the text of the open span (everything from self::$posSpanStart onwards)
     * off the end of $result, tidies it up and appends it again, followed by the
     * end marker that belongs to self::$currentState. Text that should open the
     * following span instead is prepended to self::$waitingText.
     *
     * @param string $result Output built so far; rewritten in place
     * @param bool   $theEnd When true, a trailing numeric string or a trailing " - " stays
     *                       in this span instead of being handed to the following one
     *
     * @return void
     */
    private static function finishCurrentSpan(string &$result, bool $theEnd = false): void
    {
        $textSpan = substr($result, self::$posSpanStart);
        $result   = substr($result, 0, self::$posSpanStart);

        // Get rid of empty spans, so that our check for presence of RTL will work
        $result = str_replace([
            self::START_LTR . self::END_LTR,
            self::START_RTL . self::END_RTL,
        ], '', $result);

        // Numeric strings that are times (hh:mm:ss) have to be separated from surrounding numbers
        $textSpan = self::isolateTimeStrings($textSpan);

        if (self::$currentState === 'LTR') {
            $trailingBreaks = self::finishLtrSpan($result, $textSpan, $theEnd);
        } elseif (self::$currentState === 'RTL') {
            $trailingBreaks = self::finishRtlSpan($result, $textSpan, $theEnd);
        } else {
            // Neither LTR nor RTL: the text is passed through without an end marker
            $result         .= $textSpan;
            $trailingBreaks = '';
        }

        $result .= $trailingBreaks; // Get rid of any waiting <br>
    }

    /**
     * Remove all trailing blanks (' ' and '&nbsp;') from a text and return them.
     *
     * Each '&nbsp;' entity is removed as a whole (6 bytes), so that no partial
     * entity is left behind at the end of the text.
     *
     * @param string $text Text to shorten in place
     *
     * @return string The removed blanks, in their original order
     */
    private static function detachTrailingBlanks(string &$text): string
    {
        $blanks = '';

        while (true) {
            if (str_ends_with($text, ' ')) {
                $blanks = ' ' . $blanks;
                $text   = substr($text, 0, -1);
            } elseif (str_ends_with($text, '&nbsp;')) {
                $blanks = '&nbsp;' . $blanks;
                $text   = substr($text, 0, -6);
            } else {
                return $blanks;
            }
        }
    }

    /**
     * Locate the first blank (' ' or '&nbsp;') in a text.
     *
     * @param string $text
     *
     * @return array{0: int, 1: int}|null Offset and byte length of the blank, or null when there is none
     */
    private static function findFirstBlank(string $text): ?array
    {
        $posBlank = strpos($text, ' ');
        $posNbsp  = strpos($text, '&nbsp;');

        if ($posBlank === false && $posNbsp === false) {
            return null;
        }

        if ($posNbsp === false || ($posBlank !== false && $posBlank < $posNbsp)) {
            return [$posBlank, 1];
        }

        return [$posNbsp, 6];
    }

    /**
     * Return the offset of the first character that follows the last <RTLbr> found before $limit.
     *
     * @param string $textSpan
     * @param int    $limit Only the first $limit bytes of $textSpan are searched
     *
     * @return int 0 when there is no <RTLbr> before $limit
     */
    private static function posAfterLastRtlBreak(string $textSpan, int $limit): int
    {
        $posBreak = strrpos(substr($textSpan, 0, $limit), '<RTLbr>');

        // Skip exactly the marker itself (7 bytes), so that we point at the start of the line
        return $posBreak === false ? 0 : $posBreak + strlen('<RTLbr>');
    }

    /**
     * Exclude the blanks around a time (hh:mm:ss) from the LRE...PDF numeric string that contains it.
     *
     * Each numeric string is examined in turn. When it contains a colon, the blank
     * preceding the colon and the blanks following it are moved outside the LRE/PDF
     * pair by closing the numeric string before the blank and reopening it afterwards.
     *
     * @param string $textSpan
     *
     * @return string The span with the time strings separated
     */
    private static function isolateTimeStrings(string $textSpan): string
    {
        // The offsets below treat LRE and PDF as 3 bytes long - presumably their UTF-8 length; verify against the constants
        $done = '';

        while ($textSpan !== '') {
            if (!str_contains($textSpan, ':')) {
                break; // No more possible time strings
            }
            $posLRE = strpos($textSpan, self::UTF8_LRE);
            if ($posLRE === false) {
                break; // No more numeric strings
            }
            $posPDF = strpos($textSpan, self::UTF8_PDF, $posLRE);
            if ($posPDF === false) {
                break; // No more numeric strings
            }

            $done          .= substr($textSpan, 0, $posLRE + 3); // Copy everything preceding the numeric string, and its LRE
            $numericString = substr($textSpan, $posLRE + 3, $posPDF - $posLRE); // The numeric string, including its closing PDF
            $textSpan      = substr($textSpan, $posPDF + 3);

            $posColon = strpos($numericString, ':');
            if ($posColon === false) {
                // Nothing that looks like a time here
                $done .= $numericString;
                continue;
            }

            $separator = self::findFirstBlank($numericString);
            if ($separator !== null && $posColon > $separator[0]) {
                // We have a time string preceded by a blank: Exclude that blank from the numeric string
                [$posSeparator, $lengthSeparator] = $separator;

                $done .= substr($numericString, 0, $posSeparator)
                    . self::UTF8_PDF
                    . substr($numericString, $posSeparator, $lengthSeparator)
                    . self::UTF8_LRE;

                $numericString = substr($numericString, $posSeparator + $lengthSeparator);
            }

            $separator = self::findFirstBlank($numericString);
            if ($separator === null) {
                // The time string isn't followed by a blank
                $textSpan = $numericString . $textSpan;
                continue;
            }

            // We have a time string followed by a blank: Exclude that blank from the numeric string
            [$posSeparator, $lengthSeparator] = $separator;

            $done .= substr($numericString, 0, $posSeparator)
                . self::UTF8_PDF
                . substr($numericString, $posSeparator, $lengthSeparator);

            // Whatever follows the blank is examined again as a numeric string of its own
            $textSpan = self::UTF8_LRE . substr($numericString, $posSeparator + $lengthSeparator) . $textSpan;
        }

        return $done . $textSpan;
    }

    /**
     * Move a trailing numeric string, with the blanks around it, to the front of self::$waitingText.
     *
     * Nothing is moved, and the span is left untouched, when the span does not end
     * (blanks aside) with a PDF control code.
     *
     * @param string $textSpan Span text; shortened in place when a numeric string was moved
     *
     * @return void
     */
    private static function deferTrailingNumber(string &$textSpan): void
    {
        $remaining = $textSpan;

        // Look for trailing blanks and tentatively move them
        $deferred = self::detachTrailingBlanks($remaining);

        if ($remaining === '') {
            // NOTE(review): a span made only of blanks ends up empty and its blanks are not
            // re-queued. This mirrors the long-standing behaviour - confirm that it is intended.
            $textSpan = '';

            return;
        }

        if (substr($remaining, -3) !== self::UTF8_PDF) {
            // There is no trailing numeric string
            return;
        }

        // We have a numeric string
        $posStartNumber = strrpos($remaining, self::UTF8_LRE);
        if ($posStartNumber === false) {
            $posStartNumber = 0;
        }
        $deferred  = substr($remaining, $posStartNumber) . $deferred;
        $remaining = substr($remaining, 0, $posStartNumber);

        // Look for more blanks and move them too
        $deferred = self::detachTrailingBlanks($remaining) . $deferred;

        self::$waitingText = $deferred . self::$waitingText;
        $textSpan          = $remaining;
    }

    /**
     * Decide whether a parenthesized text looks like an ID number such as "(X123)".
     *
     * After the "(", the first character must not be a digit, the second must be
     * a digit, and so must the last character before the final one.
     * This check won’t work if somebody uses ID numbers with an unusual format.
     *
     * @param string $parenthesized Text starting at the opening parenthesis
     *
     * @return bool
     */
    private static function looksLikeRecordId(string $parenthesized): bool
    {
        $temp = self::stripLrmRlm($parenthesized); // Get rid of UTF8 control codes

        $offset    = 1;
        $charArray = self::getChar($temp, $offset); // Get 1st character of parenthesized text
        if (str_contains(self::NUMBERS, $charArray['letter'])) {
            return false;
        }

        $offset += $charArray['length']; // Point at 2nd character of parenthesized text
        if (!str_contains(self::NUMBERS, substr($temp, $offset, 1))) {
            return false;
        }

        // 1st character of parenthesized text is alpha, 2nd character is a digit; last has to be a digit too
        return str_contains(self::NUMBERS, substr($temp, -2, 1));
    }

    /**
     * Finish a span whose state is 'LTR' and append it to the output.
     *
     * @param string $result   Output built so far; extended in place
     * @param string $textSpan Text of the span
     * @param bool   $theEnd   When true, a trailing numeric string stays in this span
     *
     * @return string Plain <br> tags (with any blanks that preceded them) to append after the span
     */
    private static function finishLtrSpan(string &$result, string $textSpan, bool $theEnd): string
    {
        $direction = I18N::direction();

        // Move trailing numeric strings to the following RTL text. Include any blanks preceding or following the numeric text too.
        if ($direction === 'rtl' && self::$previousState === 'RTL' && !$theEnd) {
            self::deferTrailingNumber($textSpan);
        }

        // Move any trailing <br>, optionally preceded or followed by blanks, outside this LTR span
        $savedSpan      = $textSpan;
        $trailingBlanks = self::detachTrailingBlanks($textSpan);
        $trailingBreaks = '';
        while (str_ends_with($textSpan, '<LTRbr>')) {
            $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span
            $textSpan       = substr($textSpan, 0, -7);
        }
        if ($trailingBreaks !== '') {
            // Blanks in front of the breaks leave the span together with the breaks
            $trailingBreaks    = self::detachTrailingBlanks($textSpan) . $trailingBreaks;
            self::$waitingText = $trailingBlanks . self::$waitingText; // Put those trailing blanks inside the following span
        } else {
            $textSpan = $savedSpan;
        }

        $trailingBlanks      = '';
        $trailingPunctuation = '';
        $trailingID          = '';
        $trailingSeparator   = '';
        $leadingSeparator    = '';

        if ($direction === 'rtl') {
            if (str_contains($result, self::START_RTL)) {
                // Remove trailing blanks for inclusion in a separate LTR span
                $trailingBlanks = self::detachTrailingBlanks($textSpan);

                // Remove trailing punctuation for inclusion in a separate LTR span
                $trailingChar = $textSpan === '' ? "\n" : substr($textSpan, -1);
                if (str_contains(self::PUNCTUATION, $trailingChar)) {
                    $trailingPunctuation = $trailingChar;
                    $textSpan            = substr($textSpan, 0, -1);
                }
            }

            // Remove trailing ID numbers that look like "(xnnn)" for inclusion in a separate LTR span.
            // If the parenthesized text doesn't look like an ID number, we don't want to touch it.
            if (str_ends_with($textSpan, ')')) {
                $posLeftParen = strrpos($textSpan, '(');
                if ($posLeftParen !== false && self::looksLikeRecordId(substr($textSpan, $posLeftParen))) {
                    $trailingID = substr($textSpan, $posLeftParen);
                    $textSpan   = substr($textSpan, 0, $posLeftParen);
                }
            }

            // Look for " - " or blank preceding the ID number and remove it for inclusion in a separate LTR span
            if ($trailingID !== '') {
                while (true) {
                    if (str_ends_with($textSpan, ' ')) {
                        $token = ' ';
                    } elseif (str_ends_with($textSpan, '&nbsp;')) {
                        $token = '&nbsp;';
                    } elseif (str_ends_with($textSpan, '-')) {
                        $token = '-';
                    } else {
                        break;
                    }
                    $trailingSeparator = $token . $trailingSeparator;
                    $textSpan          = substr($textSpan, 0, -strlen($token));
                }
            }

            // Look for " - " preceding the text and remove it for inclusion in a separate LTR span
            $foundSeparator = false;
            $savedSpan      = $textSpan;
            while (true) {
                if (str_starts_with($textSpan, ' ')) {
                    $token = ' ';
                } elseif (str_starts_with($textSpan, '&nbsp;')) {
                    $token = '&nbsp;';
                } elseif (str_starts_with($textSpan, '-')) {
                    $token          = '-';
                    $foundSeparator = true;
                } else {
                    break;
                }
                // Each token goes in front of the ones found before it, exactly as this code always did
                $leadingSeparator = $token . $leadingSeparator;
                $textSpan         = substr($textSpan, strlen($token));
            }
            if (!$foundSeparator) {
                // Only blanks, no dash: leave the text alone
                $textSpan         = $savedSpan;
                $leadingSeparator = '';
            }
        }

        // We're done: finish the span
        $textSpan = self::starredName($textSpan, 'LTR'); // Wrap starred name in <u> and </u> tags

        // Remove blanks that precede <LTRbr>; repeat until nothing changes, because blanks can be stacked
        do {
            $before   = $textSpan;
            $textSpan = str_replace([' <LTRbr>', '&nbsp;<LTRbr>'], '<LTRbr>', $textSpan);
        } while ($textSpan !== $before);

        if ($leadingSeparator !== '') {
            $result .= self::START_LTR . $leadingSeparator . self::END_LTR;
        }
        $result .= $textSpan . self::END_LTR;

        // Everything that was split off follows in LTR spans of its own
        foreach ([$trailingSeparator, $trailingID, $trailingPunctuation, $trailingBlanks] as $extraSpan) {
            if ($extraSpan !== '') {
                $result .= self::START_LTR . $extraSpan . self::END_LTR;
            }
        }

        return $trailingBreaks;
    }

    /**
     * Finish a span whose state is 'RTL' and append it to the output.
     *
     * @param string $result   Output built so far; extended in place
     * @param string $textSpan Text of the span
     * @param bool   $theEnd   When true, a trailing numeric string or " - " stays in this span
     *
     * @return string Plain <br> tags to append after the span
     */
    private static function finishRtlSpan(string &$result, string $textSpan, bool $theEnd): string
    {
        $direction = I18N::direction();

        // Move any trailing <br>, optionally followed by blanks, outside this RTL span
        $savedSpan      = $textSpan;
        $trailingBlanks = self::detachTrailingBlanks($textSpan);
        $trailingBreaks = '';
        while (str_ends_with($textSpan, '<RTLbr>')) {
            $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span
            $textSpan       = substr($textSpan, 0, -7);
        }
        if ($trailingBreaks !== '') {
            self::$waitingText = $trailingBlanks . self::$waitingText; // Put those trailing blanks inside the following span
        } else {
            $textSpan = $savedSpan;
        }

        // Move trailing numeric strings to the following LTR text. Include any blanks preceding or following the numeric text too.
        if (!$theEnd && $direction !== 'rtl') {
            self::deferTrailingNumber($textSpan);
        }

        // Trailing " - " needs to be prefixed to the following span
        if (!$theEnd && str_ends_with($textSpan, ' - ')) {
            $textSpan          = substr($textSpan, 0, -3);
            self::$waitingText = ' - ' . self::$waitingText;
        }

        if ($direction === 'rtl') {
            // Look for " - " preceding <RTLbr> and relocate it to the front of its line
            $searchFrom = 0;
            while (($posDashString = strpos($textSpan, ' - <RTLbr>', $searchFrom)) !== false) {
                $posStringStart = self::posAfterLastRtlBreak($textSpan, $posDashString);

                $relocated = substr($textSpan, 0, $posStringStart)
                    . ' - '
                    . substr($textSpan, $posStringStart, $posDashString - $posStringStart)
                    . substr($textSpan, $posDashString + 3);

                if ($relocated === $textSpan) {
                    // The dash already is at the front of its line: skip it, or we would find it again forever
                    $searchFrom = $posDashString + 1;
                    continue;
                }

                $textSpan = $relocated;
            }
        }

        // Strip leading spaces from the RTL text
        $countLeadingSpaces = 0;
        while (true) {
            if (str_starts_with($textSpan, ' ')) {
                $textSpan = substr($textSpan, 1);
            } elseif (str_starts_with($textSpan, '&nbsp;')) {
                $textSpan = substr($textSpan, 6);
            } else {
                break;
            }
            $countLeadingSpaces++;
        }

        // Strip trailing spaces from the RTL text
        $countTrailingSpaces = 0;
        while (true) {
            if (str_ends_with($textSpan, ' ')) {
                $textSpan = substr($textSpan, 0, -1);
            } elseif (str_ends_with($textSpan, '&nbsp;')) {
                $textSpan = substr($textSpan, 0, -6);
            } else {
                break;
            }
            $countTrailingSpaces++;
        }

        // Look for trailing " -", reverse it, and relocate it to the front of its line
        if (str_ends_with($textSpan, ' -')) {
            $posDashString  = strlen($textSpan) - 2;
            $posStringStart = self::posAfterLastRtlBreak($textSpan, $posDashString);

            $textSpan = substr($textSpan, 0, $posStringStart)
                . '- '
                . substr($textSpan, $posStringStart, $posDashString - $posStringStart);
        }

        // Put the stripped blanks back as plain spaces, one per blank that was removed
        if ($countLeadingSpaces !== 0) {
            $newLength = strlen($textSpan) + $countLeadingSpaces;
            $textSpan  = str_pad($textSpan, $newLength, ' ', $direction === 'rtl' ? STR_PAD_LEFT : STR_PAD_RIGHT);
        }
        if ($countTrailingSpaces !== 0) {
            if ($direction === 'ltr') {
                if ($trailingBreaks === '') {
                    // Move trailing RTL spaces to front of following LTR span
                    $newLength         = strlen(self::$waitingText) + $countTrailingSpaces;
                    self::$waitingText = str_pad(self::$waitingText, $newLength, ' ', STR_PAD_LEFT);
                }
            } else {
                $newLength = strlen($textSpan) + $countTrailingSpaces;
                $textSpan  = str_pad($textSpan, $newLength);
            }
        }

        // We're done: finish the span
        $textSpan = self::starredName($textSpan, 'RTL'); // Wrap starred name in <u> and </u> tags
        $result   .= $textSpan . self::END_RTL;

        return $trailingBreaks;
    }
1152
}
1153