FunctionsRtl::spanLtrRtl()   F
last analyzed

Complexity

Conditions 82
Paths > 20000

Size

Total Lines 401
Code Lines 257

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 82
eloc 257
nc 22632961
nop 1
dl 0
loc 401
rs 0
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

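Commonly applied refactorings include Extract Method: move a commented or otherwise self-contained part of the body into its own well-named method.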

1
<?php
2
3
/**
4
 * webtrees: online genealogy
5
 * Copyright (C) 2022 webtrees development team
6
 * This program is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 * You should have received a copy of the GNU General Public License
15
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
 */
17
18
declare(strict_types=1);
19
20
namespace Fisharebest\Webtrees\Functions;
21
22
use Fisharebest\Webtrees\I18N;
23
24
use function str_contains;
25
26
/**
27
 * RTL Functions for use in the PDF/HTML reports
28
 */
29
class FunctionsRtl
30
{
31
    private const UTF8_LRM = "\xE2\x80\x8E"; // U+200E (Left to Right mark:  zero-width character with LTR directionality)
32
    private const UTF8_RLM = "\xE2\x80\x8F"; // U+200F (Right to Left mark:  zero-width character with RTL directionality)
33
    private const UTF8_LRO = "\xE2\x80\xAD"; // U+202D (Left to Right override: force everything following to LTR mode)
34
    private const UTF8_RLO = "\xE2\x80\xAE"; // U+202E (Right to Left override: force everything following to RTL mode)
35
    private const UTF8_LRE = "\xE2\x80\xAA"; // U+202A (Left to Right embedding: treat everything following as LTR text)
36
    private const UTF8_RLE = "\xE2\x80\xAB"; // U+202B (Right to Left embedding: treat everything following as RTL text)
37
    private const UTF8_PDF = "\xE2\x80\xAC"; // U+202C (Pop directional formatting: restore state prior to last LRO, RLO, LRE, RLE)
38
39
    private const OPEN_PARENTHESES = '([{';
40
41
    private const CLOSE_PARENTHESES = ')]}';
42
43
    private const NUMBERS = '0123456789';
44
45
    private const NUMBER_PREFIX = '+-'; // Treat these like numbers when at beginning or end of numeric strings
46
47
    private const NUMBER_PUNCTUATION = '- ,.:/'; // Treat these like numbers when inside numeric strings
48
49
    private const PUNCTUATION = ',.:;?!';
50
51
    // Markup
52
    private const START_LTR    = '<LTR>';
53
    private const END_LTR      = '</LTR>';
54
    private const START_RTL    = '<RTL>';
55
    private const END_RTL      = '</RTL>';
56
    private const LENGTH_START = 5;
57
    private const LENGTH_END   = 6;
58
59
    /** @var string Were we previously processing LTR or RTL. */
60
    private static $previousState;
61
62
    /** @var string Are we currently processing LTR or RTL. */
63
    private static $currentState;
64
65
    /** @var string Text waiting to be processed. */
66
    private static $waitingText;
67
68
    /** @var int Offset into the text. */
69
    private static $posSpanStart;
70
71
    /**
     * Remove every directional control sequence from a string.
     *
     * Besides the &lrm; and &rlm; entities (all-lower-case and all-upper-case spelling),
     * the UTF-8 encoded LRM, RLM, LRO, RLO, LRE, RLE and PDF characters are removed too.
     * It should be used for all text that has been passed through the PrintReady() function
     * before that text is stored in the database. The database should NEVER contain these
     * characters.
     *
     * @param string $inputText The string from which the directional sequences should be stripped
     *
     * @return string The input string, with all of the above sequences removed
     */
    public static function stripLrmRlm(string $inputText): string
    {
        // Raw UTF-8 byte sequences of the directional control characters
        $utf8ControlCodes = [
            self::UTF8_LRM,
            self::UTF8_RLM,
            self::UTF8_LRO,
            self::UTF8_RLO,
            self::UTF8_LRE,
            self::UTF8_RLE,
            self::UTF8_PDF,
        ];

        // HTML entity spellings of the two directional marks
        $markEntities = [
            '&lrm;',
            '&rlm;',
            '&LRM;',
            '&RLM;',
        ];

        return str_replace(array_merge($utf8ControlCodes, $markEntities), '', $inputText);
    }
96
97
    /**
     * This function encapsulates all texts in the input with <span dir="ltr"> or
     * <span dir="rtl"> and </span> according to the directionality detected for each run.
     *
     * Processing outline:
     *  - line feeds become <br>, and existing directional control codes are stripped;
     *  - the text is scanned one (UTF-8) character at a time, starting in the page
     *    direction reported by I18N::direction();
     *  - HTML elements, HTML entities and "{{...}}" directives are copied through unchanged
     *    (except that a short "<br...>" element breaks the current span, and the entity
     *    "&nbsp;" is normalised to lower case);
     *  - numeric strings inside RTL text are wrapped in LRE ... PDF control codes;
     *  - spans are first written with the internal markers <LTR>, </LTR>, <RTL>, </RTL>,
     *    cleaned up, and finally converted to real <span> elements.
     *
     * The method uses (and overwrites) the static scanner state of this class.
     *
     * @param string $inputText Raw input
     *
     * @return string The string with all texts encapsulated as required
     */
    public static function spanLtrRtl(string $inputText): string
    {
        if ($inputText === '') {
            // Nothing to do
            return '';
        }

        $workingText = str_replace("\n", '<br>', $inputText);
        $workingText = str_replace([
            '<span class="starredname"><br>',
            '<span<br>class="starredname">',
        ], '<br><span class="starredname">', $workingText); // Reposition some incorrectly placed line breaks
        $workingText = self::stripLrmRlm($workingText); // Get rid of any existing UTF8 control codes

        self::$previousState = '';
        self::$currentState  = strtoupper(I18N::direction());
        $numberState         = false; // Set when we're inside a numeric string
        $result              = '';
        self::$waitingText   = '';
        $openParDirection    = []; // Direction per parenthesis type; '?' means "not yet known"

        self::beginCurrentSpan($result);

        while ($workingText !== '') {
            $charArray     = self::getChar($workingText, 0); // Get the next ASCII or UTF-8 character
            $currentLetter = $charArray['letter'];
            $currentLen    = $charArray['length'];

            $openParIndex  = strpos(self::OPEN_PARENTHESES, $currentLetter); // Which opening parenthesis is this?
            $closeParIndex = strpos(self::CLOSE_PARENTHESES, $currentLetter); // Which closing parenthesis is this?

            switch ($currentLetter) {
                case '<':
                    // Assume this '<' starts an HTML element
                    $endPos = strpos($workingText, '>'); // look for the terminating '>'
                    if ($endPos === false) {
                        // Unterminated: treat the lone '<' as the whole element
                        $endPos = 0;
                    }
                    $currentLen += $endPos;
                    $element    = substr($workingText, 0, $currentLen);
                    $temp       = strtolower(substr($element, 0, 3));
                    if (strlen($element) < 7 && $temp === '<br') {
                        // A line break also terminates any numeric string in progress
                        if ($numberState) {
                            $numberState = false;
                            if (self::$currentState === 'RTL') {
                                self::$waitingText .= self::UTF8_PDF;
                            }
                        }
                        self::breakCurrentSpan($result);
                    } elseif (self::$waitingText === '') {
                        $result .= $element;
                    } else {
                        self::$waitingText .= $element;
                    }
                    $workingText = substr($workingText, $currentLen);
                    break;
                case '&':
                    // Assume this '&' starts an HTML entity
                    $endPos = strpos($workingText, ';'); // look for the terminating ';'
                    if ($endPos === false) {
                        // Unterminated: treat the lone '&' as the whole entity
                        $endPos = 0;
                    }
                    $currentLen += $endPos;
                    $entity     = substr($workingText, 0, $currentLen);
                    if (strtolower($entity) === '&nbsp;') {
                        // Ensure consistent case for this entity: the checks further down
                        // compare case-sensitively against '&nbsp;'. This must REPLACE the
                        // entity; appending would emit the non-breaking space twice.
                        $entity = '&nbsp;';
                    }
                    if (self::$waitingText === '') {
                        $result .= $entity;
                    } else {
                        self::$waitingText .= $entity;
                    }
                    $workingText = substr($workingText, $currentLen);
                    break;
                case '{':
                    if (substr($workingText, 1, 1) === '{') {
                        // Assume this '{{' starts a TCPDF directive
                        $endPos = strpos($workingText, '}}'); // look for the terminating '}}'
                        if ($endPos === false) {
                            // Unterminated: only the '{{' itself is copied
                            $endPos = 0;
                        }
                        $currentLen        = $endPos + 2;
                        $directive         = substr($workingText, 0, $currentLen);
                        $workingText       = substr($workingText, $currentLen);
                        $result            .= self::$waitingText . $directive;
                        self::$waitingText = '';
                        break;
                    }
                    // no break
                default:
                    // Look for strings of numbers with optional leading or trailing + or -
                    // and with optional embedded numeric punctuation
                    if ($numberState) {
                        // If we're inside a numeric string, look for reasons to end it
                        $offset    = 0; // Be sure to look at the current character first
                        $charArray = self::getChar($workingText . "\n", $offset);
                        if (!str_contains(self::NUMBERS, $charArray['letter'])) {
                            // This is not a digit. Is it numeric punctuation?
                            if (substr($workingText . "\n", $offset, 6) === '&nbsp;') {
                                $offset += 6; // This could be numeric punctuation
                            } elseif (str_contains(self::NUMBER_PUNCTUATION, $charArray['letter'])) {
                                $offset += $charArray['length']; // This could be numeric punctuation
                            }
                            // If the next character is a digit, the current character is numeric punctuation
                            $charArray = self::getChar($workingText . "\n", $offset);
                            if (!str_contains(self::NUMBERS, $charArray['letter'])) {
                                // This is not a digit. End the run of digits and punctuation.
                                $numberState = false;
                                if (self::$currentState === 'RTL') {
                                    if (!str_contains(self::NUMBER_PREFIX, $currentLetter)) {
                                        $currentLetter = self::UTF8_PDF . $currentLetter;
                                    } else {
                                        $currentLetter .= self::UTF8_PDF; // Include a trailing + or - in the run
                                    }
                                }
                            }
                        }
                    } else {
                        // If we're outside a numeric string, look for reasons to start it
                        if (str_contains(self::NUMBER_PREFIX, $currentLetter)) {
                            // This might be a number lead-in
                            $offset   = $currentLen;
                            $nextChar = substr($workingText . "\n", $offset, 1);
                            if (str_contains(self::NUMBERS, $nextChar)) {
                                $numberState = true; // We found a digit: the lead-in is therefore numeric
                                if (self::$currentState === 'RTL') {
                                    $currentLetter = self::UTF8_LRE . $currentLetter;
                                }
                            }
                        } elseif (str_contains(self::NUMBERS, $currentLetter)) {
                            $numberState = true; // The current letter is a digit
                            if (self::$currentState === 'RTL') {
                                $currentLetter = self::UTF8_LRE . $currentLetter;
                            }
                        }
                    }

                    // Determine the directionality of the current UTF-8 character.
                    // The "while (true)" is only a breakable block: "break" leaves it with
                    // $newState decided, "break 2" also leaves the switch (more input needed).
                    $newState = self::$currentState;

                    while (true) {
                        if (I18N::scriptDirection(I18N::textScript($currentLetter)) === 'rtl') {
                            if (self::$currentState === '') {
                                $newState = 'RTL';
                                break;
                            }

                            if (self::$currentState === 'RTL') {
                                break;
                            }
                            // Switch to RTL only if this isn't a solitary RTL letter
                            $tempText = substr($workingText, $currentLen);
                            while ($tempText !== '') {
                                $nextCharArray = self::getChar($tempText, 0);
                                $nextLetter    = $nextCharArray['letter'];
                                $nextLen       = $nextCharArray['length'];
                                $tempText      = substr($tempText, $nextLen);

                                if (I18N::scriptDirection(I18N::textScript($nextLetter)) === 'rtl') {
                                    $newState = 'RTL';
                                    break 2;
                                }

                                if (str_contains(self::PUNCTUATION, $nextLetter) || str_contains(self::OPEN_PARENTHESES, $nextLetter)) {
                                    $newState = 'RTL';
                                    break 2;
                                }

                                if ($nextLetter === ' ') {
                                    break;
                                }
                                $nextLetter .= substr($tempText . "\n", 0, 5);
                                if ($nextLetter === '&nbsp;') {
                                    break;
                                }
                            }
                            // This is a solitary RTL letter : wrap it in UTF8 control codes to force LTR directionality
                            $currentLetter = self::UTF8_LRO . $currentLetter . self::UTF8_PDF;
                            $newState      = 'LTR';
                            break;
                        }
                        if (($currentLen !== 1) || ($currentLetter >= 'A' && $currentLetter <= 'Z') || ($currentLetter >= 'a' && $currentLetter <= 'z')) {
                            // Since it's neither Hebrew nor Arabic, this UTF-8 character or ASCII letter must be LTR
                            $newState = 'LTR';
                            break;
                        }
                        if ($closeParIndex !== false) {
                            // This closing parenthesis has to inherit the matching opening parenthesis' directionality
                            if (!empty($openParDirection[$closeParIndex]) && $openParDirection[$closeParIndex] !== '?') {
                                $newState = $openParDirection[$closeParIndex];
                            }
                            $openParDirection[$closeParIndex] = '';
                            break;
                        }
                        if ($openParIndex !== false) {
                            // Opening parentheses always inherit the following directionality
                            self::$waitingText .= $currentLetter;
                            $workingText = substr($workingText, $currentLen);
                            while (true) {
                                if ($workingText === '') {
                                    break;
                                }
                                if (substr($workingText, 0, 1) === ' ') {
                                    // Spaces following this left parenthesis inherit the following directionality too
                                    self::$waitingText .= ' ';
                                    $workingText = substr($workingText, 1);
                                    continue;
                                }
                                if (substr($workingText, 0, 6) === '&nbsp;') {
                                    // Spaces following this left parenthesis inherit the following directionality too
                                    self::$waitingText .= '&nbsp;';
                                    $workingText = substr($workingText, 6);
                                    continue;
                                }
                                break;
                            }
                            $openParDirection[$openParIndex] = '?';
                            break 2; // double break because we're waiting for more information
                        }

                        // We have a digit or a "normal" special character.
                        //
                        // When this character is not at the start of the input string, it inherits the preceding directionality;
                        // at the start of the input string, it assumes the following directionality.
                        //
                        // Exceptions to this rule will be handled later during final clean-up.
                        //
                        self::$waitingText .= $currentLetter;
                        $workingText = substr($workingText, $currentLen);
                        if (self::$currentState !== '') {
                            $result .= self::$waitingText;
                            self::$waitingText = '';
                        }
                        break 2; // double break because we're waiting for more information
                    }
                    if ($newState !== self::$currentState) {
                        // A direction change has occurred
                        self::finishCurrentSpan($result);
                        self::$previousState = self::$currentState;
                        self::$currentState  = $newState;
                        self::beginCurrentSpan($result);
                    }
                    self::$waitingText .= $currentLetter;
                    $workingText       = substr($workingText, $currentLen);
                    $result            .= self::$waitingText;
                    self::$waitingText = '';

                    foreach ($openParDirection as $index => $value) {
                        // Since we now know the proper direction, remember it for all waiting opening parentheses
                        if ($value === '?') {
                            $openParDirection[$index] = self::$currentState;
                        }
                    }

                    break;
            }
        }

        // We're done. Finish last <span> if necessary
        if ($numberState) {
            // Close a numeric string that runs up to the end of the input
            if (self::$waitingText === '') {
                if (self::$currentState === 'RTL') {
                    $result .= self::UTF8_PDF;
                }
            } else {
                if (self::$currentState === 'RTL') {
                    self::$waitingText .= self::UTF8_PDF;
                }
            }
        }
        self::finishCurrentSpan($result, true);

        // Get rid of any waiting text
        if (self::$waitingText !== '') {
            if (I18N::direction() === 'rtl' && self::$currentState === 'LTR') {
                $result .= self::START_RTL;
                $result .= self::$waitingText;
                $result .= self::END_RTL;
            } else {
                $result .= self::START_LTR;
                $result .= self::$waitingText;
                $result .= self::END_LTR;
            }
            self::$waitingText = '';
        }

        // Lastly, do some more cleanups

        // Move leading RTL numeric strings to following LTR text
        // (this happens when the page direction is RTL and the original text begins with a number and is followed by LTR text)
        // The loop body runs at most once; "while" is used only so that "break" can leave it early.
        while (substr($result, 0, self::LENGTH_START + 3) === self::START_RTL . self::UTF8_LRE) {
            $spanEnd = strpos($result, self::END_RTL . self::START_LTR);
            if ($spanEnd === false) {
                break;
            }
            $textSpan = self::stripLrmRlm(substr($result, self::LENGTH_START + 3, $spanEnd - self::LENGTH_START - 3));
            if (I18N::scriptDirection(I18N::textScript($textSpan)) === 'rtl') {
                break;
            }
            $result = self::START_LTR . substr($result, self::LENGTH_START, $spanEnd - self::LENGTH_START) . substr($result, $spanEnd + self::LENGTH_START + self::LENGTH_END);
            break;
        }

        // On RTL pages, put trailing "." in RTL numeric strings into its own RTL span
        if (I18N::direction() === 'rtl') {
            $result = str_replace(self::UTF8_PDF . '.' . self::END_RTL, self::UTF8_PDF . self::END_RTL . self::START_RTL . '.' . self::END_RTL, $result);
        }

        // Trim trailing blanks preceding <br> in LTR text
        while (self::$previousState !== 'RTL') {
            if (str_contains($result, ' <LTRbr>')) {
                $result = str_replace(' <LTRbr>', '<LTRbr>', $result);
                continue;
            }
            if (str_contains($result, '&nbsp;<LTRbr>')) {
                $result = str_replace('&nbsp;<LTRbr>', '<LTRbr>', $result);
                continue;
            }
            if (str_contains($result, ' <br>')) {
                $result = str_replace(' <br>', '<br>', $result);
                continue;
            }
            if (str_contains($result, '&nbsp;<br>')) {
                $result = str_replace('&nbsp;<br>', '<br>', $result);
                continue;
            }
            break; // Neither space nor &nbsp; : we're done
        }

        // Trim trailing blanks preceding <br> in RTL text
        while (true) {
            if (str_contains($result, ' <RTLbr>')) {
                $result = str_replace(' <RTLbr>', '<RTLbr>', $result);
                continue;
            }
            if (str_contains($result, '&nbsp;<RTLbr>')) {
                $result = str_replace('&nbsp;<RTLbr>', '<RTLbr>', $result);
                continue;
            }
            break; // Neither space nor &nbsp; : we're done
        }

        // Convert '<LTRbr>' and '<RTLbr>': close the span, emit the break, re-open the span
        $result = str_replace([
            '<LTRbr>',
            '<RTLbr>',
        ], [
            self::END_LTR . '<br>' . self::START_LTR,
            self::END_RTL . '<br>' . self::START_RTL,
        ], $result);

        // Include leading indeterminate directional text in whatever follows
        if (substr($result . "\n", 0, self::LENGTH_START) !== self::START_LTR && substr($result . "\n", 0, self::LENGTH_START) !== self::START_RTL && substr($result . "\n", 0, 4) !== '<br>') {
            $leadingText = '';
            while (true) {
                if ($result === '') {
                    // No span marker found at all: keep the text as it was
                    $result = $leadingText;
                    break;
                }
                if (substr($result . "\n", 0, self::LENGTH_START) !== self::START_LTR && substr($result . "\n", 0, self::LENGTH_START) !== self::START_RTL) {
                    $leadingText .= substr($result, 0, 1);
                    $result = substr($result, 1);
                    continue;
                }
                // Re-insert the collected text just after the first span marker
                $result = substr($result, 0, self::LENGTH_START) . $leadingText . substr($result, self::LENGTH_START);
                break;
            }
        }

        // Include solitary "-" and "+" in surrounding RTL text.
        // The two search patterns pair up with the two replacements, so the second one
        // has to look for "+" (searching for "-" twice would leave "+" in its own LTR span).
        $result = str_replace([
            self::END_RTL . self::START_LTR . '-' . self::END_LTR . self::START_RTL,
            self::END_RTL . self::START_LTR . '+' . self::END_LTR . self::START_RTL,
        ], [
            '-',
            '+',
        ], $result);

        // Remove empty spans
        $result = str_replace([
            self::START_LTR . self::END_LTR,
            self::START_RTL . self::END_RTL,
        ], '', $result);

        // Finally, correct '<LTR>', '</LTR>', '<RTL>', and '</RTL>'
        // LTR text: <span dir="ltr"> text </span>
        // RTL text: <span dir="rtl"> text </span>

        $result = str_replace([
            self::START_LTR,
            self::END_LTR,
            self::START_RTL,
            self::END_RTL,
        ], [
            '<span dir="ltr">',
            '</span>',
            '<span dir="rtl">',
            '</span>',
        ], $result);

        return $result;
    }
507
508
    /**
     * Underline starred names, i.e. words that carry an asterisk suffix, to show the
     * preferred name.
     *
     * When the span direction equals the page direction, each "word*" becomes
     * "<u>word</u>" and a <span class="starredname"> wrapper becomes <u>...</u>.
     * Otherwise the asterisk and the wrapper are simply removed.
     *
     * @param string $textSpan  The text to process
     * @param string $direction Direction of the text, compared against the upper-cased page direction
     *
     * @return string
     */
    public static function starredName(string $textSpan, string $direction): string
    {
        $starredSpanPattern = '~<span class="starredname">(.*)</span>~';

        if ($direction !== strtoupper(I18N::direction())) {
            // Text and page directions differ:  remove the <span> and </span>
            $textSpan = preg_replace('~(.*)\*~', '\1', $textSpan);
            $textSpan = preg_replace($starredSpanPattern, '\1', $textSpan);

            return $textSpan;
        }

        // To avoid a TCPDF bug that mixes up the word order, the <u> and </u> tags are
        // inserted only here, where page and span directions are identical.
        while (($asteriskPos = strpos($textSpan, '*')) !== false) {
            $afterStar  = substr($textSpan, $asteriskPos + 1);
            $beforeStar = substr($textSpan, 0, $asteriskPos);

            // The starred word starts after the last blank, or at the very beginning
            $lastBlank  = strrpos($beforeStar, ' ');
            $wordOffset = $lastBlank === false ? 0 : $lastBlank + 1;

            $textSpan = substr($beforeStar, 0, $wordOffset) . '<u>' . substr($beforeStar, $wordOffset) . '</u>' . $afterStar;
        }

        $textSpan = preg_replace($starredSpanPattern, '<u>\1</u>', $textSpan);

        // The &nbsp; is a work-around for a TCPDF bug eating blanks.
        $blankBesideTag = [
            ' <u>',
            '</u> ',
        ];
        $nbspBesideTag = [
            '&nbsp;<u>',
            '</u>&nbsp;',
        ];

        return str_replace($blankBesideTag, $nbspBesideTag, $textSpan);
    }
556
557
    /**
     * Get the next character from an input string
     *
     * The byte at $offset is taken as the lead byte of a UTF-8 sequence and decides
     * how many bytes (1 to 4) are returned. An empty input yields an empty letter
     * of length 0.
     *
     * @param string $text   The text to read from
     * @param int    $offset Byte offset of the character within $text
     *
     * @return array{'letter':string,'length':int}
     */
    public static function getChar(string $text, int $offset): array
    {
        if ($text === '') {
            return ['letter' => '', 'length' => 0];
        }

        $leadByte = ord(substr($text, $offset, 1));

        // The three lead-byte patterns are mutually exclusive, so one chain suffices
        if (($leadByte & 0xF8) === 0xF0) {
            $byteCount = 4; // 11110xxx
        } elseif (($leadByte & 0xF0) === 0xE0) {
            $byteCount = 3; // 1110xxxx
        } elseif (($leadByte & 0xE0) === 0xC0) {
            $byteCount = 2; // 110xxxxx
        } else {
            $byteCount = 1; // Anything else is returned as a single byte
        }

        return [
            'letter' => substr($text, $offset, $byteCount),
            'length' => $byteCount,
        ];
    }
592
593
    /**
     * Insert <br> into current span
     *
     * Any waiting text is flushed to the result first. The break itself is written as a
     * placeholder built from the current state ("<LTRbr>" or "<RTLbr>", or a plain "<br>"
     * when the state is empty).
     *
     * @param string $result The output built so far (modified in place)
     *
     * @return void
     */
    public static function breakCurrentSpan(string &$result): void
    {
        // Take over the pending text and clear the buffer
        $pendingText       = self::$waitingText;
        self::$waitingText = '';

        // Interrupt the current span, insert that <br>, and then continue the current span
        $result .= $pendingText . '<' . self::$currentState . 'br>';
    }
609
610
    /**
     * Begin current span
     *
     * Appends the opening marker that matches the current state (nothing when the state
     * is neither "LTR" nor "RTL") and records where the content of the span starts.
     *
     * @param string $result The output built so far (modified in place)
     *
     * @return void
     */
    public static function beginCurrentSpan(string &$result): void
    {
        $openingMarkers = [
            'LTR' => self::START_LTR,
            'RTL' => self::START_RTL,
        ];

        // An unknown or empty state opens no span
        $result .= $openingMarkers[self::$currentState] ?? '';

        // Remember the byte offset at which the text of this span begins
        self::$posSpanStart = strlen($result);
    }
628
629
    /**
     * Finish current span
     *
     * Takes the text that was appended to $result since self::$posSpanStart,
     * rearranges it according to self::$currentState (LTR or RTL) and the page
     * direction, and appends it back followed by the closing marker of the span.
     * Pieces that belong to the following span (blanks after a trailing break,
     * trailing numeric strings, a trailing " - ") are queued in self::$waitingText.
     *
     * @param string $result Output built so far; modified in place
     * @param bool   $theEnd When true, trailing numeric strings and a trailing " - "
     *                       are kept in this span instead of being queued in
     *                       self::$waitingText
     *
     * @return void
     */
    public static function finishCurrentSpan(string &$result, bool $theEnd = false): void
    {
        $textSpan = substr($result, self::$posSpanStart);
        $result   = substr($result, 0, self::$posSpanStart);

        // Get rid of empty spans, so that our check for presence of RTL will work
        $result = str_replace([
            self::START_LTR . self::END_LTR,
            self::START_RTL . self::END_RTL,
        ], '', $result);

        // Numeric strings that are times (hh:mm:ss) have to be separated from surrounding numbers.
        $textSpan       = self::separateTimeStrings($textSpan);
        $trailingBreaks = '';

        /* ****************************** LTR text handling ******************************** */

        if (self::$currentState === 'LTR') {
            // Move trailing numeric strings to the following RTL text. Include any blanks preceding or following the numeric text too.
            if (I18N::direction() === 'rtl' && self::$previousState === 'RTL' && !$theEnd) {
                $textSpan = self::moveTrailingNumericString($textSpan);
            }

            // Move any trailing <br>, optionally preceded or followed by blanks, outside this LTR span
            $savedSpan                     = $textSpan;
            [$textSpan, $blanksAfterBreak] = self::splitTrailingBlanks($textSpan);
            while (substr($textSpan, -7) === '<LTRbr>') {
                $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span
                $textSpan       = substr($textSpan, 0, -7);
            }
            if ($trailingBreaks !== '') {
                // Blanks in front of the break(s) leave the span together with the break(s)
                [$textSpan, $blanksBeforeBreak] = self::splitTrailingBlanks($textSpan);
                $trailingBreaks                 = $blanksBeforeBreak . $trailingBreaks;
                self::$waitingText              = $blanksAfterBreak . self::$waitingText; // Put those trailing blanks inside the following span
            } else {
                // No trailing break: leave the span exactly as it was
                $textSpan = $savedSpan;
            }

            $trailingBlanks      = '';
            $trailingPunctuation = '';
            $trailingID          = '';
            $trailingSeparator   = '';
            $leadingSeparator    = '';

            if (I18N::direction() === 'rtl') {
                if (str_contains($result, self::START_RTL)) {
                    // Remove trailing blanks for inclusion in a separate LTR span
                    [$textSpan, $trailingBlanks] = self::splitTrailingBlanks($textSpan);

                    // Remove trailing punctuation for inclusion in a separate LTR span
                    $trailingChar = $textSpan === '' ? "\n" : substr($textSpan, -1);
                    if (str_contains(self::PUNCTUATION, $trailingChar)) {
                        $trailingPunctuation = $trailingChar;
                        $textSpan            = substr($textSpan, 0, -1);
                    }
                }

                // Remove trailing ID numbers that look like "(xnnn)" for inclusion in a separate LTR span
                if (substr($textSpan, -1) === ')') {
                    $posLeftParen = strrpos($textSpan, '(');
                    if ($posLeftParen !== false) {
                        $temp = self::stripLrmRlm(substr($textSpan, $posLeftParen)); // Get rid of UTF8 control codes

                        // If the parenthesized text doesn't look like an ID number,
                        // we don't want to touch it.
                        // This check won’t work if somebody uses ID numbers with an unusual format.
                        $offset    = 1;
                        $charArray = self::getChar($temp, $offset); // Get 1st character of parenthesized text
                        $offset    += $charArray['length']; // Point at 2nd character of parenthesized text

                        // 1st character must not be a digit, 2nd character and the last one have to be digits
                        if (
                            !str_contains(self::NUMBERS, $charArray['letter'])
                            && str_contains(self::NUMBERS, substr($temp, $offset, 1))
                            && str_contains(self::NUMBERS, substr($temp, -2, 1))
                        ) {
                            $trailingID = substr($textSpan, $posLeftParen);
                            $textSpan   = substr($textSpan, 0, $posLeftParen);
                        }
                    }
                }

                // Look for " - " or blank preceding the ID number and remove it for inclusion in a separate LTR span
                if ($trailingID !== '') {
                    while ($textSpan !== '') {
                        if (substr($textSpan, -1) === ' ') {
                            $trailingSeparator = ' ' . $trailingSeparator;
                            $textSpan          = substr($textSpan, 0, -1);
                            continue;
                        }
                        if (substr($textSpan, -6) === '&nbsp;') {
                            $trailingSeparator = '&nbsp;' . $trailingSeparator;
                            $textSpan          = substr($textSpan, 0, -6);
                            continue;
                        }
                        if (substr($textSpan, -1) === '-') {
                            $trailingSeparator = '-' . $trailingSeparator;
                            $textSpan          = substr($textSpan, 0, -1);
                            continue;
                        }
                        break;
                    }
                }

                // Look for " - " preceding the text and remove it for inclusion in a separate LTR span
                $foundSeparator = false;
                $savedSpan      = $textSpan;
                while ($textSpan !== '') {
                    if (substr($textSpan, 0, 1) === ' ') {
                        $leadingSeparator = ' ' . $leadingSeparator;
                        $textSpan         = substr($textSpan, 1);
                        continue;
                    }
                    if (substr($textSpan, 0, 6) === '&nbsp;') {
                        $leadingSeparator = '&nbsp;' . $leadingSeparator;
                        $textSpan         = substr($textSpan, 6);
                        continue;
                    }
                    if (substr($textSpan, 0, 1) === '-') {
                        $leadingSeparator = '-' . $leadingSeparator;
                        $textSpan         = substr($textSpan, 1);
                        $foundSeparator   = true;
                        continue;
                    }
                    break;
                }
                if (!$foundSeparator) {
                    // Only blanks (or nothing) in front: leave them where they were
                    $textSpan         = $savedSpan;
                    $leadingSeparator = '';
                }
            }

            // We're done: finish the span
            $textSpan = self::starredName($textSpan, 'LTR'); // Wrap starred name in <u> and </u> tags

            // Remove blanks that precede <LTRbr>; repeat because several blanks can precede one break
            while (str_contains($textSpan, ' <LTRbr>') || str_contains($textSpan, '&nbsp;<LTRbr>')) {
                $textSpan = str_replace([' <LTRbr>', '&nbsp;<LTRbr>'], '<LTRbr>', $textSpan);
            }

            if ($leadingSeparator !== '') {
                $result .= self::START_LTR . $leadingSeparator . self::END_LTR;
            }
            $result .= $textSpan . self::END_LTR;
            if ($trailingSeparator !== '') {
                $result .= self::START_LTR . $trailingSeparator . self::END_LTR;
            }
            if ($trailingID !== '') {
                $result .= self::START_LTR . $trailingID . self::END_LTR;
            }
            if ($trailingPunctuation !== '') {
                $result .= self::START_LTR . $trailingPunctuation . self::END_LTR;
            }
            if ($trailingBlanks !== '') {
                $result .= self::START_LTR . $trailingBlanks . self::END_LTR;
            }
        }

        /* ****************************** RTL text handling ******************************** */

        if (self::$currentState === 'RTL') {
            $lengthBreak = strlen('<RTLbr>');

            // Move any trailing <br>, optionally followed by blanks, outside this RTL span
            $savedSpan                     = $textSpan;
            [$textSpan, $blanksAfterBreak] = self::splitTrailingBlanks($textSpan);
            while (substr($textSpan, -$lengthBreak) === '<RTLbr>') {
                $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span
                $textSpan       = substr($textSpan, 0, -$lengthBreak);
            }
            if ($trailingBreaks !== '') {
                self::$waitingText = $blanksAfterBreak . self::$waitingText; // Put those trailing blanks inside the following span
            } else {
                $textSpan = $savedSpan;
            }

            // Move trailing numeric strings to the following LTR text. Include any blanks preceding or following the numeric text too.
            if (!$theEnd && I18N::direction() !== 'rtl') {
                $textSpan = self::moveTrailingNumericString($textSpan);
            }

            // Trailing " - " needs to be prefixed to the following span
            if (!$theEnd && substr($textSpan, -3) === ' - ') {
                $textSpan          = substr($textSpan, 0, -3);
                self::$waitingText = ' - ' . self::$waitingText;
            }

            if (I18N::direction() === 'rtl') {
                // Look for " - " preceding <RTLbr> and relocate it to the front of its line
                $searchFrom = 0;
                while (true) {
                    $posDashString = strpos($textSpan, ' - <RTLbr>', $searchFrom);
                    if ($posDashString === false) {
                        break;
                    }
                    $posStringStart = strrpos(substr($textSpan, 0, $posDashString), '<RTLbr>');
                    if ($posStringStart === false) {
                        $posStringStart = 0;
                    } else {
                        $posStringStart += $lengthBreak; // Point to the first char following the last <RTLbr>
                    }

                    $relocated = substr($textSpan, 0, $posStringStart) . ' - ' . substr($textSpan, $posStringStart, $posDashString - $posStringStart) . substr($textSpan, $posDashString + 3);
                    if ($relocated === $textSpan) {
                        // The dash already sits at the front of its line. Searching from the start
                        // again would find the same match forever, so step past it instead.
                        $searchFrom = $posDashString + 1;
                        continue;
                    }
                    $textSpan = $relocated;
                }
            }

            // Strip leading spaces from the RTL text (each &nbsp; counts as one space)
            $countLeadingSpaces = 0;
            while ($textSpan !== '') {
                if (substr($textSpan, 0, 1) === ' ') {
                    $countLeadingSpaces++;
                    $textSpan = substr($textSpan, 1);
                    continue;
                }
                if (substr($textSpan, 0, 6) === '&nbsp;') {
                    $countLeadingSpaces++;
                    $textSpan = substr($textSpan, 6);
                    continue;
                }
                break;
            }

            // Strip trailing spaces from the RTL text (each &nbsp; counts as one space)
            $countTrailingSpaces = 0;
            while ($textSpan !== '') {
                if (substr($textSpan, -1) === ' ') {
                    $countTrailingSpaces++;
                    $textSpan = substr($textSpan, 0, -1);
                    continue;
                }
                if (substr($textSpan, -6) === '&nbsp;') {
                    $countTrailingSpaces++;
                    $textSpan = substr($textSpan, 0, -6);
                    continue;
                }
                break;
            }

            // Look for trailing " -", reverse it, and relocate it to the front of the string
            if (substr($textSpan, -2) === ' -') {
                $posDashString  = strlen($textSpan) - 2;
                $posStringStart = strrpos(substr($textSpan, 0, $posDashString), '<RTLbr>');
                if ($posStringStart === false) {
                    $posStringStart = 0;
                } else {
                    $posStringStart += $lengthBreak; // Point to the first char following the last <RTLbr>
                }

                $textSpan = substr($textSpan, 0, $posStringStart) . '- ' . substr($textSpan, $posStringStart, $posDashString - $posStringStart) . substr($textSpan, $posDashString + 2);
            }

            // Put the stripped blanks back as plain spaces, on the side dictated by the page direction
            if ($countLeadingSpaces !== 0) {
                $leadingSpaces = str_repeat(' ', $countLeadingSpaces);
                $textSpan      = I18N::direction() === 'rtl' ? $leadingSpaces . $textSpan : $textSpan . $leadingSpaces;
            }
            if ($countTrailingSpaces !== 0) {
                if (I18N::direction() === 'ltr') {
                    if ($trailingBreaks === '') {
                        // Move trailing RTL spaces to front of following LTR span
                        self::$waitingText = str_repeat(' ', $countTrailingSpaces) . self::$waitingText;
                    }
                } else {
                    $textSpan .= str_repeat(' ', $countTrailingSpaces);
                }
            }

            // We're done: finish the span
            $textSpan = self::starredName($textSpan, 'RTL'); // Wrap starred name in <u> and </u> tags
            $result   .= $textSpan . self::END_RTL;
        }

        if (self::$currentState !== 'LTR' && self::$currentState !== 'RTL') {
            // Neither LTR nor RTL: there are no markers to close, put the text back unchanged
            $result .= $textSpan;
        }

        $result .= $trailingBreaks; // Get rid of any waiting <br>
    }

    /**
     * Split a text into its body and the run of blanks (" " and "&nbsp;") at its end.
     *
     * @param string $text
     *
     * @return array<int,string> [text without the trailing blanks, the trailing blanks in their original order]
     */
    private static function splitTrailingBlanks(string $text): array
    {
        $blanks = '';
        while ($text !== '') {
            if (substr($text, -1) === ' ') {
                $blanks = ' ' . $blanks;
                $text   = substr($text, 0, -1);
                continue;
            }
            if (substr($text, -6) === '&nbsp;') {
                // The entity is six bytes long: remove all of it, not just the ';'
                $blanks = '&nbsp;' . $blanks;
                $text   = substr($text, 0, -6);
                continue;
            }
            break;
        }

        return [$text, $blanks];
    }

    /**
     * Move a numeric string at the end of the span, together with the blanks around it,
     * to the front of self::$waitingText.
     *
     * A numeric string is recognised by the UTF8_PDF marker that ends it and starts at
     * the last UTF8_LRE marker (or at the start of the text when there is none).
     *
     * @param string $textSpan
     *
     * @return string The span without the moved text; the unchanged span when it does
     *                not end in a numeric string
     */
    private static function moveTrailingNumericString(string $textSpan): string
    {
        // Tentatively take the blanks that follow the number
        [$remaining, $trailingString] = self::splitTrailingBlanks($textSpan);

        if ($remaining === '') {
            // A span made up only of blanks ends up empty and nothing is queued.
            // NOTE(review): confirm that dropping these blanks is intended.
            return $remaining;
        }

        if (substr($remaining, -3) !== self::UTF8_PDF) {
            // There is no trailing numeric string
            return $textSpan;
        }

        // We have a numeric string
        $posStartNumber = strrpos($remaining, self::UTF8_LRE);
        if ($posStartNumber === false) {
            $posStartNumber = 0;
        }
        $trailingString = substr($remaining, $posStartNumber) . $trailingString;
        $remaining      = substr($remaining, 0, $posStartNumber);

        // Look for more spaces and move them too
        [$remaining, $leadingBlanks] = self::splitTrailingBlanks($remaining);

        self::$waitingText = $leadingBlanks . $trailingString . self::$waitingText;

        return $remaining;
    }

    /**
     * Look for numeric strings that are times (hh:mm:ss) and separate them from
     * surrounding numbers.
     *
     * Numeric strings are the parts enclosed in UTF8_LRE ... UTF8_PDF markers (both
     * handled as 3 bytes). Where such a string contains a colon and blanks, the
     * blanks are moved outside the markers, so the string is split at them.
     *
     * @param string $textSpan
     *
     * @return string
     */
    private static function separateTimeStrings(string $textSpan): string
    {
        $tempResult = '';
        while ($textSpan !== '') {
            if (strpos($textSpan, ':') === false) {
                break; // No more possible time strings
            }
            $posLRE = strpos($textSpan, self::UTF8_LRE);
            if ($posLRE === false) {
                break; // No more numeric strings
            }
            $posPDF = strpos($textSpan, self::UTF8_PDF, $posLRE);
            if ($posPDF === false) {
                break; // No more numeric strings
            }

            $tempResult    .= substr($textSpan, 0, $posLRE + 3); // Copy everything preceding the numeric string
            $numericString = substr($textSpan, $posLRE + 3, $posPDF - $posLRE); // Separate the entire numeric string (closing marker included)
            $textSpan      = substr($textSpan, $posPDF + 3);
            $posColon      = strpos($numericString, ':');
            if ($posColon === false) {
                // Nothing that looks like a time here
                $tempResult .= $numericString;
                continue;
            }

            // Find the first blank; the appended sentinels guarantee that both searches succeed
            $posBlank = strpos($numericString . ' ', ' ');
            $posNbsp  = strpos($numericString . '&nbsp;', '&nbsp;');
            if ($posBlank < $posNbsp) {
                $posSeparator    = $posBlank;
                $lengthSeparator = 1;
            } else {
                $posSeparator    = $posNbsp;
                $lengthSeparator = 6;
            }
            if ($posColon > $posSeparator) {
                // We have a time string preceded by a blank: Exclude that blank from the numeric string
                $tempResult    .= substr($numericString, 0, $posSeparator);
                $tempResult    .= self::UTF8_PDF;
                $tempResult    .= substr($numericString, $posSeparator, $lengthSeparator);
                $tempResult    .= self::UTF8_LRE;
                $numericString = substr($numericString, $posSeparator + $lengthSeparator);
            }

            $posBlank = strpos($numericString, ' ');
            $posNbsp  = strpos($numericString, '&nbsp;');
            if ($posBlank === false && $posNbsp === false) {
                // The time string isn't followed by a blank
                $textSpan = $numericString . $textSpan;
                continue;
            }

            // We have a time string followed by a blank: Exclude that blank from the numeric string
            if ($posNbsp === false || ($posBlank !== false && $posBlank < $posNbsp)) {
                $posSeparator    = $posBlank;
                $lengthSeparator = 1;
            } else {
                $posSeparator    = $posNbsp;
                $lengthSeparator = 6;
            }
            $tempResult    .= substr($numericString, 0, $posSeparator);
            $tempResult    .= self::UTF8_PDF;
            $tempResult    .= substr($numericString, $posSeparator, $lengthSeparator);
            $numericString = substr($numericString, $posSeparator + $lengthSeparator);
            $textSpan      = self::UTF8_LRE . $numericString . $textSpan;
        }

        return $tempResult . $textSpan;
    }
1140
1141
    /**
     * Wrap text, similar to the PHP wordwrap() function.
     *
     * Line lengths are measured in characters (mb_strlen), not bytes. A line is
     * broken at the last space that fits; that space is replaced by $sep.
     *
     * @param string $string Text to wrap
     * @param int    $width  Maximum number of characters per line
     * @param string $sep    Separator inserted at each break
     * @param bool   $cut    When true, a word longer than $width is cut at $width characters;
     *                       when false, such a word is kept whole and the break is made at the next space
     *
     * @return string
     */
    public static function utf8WordWrap(string $string, int $width = 75, string $sep = "\n", bool $cut = false): string
    {
        $out = '';
        // Compare with '' explicitly: a plain truthiness test treats the text "0" as
        // empty and would silently drop it.
        while ($string !== '') {
            if (mb_strlen($string) <= $width) {
                // Do not wrap any text that is less than the output area.
                $out .= $string;
                break;
            }

            $sub1 = mb_substr($string, 0, $width + 1);
            if (mb_substr($string, mb_strlen($sub1) - 1, 1) === ' ') {
                // include words that end by a space immediately after the area.
                $sub = $sub1;
            } else {
                $sub = mb_substr($string, 0, $width);
            }

            // $sub is a prefix of $string, so byte offsets found in $sub are valid in $string
            $spacepos = strrpos($sub, ' ');
            if ($spacepos === false) {
                // No space on line?
                if ($cut) {
                    if ($sub === '') {
                        // Happens when $width < 1: always consume at least one character,
                        // otherwise the loop would never make progress.
                        $sub = mb_substr($string, 0, 1);
                    }
                    $out    .= $sub . $sep;
                    $string = mb_substr($string, mb_strlen($sub));
                    continue;
                }

                // Keep the long word whole and break at the first space that follows it
                $spacepos = strpos($string, ' ');
                if ($spacepos === false) {
                    $out .= $string;
                    break;
                }
            }

            // Split at space; the cast keeps $string a string on PHP versions where
            // substr() returns false for an empty remainder.
            $out    .= substr($string, 0, $spacepos) . $sep;
            $string = (string) substr($string, $spacepos + 1);
        }

        return $out;
    }
1193
}
1194