Text::getExplodedText()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 4
dl 0
loc 8
ccs 5
cts 5
cp 1
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 0
crap 2
1
<?php
2
3
declare(strict_types = 1);
4
5
namespace DummyGenerator\Core;
6
7
use DummyGenerator\Definitions\Extension\Awareness\RandomizerAwareExtensionInterface;
8
use DummyGenerator\Definitions\Extension\Awareness\RandomizerAwareExtensionTrait;
9
use DummyGenerator\Definitions\Extension\Awareness\ReplacerAwareExtensionInterface;
10
use DummyGenerator\Definitions\Extension\Awareness\ReplacerAwareExtensionTrait;
11
use DummyGenerator\Definitions\Extension\Exception\ExtensionArgumentException;
12
use DummyGenerator\Definitions\Extension\Exception\ExtensionOverflowException;
13
use DummyGenerator\Definitions\Extension\TextExtensionInterface;
14
15
/**
 * Generates natural-looking text from a base text with a word-level Markov chain.
 *
 * The base text is either passed to the constructor or read from the bundled
 * default resource file. It is split into words once, and for every requested
 * index size a lookup table "previous words => possible next words" is built
 * and cached on the instance.
 */
class Text implements
    TextExtensionInterface,
    RandomizerAwareExtensionInterface,
    ReplacerAwareExtensionInterface
{
    use RandomizerAwareExtensionTrait;
    use ReplacerAwareExtensionTrait;

    /** Path of the text file that is loaded when no base text is given to the constructor. */
    protected string $defaultText = __DIR__ . '/../../resources/en_US.txt';

    /** Source text the word tables are built from. */
    protected string $baseText = '';

    /** @var non-empty-string String used to split the base text into words and to join generated words. */
    protected string $separator = ' ';

    /** Length added per word for the separator when tracking the size of the generated text. */
    protected int $separatorLen = 1;

    /** @var array<int, string> Cached words of the base text (filled lazily by getExplodedText()). */
    protected array $explodedText = [];

    /** @var array<int, array<string, array<int, string>>> Cached lookup tables, keyed by index size. */
    protected array $consecutiveWords = [];

    /** When true, generated text must begin with a word starting with an uppercase letter. */
    protected bool $textStartsWithUppercase = true;

    /**
     * @param string|null $baseText Text to learn from; when null the file at $defaultText is read instead.
     *                              If that file cannot be read, the base text stays empty.
     */
    public function __construct(?string $baseText = null)
    {
        if (null !== $baseText) {
            $this->baseText = $baseText;

            return;
        }

        $file = file_get_contents($this->defaultText);

        // compare strictly against false: a readable file with falsy content (e.g. "0") is still valid input
        if (false !== $file) {
            $this->baseText = $file;
        }
    }

    /**
     * Generate a text string by the Markov chain algorithm.
     *
     * The algorithm uses a weighted table with the specified number of words as the index
     * and the possible following words as the value. Candidate texts are generated until
     * one is longer than $min characters (as measured by the replacer's strlen()); at most
     * 99 candidates are tried.
     *
     * @param int $min Lower bound: the returned text is longer than $min characters.
     *                 Must be at least 1 and smaller than $max.
     * @param int $max Upper bound used while assembling the text. Must be at least 10.
     * @param int $indexSize  Determines how many words are considered for the generation of the next word.
     *                        The minimum is 1, and it produces a higher level of randomness, although the
     *                        generated text usually doesn't make sense. Higher index sizes (up to 5)
     *                        produce more correct text, at the price of less randomness.
     *
     * @throws ExtensionArgumentException when one of the arguments is outside the limits described above
     * @throws ExtensionOverflowException when no candidate longer than $min was produced within the retry limit
     *
     * @example 'Alice, swallowing down her flamingo, and began by taking the little golden key'
     */
    public function realText(int $min = 160, int $max = 200, int $indexSize = 2): string
    {
        if ($min < 1) {
            throw new ExtensionArgumentException('min must be at least 1');
        }

        if ($max < 10) {
            throw new ExtensionArgumentException('max must be at least 10');
        }

        if ($indexSize < 1) {
            throw new ExtensionArgumentException('indexSize must be at least 1');
        }

        if ($indexSize > 5) {
            throw new ExtensionArgumentException('indexSize must be at most 5');
        }

        if ($min >= $max) {
            throw new ExtensionArgumentException('min must be smaller than max');
        }

        $words = $this->getConsecutiveWords($indexSize);

        // attempts 1..99 generate a candidate; reaching 100 means giving up
        for ($iterations = 1; $iterations < 100; ++$iterations) {
            $result = $this->generateText($max, $words);

            if ($this->replacer->strlen($result) > $min) {
                return $result;
            }
        }

        throw new ExtensionOverflowException(sprintf('Maximum retries of %d reached without finding a valid real text', $iterations));
    }

    /**
     * Walk the lookup table from a random starting index and assemble one candidate text.
     *
     * Words are appended until the tracked length reaches $max or the current index has no
     * known follower. The last appended word is then dropped, trailing separators and
     * punctuation are stripped, and a single period is appended.
     *
     * NOTE(review): while no valid starting word has been found the loop only ends when the
     * walk reaches an index without followers; a base text without any valid start may keep
     * it running for a long time - confirm this is acceptable for custom base texts.
     *
     * @param array<string, array<int, string>> $words lookup table as built by getConsecutiveWords()
     */
    protected function generateText(int $max, array $words): string
    {
        $result = [];
        $resultLength = 0;

        // take a random starting point
        // PHP stores decimal-integer string keys (e.g. "1865") as int, so cast the key back to
        // string; otherwise explode() below raises a TypeError under strict_types
        $next = (string) $this->randomizer->randomKey($words);

        while ($resultLength < $max && isset($words[$next])) {
            // fetch a random word to append
            $word = $this->randomizer->randomElement($words[$next]);

            // calculate next index: slide the word window forward by one
            $currentWords = explode($this->separator, $next);
            $currentWords[] = $word;
            array_shift($currentWords);
            $next = implode($this->separator, $currentWords);

            // ensure text starts with an uppercase letter
            if ($resultLength === 0 && !$this->validStart($word)) {
                continue;
            }

            // append the element
            $result[] = $word;
            $resultLength += $this->replacer->strlen($word) + $this->separatorLen;
        }

        // remove the element that caused the text to overflow
        array_pop($result);

        // build result
        $text = implode($this->separator, $result);

        // strip trailing spaces and punctuation before adding the final period; the hyphen is
        // escaped so it is a literal and not a range (",-:" would also match digits and "/")
        return preg_replace('/[ ,.:;\-\x{2013}\x{2014}]+$/us', '', $text) . '.';
    }

    /**
     * Build (or fetch from cache) the lookup table for the given index size.
     *
     * Each key is $indexSize consecutive words of the base text joined by the separator;
     * each value lists every word that follows that sequence, duplicates included, so that
     * frequent followers are picked more often.
     *
     * @return array<string, array<int, string>>
     */
    protected function getConsecutiveWords(int $indexSize): array
    {
        if (isset($this->consecutiveWords[$indexSize])) {
            return $this->consecutiveWords[$indexSize];
        }

        $parts = $this->getExplodedText();

        // seed the sliding window with the first $indexSize words
        $index = [];

        for ($i = 0; $i < $indexSize; ++$i) {
            $index[] = array_shift($parts);
        }

        $words = [];

        foreach ($parts as $word) {
            // register $word as a follower of the current window, then slide the window
            $words[implode($this->separator, $index)][] = $word;
            array_shift($index);
            $index[] = $word;
        }

        // cache look up words for performance
        return $this->consecutiveWords[$indexSize] = $words;
    }

    /**
     * Split the base text into words, caching the result on the instance.
     *
     * Every run of whitespace is collapsed into a single space before the text is split
     * on the separator.
     *
     * @return array<int, string>
     */
    protected function getExplodedText(): array
    {
        if ([] === $this->explodedText) {
            // preg_replace() returns null on a PCRE error; fall back to an empty text then
            $replaced = preg_replace('/\s+/u', ' ', $this->baseText) ?? '';
            $this->explodedText = explode($this->separator, $replaced);
        }

        return $this->explodedText;
    }

    /**
     * Tell whether $word may open a generated text.
     *
     * When $textStartsWithUppercase is disabled every word qualifies; otherwise the word
     * must begin with a Unicode uppercase letter.
     */
    protected function validStart(string $word): bool
    {
        if (!$this->textStartsWithUppercase) {
            return true;
        }

        return 1 === preg_match('/^\p{Lu}/u', $word);
    }
}
192