Passed
Branch main (7ce824)
by Johny
03:15
created

Text::__construct()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 7
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 4.128

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 7
ccs 4
cts 5
cp 0.8
rs 10
c 0
b 0
f 0
cc 4
nc 3
nop 1
crap 4.128
1
<?php
2
3
declare(strict_types=1);
4
5
namespace DummyGenerator\Core;
6
7
use DummyGenerator\Definitions\Extension\Awareness\RandomizerAwareExtensionInterface;
8
use DummyGenerator\Definitions\Extension\Awareness\RandomizerAwareExtensionTrait;
9
use DummyGenerator\Definitions\Extension\Awareness\ReplacerAwareExtensionInterface;
10
use DummyGenerator\Definitions\Extension\Awareness\ReplacerAwareExtensionTrait;
11
use DummyGenerator\Definitions\Extension\Exception\ExtensionArgumentException;
12
use DummyGenerator\Definitions\Extension\Exception\ExtensionOverflowException;
13
use DummyGenerator\Definitions\Extension\TextExtensionInterface;
14
15
class Text implements
16
    TextExtensionInterface,
17
    RandomizerAwareExtensionInterface,
18
    ReplacerAwareExtensionInterface
19
{
20
    use RandomizerAwareExtensionTrait;
21
    use ReplacerAwareExtensionTrait;
22
23
    /** Path of the bundled text file that the constructor reads when no usable custom text is supplied. */
    protected string $defaultText = __DIR__.'/../../resources/en_US.txt';
24
25
    /** Raw contents of the loaded text file; remains '' if no file could be read. */
    protected string $baseText = '';
26
    /**
27
     * String used to split the base text into words and to join generated words.
     *
     * @var non-empty-string
0 ignored issues
show
Documentation Bug introduced by
The doc comment non-empty-string at position 0 could not be parsed: Unknown type name 'non-empty-string' at position 0 in non-empty-string.
Loading history...
28
     */
29
    protected string $separator = ' ';
30
    /** Amount added per appended word to the running length in generateText(). */
    protected int $separatorLen = 1;
31
    /**
32
     * Cache of the base text split into words, filled on first use by getExplodedText().
     *
     * @var array<int, string>
33
     */
34
    protected array $explodedText = [];
35
    /**
36
     * Cache of lookup tables built by getConsecutiveWords(), keyed by index size.
     *
     * @var array<int, array<string, array<int, string>>>
37
     */
38
    protected array $consecutiveWords = [];
39
    /** When true, validStart() only accepts words that begin with an uppercase letter. */
    protected bool $textStartsWithUppercase = true;
40
41 1
    /**
     * Load the base text that the generator draws its words from.
     *
     * When $baseText is given it is treated as a path and read with file_get_contents().
     * If it is null, cannot be read, or the file is empty, the bundled default text
     * file is read instead. If that read fails too, the base text stays an empty string.
     *
     * @param string|null $baseText Path of the text file to use, or null for the default text
     */
    public function __construct(?string $baseText = null)
    {
        // Explicit comparisons instead of a truthiness test, so that a file whose
        // entire content is "0" is not mistaken for a failed read.
        $custom = $baseText !== null ? file_get_contents($baseText) : false;

        if ($custom !== false && $custom !== '') {
            $this->baseText = $custom;

            return;
        }

        $fallback = file_get_contents($this->defaultText);
        $this->baseText = $fallback !== false ? $fallback : '';
    }
50
51
52
    /**
     * Generate a text string by the Markov chain algorithm.
     *
     * Builds a table that maps every run of $indexSize consecutive words of the base
     * text to the words that can follow it, then walks that table at random. Generation
     * is retried until the text contains at least $min characters.
     *
     * @example 'Alice, swallowing down her flamingo, and began by taking the little golden key'
     *
     * @param int $min Minimum number of characters the text should contain
     *                 (at least 1, and smaller than $max)
     * @param int $max Maximum number of characters the text should contain (at least 10)
     * @param int $indexSize  Determines how many words are considered for the generation of the next word.
     *                        The minimum is 1, and it produces a higher level of randomness, although the
     *                        generated text usually doesn't make sense. Higher index sizes (up to 5)
     *                        produce more correct text, at the price of less randomness.
     *
     * @return string
     *
     * @throws ExtensionArgumentException when $min, $max or $indexSize is outside its allowed range
     * @throws ExtensionOverflowException when no text of at least $min characters was produced
     *                                    before the retry counter reached 100
     */
    public function realText(int $min = 160, int $max = 200, int $indexSize = 2): string
    {
        if ($min < 1) {
            throw new ExtensionArgumentException('min must be at least 1');
        }

        if ($max < 10) {
            throw new ExtensionArgumentException('max must be at least 10');
        }

        if ($indexSize < 1) {
            throw new ExtensionArgumentException('indexSize must be at least 1');
        }

        if ($indexSize > 5) {
            throw new ExtensionArgumentException('indexSize must be at most 5');
        }

        if ($min >= $max) {
            throw new ExtensionArgumentException('min must be smaller than max');
        }

        $words = $this->getConsecutiveWords($indexSize);

        $iterations = 0;

        do {
            ++$iterations;

            if ($iterations >= 100) {
                throw new ExtensionOverflowException(sprintf('Maximum retries of %d reached without finding a valid real text', $iterations));
            }

            $result = $this->generateText($max, $words);
            // A text of exactly $min characters satisfies the minimum. Rejecting it (the
            // former "<=") made accepted arguments such as min = max - 1 impossible to
            // fulfil, because generateText() keeps its output below $max characters.
        } while ($this->replacer->strlen($result) < $min);

        return $result;
    }
108
109
    /**
     * Produce one candidate text by walking the lookup table at random.
     *
     * Starts from a random key of $words and keeps drawing a random follower for the
     * current key until the running length reaches $max or the current key has no
     * entry in $words. The last collected word is then dropped, the words are joined
     * with the separator, trailing characters matched by the clean-up pattern are
     * removed and a full stop is appended.
     *
     * @param int $max Length at which collecting words stops
     * @param array<string, array<int, string>> $words Lookup table as built by getConsecutiveWords()
     * @return string
     */
    protected function generateText(int $max, array $words): string
    {
        $collected = [];
        $length = 0;

        // pick the key the walk starts from
        /** @var string $key */
        $key = $this->randomizer->randomKey($words);

        while ($length < $max && isset($words[$key])) {
            // draw one of the words known to follow the current key
            $candidate = $this->randomizer->randomElement($words[$key]);

            // slide the key window: drop its oldest word, add the drawn one
            $window = explode($this->separator, $key);
            array_shift($window);
            $window[] = $candidate;
            $key = implode($this->separator, $window);

            // while nothing has been collected yet, only a valid starting word is kept
            if ($length !== 0 || $this->validStart($candidate)) {
                $collected[] = $candidate;
                $length += $this->replacer->strlen($candidate) + $this->separatorLen;
            }
        }

        // discard the word that was collected last (the one that made the text overflow)
        array_pop($collected);

        $joined = implode($this->separator, $collected);

        return preg_replace("/([ ,-:;\x{2013}\x{2014}]+$)/us", '', $joined) . '.';
    }
150
151
    /**
     * Return the lookup table for the given index size, building and caching it on first use.
     *
     * Each key is $indexSize consecutive words of the exploded base text joined with the
     * separator; each value lists, in text order, every word that follows that run.
     *
     * @param int $indexSize Number of consecutive words that form one key
     * @return array<string, array<int, string>>
     */
    protected function getConsecutiveWords(int $indexSize): array
    {
        if (isset($this->consecutiveWords[$indexSize])) {
            return $this->consecutiveWords[$indexSize];
        }

        $remaining = $this->getExplodedText();

        // seed the sliding window with the first $indexSize words of the text
        $window = [];
        while (count($window) < $indexSize) {
            $window[] = array_shift($remaining);
        }

        $table = [];
        foreach ($remaining as $word) {
            // record $word as a follower of the current window ...
            $table[implode($this->separator, $window)][] = $word;

            // ... then move the window one word forward
            array_shift($window);
            $window[] = $word;
        }

        // keep the table so later calls with the same index size are cheap
        /** @var array<string, array<int, string>> $table */
        $this->consecutiveWords[$indexSize] = $table;

        return $this->consecutiveWords[$indexSize];
    }
185
186
    /**
     * Return the base text split into words, computing the split only while the cache is empty.
     *
     * Every run of whitespace in the base text is collapsed into a single space before
     * the text is split on the separator.
     *
     * @return array<int, string>
     */
    protected function getExplodedText(): array
    {
        if ($this->explodedText !== []) {
            return $this->explodedText;
        }

        // preg_replace() yields null on failure; fall back to an empty string in that case
        $normalized = preg_replace('/\s+/u', ' ', $this->baseText) ?? '';
        $this->explodedText = explode($this->separator, $normalized);

        return $this->explodedText;
    }
198
199 1
    /**
     * Tell whether $word may be used as the first word of a generated text.
     *
     * Every word is accepted unless $textStartsWithUppercase is set; in that case the
     * word has to begin with a Unicode uppercase letter.
     *
     * @param string $word Candidate first word
     * @return bool
     */
    protected function validStart(string $word): bool
    {
        if (!$this->textStartsWithUppercase) {
            return true;
        }

        // preg_match() gives 1 on a match, 0 on no match and false on error
        return preg_match('/^\p{Lu}/u', $word) === 1;
    }
209
}
210