| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | declare(strict_types=1); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | namespace DummyGenerator\Core; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | use DummyGenerator\Definitions\Extension\Awareness\RandomizerAwareExtensionInterface; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | use DummyGenerator\Definitions\Extension\Awareness\RandomizerAwareExtensionTrait; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | use DummyGenerator\Definitions\Extension\Awareness\ReplacerAwareExtensionInterface; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | use DummyGenerator\Definitions\Extension\Awareness\ReplacerAwareExtensionTrait; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  | use DummyGenerator\Definitions\Extension\Exception\ExtensionArgumentException; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | use DummyGenerator\Definitions\Extension\Exception\ExtensionOverflowException; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  | use DummyGenerator\Definitions\Extension\TextExtensionInterface; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  | class Text implements | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |     TextExtensionInterface, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |     RandomizerAwareExtensionInterface, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |     ReplacerAwareExtensionInterface | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  | { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |     use RandomizerAwareExtensionTrait; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |     use ReplacerAwareExtensionTrait; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |     protected string $defaultText = __DIR__.'/../../resources/en_US.txt'; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |     protected string $baseText = ''; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |      * @var non-empty-string | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |     protected string $separator = ' '; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |     protected int $separatorLen = 1; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |      * @var array<int, string> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |     protected array $explodedText = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |      * @var array<int, array<string, array<int, string>>> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |     protected array $consecutiveWords = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |     protected bool $textStartsWithUppercase = true; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Load the base text that the generator works from.
                 *
                 * When $baseText is given, its contents are read with file_get_contents(). If no
                 * path is given, or the read fails or yields an empty/falsy string, the file at
                 * $this->defaultText is read instead. If that read fails too, the base text is
                 * set to an empty string.
                 *
                 * @param string|null $baseText Path handed to file_get_contents(), or null to use the default text
                 */
                public function __construct(?string $baseText = null)
                {
                    // Explicit nullable type: implicit nullability via "= null" is deprecated in PHP 8.4.
                    $contents = null !== $baseText ? file_get_contents($baseText) : false;

                    // A failed read (false) and falsy content ('' or '0') both fall back to the default text.
                    if (!$contents) {
                        $contents = file_get_contents($this->defaultText);
                    }

                    $this->baseText = false !== $contents ? $contents : '';
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Generate a text string by the Markov chain algorithm.
                 *
                 * Looks up the word table for the given index size via getConsecutiveWords() and
                 * repeatedly asks generateText() for a candidate, passing $max along. The first
                 * candidate whose length (measured with the replacer's strlen()) is strictly
                 * greater than $min is returned.
                 *
                 * @example 'Alice, swallowing down her flamingo, and began by taking the little golden key'
                 *
                 * @param int $min Lower bound on the text length; must be at least 1 and smaller than $max.
                 *                 The returned text is always longer than this value.
                 * @param int $max Maximum number of characters the text should contain (minimum: 10)
                 * @param int $indexSize  Determines how many words are considered for the generation of the next word.
                 *                        The minimum is 1, and it produces a higher level of randomness, although the
                 *                        generated text usually doesn't make sense. Higher index sizes (up to 5)
                 *                        produce more correct text, at the price of less randomness.
                 *
                 * @return string
                 *
                 * @throws ExtensionArgumentException when $min, $max or $indexSize is outside the allowed range
                 * @throws ExtensionOverflowException when 99 generated candidates in a row are not longer than $min
                 */
                public function realText(int $min = 160, int $max = 200, int $indexSize = 2): string
                {
                    // Checked in declaration order; the first violated rule decides the message.
                    $violations = [
                        'min must be at least 1' => $min < 1,
                        'max must be at least 10' => $max < 10,
                        'indexSize must be at least 1' => $indexSize < 1,
                        'indexSize must be at most 5' => $indexSize > 5,
                        'min must be smaller than max' => $min >= $max,
                    ];

                    foreach ($violations as $message => $isViolated) {
                        if ($isViolated) {
                            throw new ExtensionArgumentException($message);
                        }
                    }

                    $wordTable = $this->getConsecutiveWords($indexSize);

                    // Attempts 1..99 generate a candidate; reaching 100 means giving up.
                    for ($attempt = 1; $attempt < 100; ++$attempt) {
                        $candidate = $this->generateText($max, $wordTable);

                        if ($this->replacer->strlen($candidate) > $min) {
                            return $candidate;
                        }
                    }

                    throw new ExtensionOverflowException(sprintf('Maximum retries of %d reached without finding a valid real text', $attempt));
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Builds one piece of random text by walking the consecutive-words lookup table.
                 *
                 * Starting from a random key of $words, a follower word is picked for the
                 * current key, and the next key is formed by appending that word to the
                 * current key's words and dropping the first one. Each kept word costs its
                 * length (as measured by the replacer) plus the separator length; the walk
                 * stops once that running cost reaches $max or the current key has no
                 * followers. The returned text always ends with a single '.'.
                 *
                 * @param int $max length budget for the generated text
                 * @param array<string, array<int, string>> $words followers indexed by separator-joined key
                 * @return string
                 */
                protected function generateText(int $max, array $words): string
                {
                    $result = [];
                    $resultLength = 0;
                    // take a random starting point
                    /** @var string $next */
                    $next = $this->randomizer->randomKey($words);

                    while ($resultLength < $max && isset($words[$next])) {
                        // fetch a random word to append
                        $word = $this->randomizer->randomElement($words[$next]);

                        // calculate next index: slide the key window one word forward
                        $currentWords = explode($this->separator, $next);
                        $currentWords[] = $word;
                        array_shift($currentWords);
                        $next = implode($this->separator, $currentWords);

                        // keep walking (without emitting anything) until a word that is
                        // acceptable as the first word of the text comes up
                        if ($resultLength === 0 && !$this->validStart($word)) {
                            continue;
                        }

                        // append the element
                        $result[] = $word;
                        $resultLength += $this->replacer->strlen($word) + $this->separatorLen;
                    }

                    // Remove the element that caused the text to overflow. When the loop
                    // stopped because the chain ran out of followers instead, the last word
                    // still fits the budget and must be kept.
                    if ($resultLength >= $max) {
                        array_pop($result);
                    }

                    // build result
                    $text = implode($this->separator, $result);

                    // Strip trailing punctuation before appending the final period. Note that
                    // inside the character class `,-:` is a range (U+002C..U+003A), so trailing
                    // '-', '.', '/' and digits are stripped as well as ',' and ':'.
                    $trimmed = preg_replace("/([ ,-:;\x{2013}\x{2014}]+$)/us", '', $text);

                    // preg_replace() returns null on a PCRE error; fall back to the untrimmed
                    // text rather than returning a bare '.'.
                    return ($trimmed ?? $text) . '.';
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 150 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Returns the lookup table of follower words for keys of $indexSize words.
                 *
                 * Each key is $indexSize consecutive words of the exploded base text joined
                 * with the separator; its value lists every word that directly follows that
                 * sequence, in order of appearance (repeats included). The table is built
                 * once per $indexSize and then served from $this->consecutiveWords.
                 *
                 * @param int $indexSize number of words that make up one key
                 * @return array<string, array<int, string>>
                 */
                protected function getConsecutiveWords(int $indexSize): array
                {
                    // serve from the cache when this size has already been built
                    if (isset($this->consecutiveWords[$indexSize])) {
                        return $this->consecutiveWords[$indexSize];
                    }

                    $remaining = $this->getExplodedText();

                    // seed the sliding window with the first $indexSize words
                    $window = [];
                    while (count($window) < $indexSize) {
                        $window[] = array_shift($remaining);
                    }

                    /** @var array<string, array<int, string>> $lookup */
                    $lookup = [];
                    foreach ($remaining as $follower) {
                        // record the follower under the current window's key
                        $lookup[implode($this->separator, $window)][] = $follower;

                        // slide the window one word forward
                        array_shift($window);
                        $window[] = $follower;
                    }

                    // cache look up words for performance
                    $this->consecutiveWords[$indexSize] = $lookup;

                    return $this->consecutiveWords[$indexSize];
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 185 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Returns the base text split into words, computing and caching it on first use.
                 *
                 * Runs of whitespace in the base text are collapsed to a single space before
                 * the text is split on the separator.
                 *
                 * @return array<int, string>
                 */
                protected function getExplodedText(): array
                {
                    if (!empty($this->explodedText)) {
                        return $this->explodedText;
                    }

                    // preg_replace() yields null on a PCRE error; treat that as empty text
                    $normalized = preg_replace('/\s+/u', ' ', $this->baseText) ?? '';
                    $this->explodedText = explode($this->separator, $normalized);

                    return $this->explodedText;
                }
            
                                                                                                            
                                                                
            
                                    
            
            
                | 198 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                /**
                 * Tells whether $word is acceptable as the first word of a generated text.
                 *
                 * Every word is acceptable unless $this->textStartsWithUppercase is set, in
                 * which case the word must begin with a Unicode uppercase letter.
                 */
                protected function validStart(string $word): bool
                {
                    if (!$this->textStartsWithUppercase) {
                        return true;
                    }

                    // preg_match() gives 1 on a match, 0 on no match and false on error
                    return preg_match('/^\p{Lu}/u', $word) === 1;
                }
            
                                                                                                            
                                                                
            
                                    
            
            
                | 209 |  |  | } | 
            
                                                        
            
                                    
            
            
                | 210 |  |  |  |