| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | namespace Mystem; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  * Class Mystem | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  * Helper for executing the mystem binary | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | class Mystem | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |     private static $handle; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |     protected static $pipes; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |     /** @var string|null $mystemPath path to the mystem binary */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |     public static $mystemPath = null; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Runs the mystem binary and returns raw morphological data for each word.
                 *
                 * Ex. for 'хрюкотали' returns:
                 *   array(2) {
                 *      ["text"]=> string(18) "хрюкотали"
                 *      ["analysis"]=> array(1) {
                 *          [0]=> array(3) {
                 *              ["lex"] =>string(18) "хрюкотать"
                 *              ["gr"]  =>string(42) "V,несов,нп=прош,мн,изъяв"
                 *              ["qual"]=>string(7) "bastard"
                 *          }
                 *     }
                 *   }
                 *
                 * The text is written to the shared mystem process followed by a random
                 * end marker, and the process output is read back until that marker is seen
                 * (or readUntil() gives up).
                 *
                 * @param string $text
                 * @throws \Exception when the process cannot be opened or mystem wrote to stderr
                 * @return array[] decoded output lines that carry a non-empty "analysis",
                 *                 keyed by their line index in the raw output
                 */
                public static function stemm($text)
                {
                    self::procOpen();

                    // Pick a marker that does not occur in the input, so the end of the
                    // output for this request is unambiguous.
                    do {
                        $endMark = 'end' . rand(99999, PHP_INT_MAX);
                    } while (mb_strpos($text, $endMark) !== false);

                    fwrite(self::$pipes[0], $text . ".$endMark\n");
                    $raw = self::readUntil(self::$pipes[1], $endMark);

                    // Anything on stderr (first 1024 bytes) is treated as a failure.
                    $possibleError = stream_get_contents(self::$pipes[2], 1024);
                    if (!empty($possibleError)) {
                        throw new \Exception("Error: " . $possibleError);
                    }

                    // Decode and filter in one pass. No by-reference iteration, so no
                    // reference-bound element can end up in the returned array; keys are
                    // kept exactly as array_filter() would have kept them.
                    $result = array();
                    foreach (explode("\n", $raw) as $index => $line) {
                        $entry = json_decode($line, true);
                        if (is_array($entry) && !empty($entry['analysis'])) {
                            $result[$index] = $entry;
                        }
                    }

                    return $result;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Reads from a pipe until the end marker shows up or the retries run out.
                 *
                 * The pipe is polled with short sleeps between reads, so it is expected to
                 * be non-blocking. If the marker is not seen within the retry budget, the
                 * data collected so far is returned as is.
                 *
                 * @param resource $pipe    readable stream to collect output from
                 * @param string   $endMark marker that terminates the output of one request
                 * @return string everything read from the pipe, or '' when nothing became
                 *                readable during the initial wait
                 */
                private static function readUntil($pipe, $endMark)
                {
                    $read = array($pipe);
                    $write = null;
                    $except = null;
                    // Wait (4 s + 1000 µs at most) for the first data. Both a timeout (0) and
                    // a select error (false) mean there is nothing to read.
                    if (stream_select($read, $write, $except, 4, 1000) == 0) {
                        return '';
                    }

                    $raw = '';
                    $markLength = strlen($endMark);
                    $attempt = 0;
                    do {
                        // Start the search slightly before the end of the previously read
                        // data: the marker may be split across two consecutive reads.
                        $searchFrom = max(0, strlen($raw) - $markLength);
                        usleep(500);
                        $raw .= stream_get_contents($pipe);
                        // Strict comparison, so a marker found at position 0 counts as found.
                        $found = strpos($raw, $endMark, $searchFrom) !== false;
                    } while (!$found && $attempt++ < 20);

                    return $raw;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Starts the shared mystem process on first use and stores its pipes.
                 *
                 * Subsequent calls are no-ops while a handle is cached. stdout and stderr
                 * of the child are switched to non-blocking mode, and destruct() is
                 * registered as a shutdown function so the process is cleaned up.
                 *
                 * @return array|null empty array when a process is already open, nothing otherwise
                 * @throws \Exception when proc_open() does not return a process resource
                 */
                private static function procOpen()
                {
                    if (self::$handle !== null) {
                        return array();
                    }

                    $descriptors = array(
                        0 => array("pipe", "r"),
                        1 => array("pipe", "w"),
                        2 => array("pipe", "w"),
                    );
                    $process = proc_open(self::getMystem() . ' -incs --format=json', $descriptors, $pipes);

                    // Cache the handle only once it is known to be valid. Storing a failed
                    // (false) result would make the next call return early and leave the
                    // caller writing to pipes that were never opened.
                    if (!is_resource($process)) {
                        throw new \Exception("Can't proc_open mystem");
                    }
                    self::$handle = $process;
                    self::$pipes = $pipes;

                    stream_set_blocking(self::$pipes[1], false);
                    stream_set_blocking(self::$pipes[2], false);

                    register_shutdown_function(array('\Mystem\Mystem', 'destruct'));
                }
            
                                                                                                            
                                                                
            
                                    
            
            
                | 97 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                /**
                 * Closes the pipes and stops the mystem process, if one is running.
                 *
                 * Registered as a shutdown function when the process is opened; it can also
                 * be called directly and is safe to call more than once.
                 *
                 * @return false|null false when there is no running process to stop,
                 *                    nothing after a process has been shut down
                 */
                public static function destruct()
                {
                    if (!is_resource(self::$handle)) {
                        // Never started, already closed, or a failed start left a non-resource
                        // value behind: normalise the state so the next open can retry.
                        self::$handle = null;
                        return false;
                    }

                    if (is_array(self::$pipes)) {
                        foreach (self::$pipes as $pipe) {
                            // Skip pipes that were already closed elsewhere.
                            if (is_resource($pipe)) {
                                fflush($pipe);
                                fclose($pipe);
                            }
                        }
                    }
                    // Do not keep closed stream resources around.
                    self::$pipes = null;

                    proc_terminate(self::$handle);
                    proc_close(self::$handle);
                    self::$handle = null;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Builds the full path to the mystem executable.
                 *
                 * The directory part is resolved once and cached in self::$mystemPath:
                 * the "vendor/bin/" directory two levels above this file's directory is
                 * used when it exists, otherwise the "bin/" directory four levels above.
                 * The file name is "mystem.exe" when PHP_OS starts with "WIN"
                 * (compared case-insensitively) and "mystem" on every other platform.
                 *
                 * @return string
                 */
                private static function getMystem()
                {
                    // Resolve the directory only while no path has been stored yet.
                    if (self::$mystemPath === null) {
                        $localBin = __DIR__ . '/../../vendor/bin/';
                        self::$mystemPath = is_dir($localBin)
                            ? $localBin
                            : __DIR__ . '/../../../../bin/';
                    }

                    $osPrefix = strtoupper(substr(PHP_OS, 0, 3));
                    if ($osPrefix === 'WIN') {
                        return self::$mystemPath . 'mystem.exe';
                    }

                    return self::$mystemPath . 'mystem';
                }
            
                                                                                                            
                                                                
            
                                    
            
            
                | 132 |  |  | } | 
            
                                                        
            
                                    
            
            
                | 133 |  |  |  |