aotd1 /
mystem
| 1 | <?php |
||
| 2 | namespace Mystem; |
||
| 3 | |||
/**
 * Class Mystem
 *
 * Static helper that runs the mystem binary as a child process and exchanges
 * data with it over pipes. The process is started lazily on the first call to
 * stemm() and is reused by later calls until destruct() is invoked.
 */
class Mystem
{
    /** @var resource|null process handle returned by proc_open(), null while no process is open */
    private static $handle;

    /** @var resource[]|null pipes of the child process: 0 = stdin, 1 = stdout, 2 = stderr */
    protected static $pipes;

    /** @var bool true once destruct() has been registered as a shutdown function */
    private static $shutdownRegistered = false;

    /**
     * @var string|null prefix prepended to the executable name ('mystem' or 'mystem.exe');
     *                  when null, a default directory is chosen on first use
     */
    public static $mystemPath = null;

    /**
     * Runs mystem binary and returns raw morphological data for each word
     * Ex. for 'хрюкотали' returns:
     * array(2) {
     *  ["text"]=> string(18) "хрюкотали"
     *  ["analysis"]=> array(1) {
     *      [0]=> array(3) {
     *          ["lex"] =>string(18) "хрюкотать"
     *          ["gr"]  =>string(42) "V,несов,нп=прош,мн,изъяв"
     *          ["qual"]=>string(7) "bastard"
     *      }
     *  }
     * }
     *
     * Only decoded lines with a non-empty "analysis" entry are returned; the
     * array keys are the positions of those lines in the raw output, so they
     * are not necessarily consecutive.
     *
     * @param string $text
     * @throws \Exception when the process cannot be started or written to, or
     *                    when it reports something on stderr
     * @return array[] lexical strings associative array
     */
    public static function stemm($text)
    {
        self::procOpen();

        // Pick an end-of-output marker that does not occur in the input.
        // The marker is plain ASCII, so a byte-wise search is sufficient.
        do {
            $endMark = 'end' . rand(99999, PHP_INT_MAX);
        } while (strpos($text, $endMark) !== false);

        $written = fwrite(self::$pipes[0], $text . ".$endMark\n");
        $raw = self::readUntil(self::$pipes[1], $endMark);

        $possibleError = stream_get_contents(self::$pipes[2], 1024);
        if ($possibleError !== false && $possibleError !== '') {
            // Drop the process so that the next call starts a fresh one
            // instead of talking to a possibly dead child.
            self::destruct();
            throw new \Exception("Error: " . $possibleError);
        }
        if ($written === false || $written === 0) {
            self::destruct();
            throw new \Exception("Can't write to mystem");
        }

        // Each output line is decoded separately; lines that are not valid
        // JSON decode to null and are dropped by the filter below.
        $decoded = array_map(function ($line) {
            return json_decode($line, true);
        }, explode("\n", $raw));

        return array_filter($decoded, function ($value) {
            return is_array($value) && !empty($value['analysis']);
        });
    }

    /**
     * Reads from a non-blocking pipe until the end marker shows up.
     *
     * Waits up to about 4 seconds for the first data, then polls the pipe at
     * most 21 times with a 500 microsecond pause before each read. Whatever
     * has been collected is returned even if the marker was never seen.
     *
     * @param resource $pipe
     * @param string $endMark
     * @return string collected output, '' if nothing became readable in time
     */
    private static function readUntil($pipe, $endMark)
    {
        $read = array($pipe);
        $write = null;
        $except = null;
        $ready = stream_select($read, $write, $except, 4, 1000);
        if ($ready === false || $ready === 0) {
            return '';
        }

        $raw = '';
        $markLength = strlen($endMark);
        $attempts = 0;
        do {
            // Step back far enough to catch a marker that was split between
            // the previous read and the next one.
            $searchFrom = max(0, strlen($raw) - $markLength + 1);
            usleep(500);
            $raw .= stream_get_contents($pipe);
            $found = strpos($raw, $endMark, $searchFrom) !== false;
        } while (!$found && $attempts++ < 20);

        return $raw;
    }

    /**
     * Starts the mystem process unless one is already open.
     *
     * The process is started with the arguments `-incs --format=json`; its
     * stdout and stderr pipes are switched to non-blocking mode and
     * destruct() is registered as a shutdown function.
     *
     * @return void
     * @throws \Exception when the process cannot be started
     */
    private static function procOpen()
    {
        if (self::$handle !== null) {
            return;
        }

        $pipes = array();
        $handle = proc_open(
            self::quoteExecutable(self::getMystem()) . ' -incs --format=json',
            array(
                0 => array("pipe", "r"),
                1 => array("pipe", "w"),
                2 => array("pipe", "w")
            ),
            $pipes
        );

        // Store the handle only on success, so that a failed start is retried
        // by the next call instead of being mistaken for an open process.
        if (!is_resource($handle)) {
            throw new \Exception("Can't proc_open mystem");
        }
        self::$handle = $handle;
        self::$pipes = $pipes;

        stream_set_blocking(self::$pipes[1], 0);
        stream_set_blocking(self::$pipes[2], 0);

        if (!self::$shutdownRegistered) {
            register_shutdown_function(array(__CLASS__, 'destruct'));
            self::$shutdownRegistered = true;
        }
    }

    /**
     * Closes the pipes and terminates the mystem process.
     *
     * @return bool false when no process was open, true after cleanup
     */
    public static function destruct()
    {
        if (self::$handle === null) {
            return false;
        }

        if (is_array(self::$pipes)) {
            foreach (self::$pipes as $pipe) {
                if (is_resource($pipe)) {
                    fflush($pipe);
                    fclose($pipe);
                }
            }
        }
        self::$pipes = null;

        if (is_resource(self::$handle)) {
            proc_terminate(self::$handle);
            proc_close(self::$handle);
        }
        self::$handle = null;

        return true;
    }

    /**
     * Returns the mystem executable path: the configured prefix followed by
     * 'mystem.exe' on Windows or 'mystem' elsewhere.
     *
     * When no prefix is configured, the vendor/bin directory two levels above
     * this file is used if it exists, otherwise the bin directory four levels
     * above. The chosen default is stored in $mystemPath.
     *
     * @return string
     */
    private static function getMystem()
    {
        if (static::$mystemPath === null) {
            $vendorBin = __DIR__ . '/../../vendor/bin/';
            static::$mystemPath = is_dir($vendorBin)
                ? $vendorBin
                : __DIR__ . '/../../../../bin/';
        }

        return static::$mystemPath . (self::isWindows() ? 'mystem.exe' : 'mystem');
    }

    /**
     * Tells whether PHP_OS starts with "WIN" (case-insensitive).
     *
     * @return bool
     */
    private static function isWindows()
    {
        return strtoupper(substr(PHP_OS, 0, 3)) === 'WIN';
    }

    /**
     * Quotes an executable path for the command string given to proc_open(),
     * so that a path containing spaces is passed as a single word.
     *
     * Quoting is done by hand rather than with escapeshellarg() because that
     * function may drop non-ASCII bytes depending on the current locale.
     *
     * @param string $path
     * @return string
     */
    private static function quoteExecutable($path)
    {
        if (self::isWindows()) {
            return '"' . $path . '"';
        }

        return "'" . str_replace("'", "'\\''", $path) . "'";
    }
}
||
| 137 |