1 | <?php |
||
2 | namespace Mystem; |
||
3 | |||
/**
 * Class Mystem
 *
 * Static helper that runs the external `mystem` binary as a long-lived child
 * process and exchanges data with it over stdin/stdout/stderr pipes.
 * The process is started lazily on the first call to stemm() and is closed by
 * destruct(), which is also registered as a shutdown function.
 */
class Mystem
{
    /** @var resource|null handle returned by proc_open(), null while no process is open */
    private static $handle;

    /** @var array|null pipes of the child process: 0 = stdin, 1 = stdout, 2 = stderr */
    protected static $pipes;

    /** @var bool whether destruct() has already been registered as a shutdown function */
    private static $shutdownRegistered = false;

    /**
     * @var string|null directory prefix (including the trailing slash) the binary
     *                  file name is appended to; null means "detect on first use"
     */
    public static $mystemPath = null;

    /**
     * Runs mystem binary and returns raw morphological data for each word
     * Ex. for 'хрюкотали' returns:
     * array(2) {
     *   ["text"]=> string(18) "хрюкотали"
     *   ["analysis"]=> array(1) {
     *     [0]=> array(3) {
     *       ["lex"] =>string(18) "хрюкотать"
     *       ["gr"]  =>string(42) "V,несов,нп=прош,мн,изъяв"
     *       ["qual"]=>string(7) "bastard"
     *     }
     *   }
     * }
     *
     * The text is written to the process followed by a random end mark, and the
     * output is read until that mark shows up. Every output line is decoded as
     * JSON; only entries with a non-empty "analysis" are returned. Keys of the
     * returned array are the indexes of the corresponding output lines, so they
     * are not necessarily consecutive.
     *
     * @param string $text
     * @throws \Exception when the process cannot be started or wrote to stderr
     * @return array[] lexical strings associative array
     */
    public static function stemm($text)
    {
        self::procOpen();

        // Pick a mark that does not occur in the input itself.
        do {
            $endMark = 'end' . rand(99999, PHP_INT_MAX);
        } while (mb_strpos($text, $endMark) !== false);

        fwrite(self::$pipes[0], $text . ".$endMark\n");
        $raw = self::readUntil(self::$pipes[1], $endMark);

        $possibleError = stream_get_contents(self::$pipes[2], 1024);
        if (!empty($possibleError)) {
            throw new \Exception("Error: " . $possibleError);
        }

        // Iterate by value: no reference is left dangling after the loop.
        $lines = array();
        foreach (explode("\n", $raw) as $index => $line) {
            $decoded = json_decode($line, true);
            if (!empty($decoded['analysis'])) {
                $lines[$index] = $decoded;
            }
        }

        return $lines;
    }

    /**
     * Reads from a non-blocking pipe until the end mark appears in the output.
     *
     * Waits up to 4.001 seconds for the first data; returns an empty string if
     * nothing becomes readable (or stream_select() fails). After that the pipe
     * is polled every 500 microseconds, 21 times at most, so the result may
     * lack the end mark if the process answers too slowly.
     *
     * @param resource $pipe pipe to read from
     * @param string $endMark marker that terminates the answer
     * @return string everything read so far, including the end mark if it arrived
     */
    private static function readUntil($pipe, $endMark)
    {
        $read = array($pipe);
        $write = null;
        $except = null;
        // Both false (error) and 0 (timeout) mean there is nothing to read.
        if (!stream_select($read, $write, $except, 4, 1000)) {
            return '';
        }

        $raw = '';
        // The mark is plain ASCII, so byte-wise functions are sufficient and
        // are not confused by a multi-byte character cut between two reads.
        $markLength = strlen($endMark);
        $attempts = 0;
        do {
            // Step back by the mark length so that a mark split across two
            // consecutive reads is still found.
            $searchFrom = max(0, strlen($raw) - $markLength);
            usleep(500);
            $chunk = stream_get_contents($pipe);
            if ($chunk !== false) {
                $raw .= $chunk;
            }
        } while (strpos($raw, $endMark, $searchFrom) === false && $attempts++ < 20);

        return $raw;
    }

    /**
     * Starts the mystem process unless one is already running.
     *
     * On success stores the process handle and its pipes, switches stdout and
     * stderr to non-blocking mode and registers destruct() as a shutdown
     * function (once per request).
     *
     * @return array always an empty array
     * @throws \Exception when proc_open() does not return a resource
     */
    private static function procOpen()
    {
        if (self::$handle !== null) {
            return array();
        }

        $descriptors = array(
            0 => array("pipe", "r"),
            1 => array("pipe", "w"),
            2 => array("pipe", "w")
        );
        $pipes = array();
        // The path is escaped so that directories containing spaces work.
        $command = escapeshellarg(self::getMystem()) . ' -incs --format=json';
        $handle = proc_open($command, $descriptors, $pipes);

        // Static state is touched only after a successful start; otherwise a
        // failed attempt would look like an open process on the next call.
        if (!is_resource($handle)) {
            throw new \Exception("Can't proc_open mystem");
        }
        self::$handle = $handle;
        self::$pipes = $pipes;

        stream_set_blocking(self::$pipes[1], 0);
        stream_set_blocking(self::$pipes[2], 0);

        if (!self::$shutdownRegistered) {
            register_shutdown_function(array(__CLASS__, 'destruct'));
            self::$shutdownRegistered = true;
        }

        return array();
    }

    /**
     * Closes the pipes and terminates the mystem process, if one is open.
     *
     * @return bool false when there was no open process, true otherwise
     */
    public static function destruct()
    {
        if (self::$handle === null) {
            return false;
        }

        if (is_array(self::$pipes)) {
            foreach (self::$pipes as $pipe) {
                // A pipe may have been closed already; skip it then.
                if (is_resource($pipe)) {
                    fflush($pipe);
                    fclose($pipe);
                }
            }
        }
        self::$pipes = null;

        if (is_resource(self::$handle)) {
            proc_terminate(self::$handle);
            proc_close(self::$handle);
        }
        self::$handle = null;

        return true;
    }

    /**
     * Returns the path of the mystem executable.
     *
     * When $mystemPath is not set, it is initialised with the vendor/bin/
     * directory two levels above this file if that directory exists, otherwise
     * with bin/ four levels above. The file name is "mystem.exe" on Windows and
     * "mystem" elsewhere.
     *
     * @return string
     */
    private static function getMystem()
    {
        if (static::$mystemPath === null) {
            $localBin = __DIR__ . '/../../vendor/bin/';
            static::$mystemPath = is_dir($localBin)
                ? $localBin
                : __DIR__ . '/../../../../bin/';
        }

        $isWindows = strtoupper(substr(PHP_OS, 0, 3)) === 'WIN';

        return static::$mystemPath . ($isWindows ? 'mystem.exe' : 'mystem');
    }
}
||
137 |