1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* Class NLP |
4
|
|
|
* |
5
|
|
|
* @link https://www.icy2003.com/ |
6
|
|
|
* @author icy2003 <[email protected]> |
7
|
|
|
* @copyright Copyright (c) 2017, icy2003 |
8
|
|
|
*/ |
9
|
|
|
namespace icy2003\php\iapis\baidu; |
10
|
|
|
|
11
|
|
|
use Exception; |
12
|
|
|
use icy2003\php\C; |
13
|
|
|
use icy2003\php\I; |
14
|
|
|
use icy2003\php\ihelpers\Arrays; |
15
|
|
|
use icy2003\php\ihelpers\Http; |
16
|
|
|
use icy2003\php\ihelpers\Json; |
17
|
|
|
use icy2003\php\ihelpers\Strings; |
18
|
|
|
|
19
|
|
|
/**
 * Natural language processing
 *
 * Thin client for three Baidu NLP endpoints: lexical analysis, dependency
 * parsing and word similarity. Each endpoint method validates the relevant
 * options, requests a token, posts the selected options as JSON, stores the
 * decoded response in $_result and installs a $_toArrayCall closure that
 * extracts the interesting part of that response.
 *
 * @link https://ai.baidu.com/docs#/NLP-Basic-API/top
 */
class NLP extends Base
{

    /**
     * Option list
     *
     * @var array
     */
    protected $_options = [
        'text' => null,
        'word_1' => null,
        'word_2' => null,
        'mode' => 0,
    ];

    /**
     * Set options
     *
     * @param array $options
     * - text: text to analyse; the length limit depends on the endpoint used
     * - word_1: word 1, at most 64 bytes
     * - word_2: word 2, at most 64 bytes
     * - mode: model selection, 0 (web model) or 1 (query model); defaults to 0
     *      1. Query model: trained on users' everyday Baidu search data; suited to information-seeking searches or colloquial queries. Example: "how do I clean dust out of the gaps in a phone"
     *      2. Web model: trained on web page data from across the internet; suited to written sentences such as web page text. Example: "generally speaking, shares can take the form of stock, equity stakes and so on"
     *
     * @return static
     */
    public function setOptions($options)
    {
        return parent::setOptions($options);
    }

    /**
     * Lexical analysis (general edition)
     *
     * - Provides three functions: word segmentation, part-of-speech tagging and named-entity recognition
     * - Identifies the basic words in a text string (segmentation), recombines them, tags the part of speech of the recombined words, and further identifies named entities
     * - setOptions(): text (limited to 20000 bytes)
     *
     * The length check is delegated to C::assertTrue; how it reports a failed
     * assertion is defined outside this class.
     *
     * @return static
     */
    public function lexer()
    {
        C::assertTrue(Strings::byteLength($this->_options['text']) <= 20000, '文字太长,不允许超过 20000 字节');
        $this->requestToken();
        // Only the 'text' option is sent as the JSON body; the token and charset
        // are passed separately as the third argument of Http::body.
        $this->_result = (array)Json::decode(Http::body('https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer', Json::encode(Arrays::some($this->_options, [
            'text',
        ])), [
            'access_token' => $this->_token,
            'charset' => 'UTF-8',
        ]));

        // Array view of the response: the 'item' column of the 'items' list
        // (an empty list when 'items' is absent).
        $this->_toArrayCall = function($result) {
            return Arrays::column((array)I::get($result, 'items', []), 'item');
        };

        return $this;
    }

    /**
     * Dependency parsing
     *
     * - The dependency parsing endpoint automatically analyses the dependency-syntax structure of a text: it uses the dependency relations between words to express syntactic structure (e.g. subject-verb, verb-object, attribute-head) and represents the structure of the whole sentence as a tree
     * - setOptions(): text (limited to 256 bytes), mode
     *
     * @return static
     *
     * @throws Exception when text is longer than 256 bytes
     */
    public function depparser()
    {
        if (Strings::byteLength($this->_options['text']) > 256) {
            throw new Exception('文字太长,不允许超过 256 字节');
        }
        $this->requestToken();
        // 'text' and 'mode' form the JSON body; the token and charset are passed
        // separately as the third argument of Http::body.
        $this->_result = (array)Json::decode(Http::body('https://aip.baidubce.com/rpc/2.0/nlp/v1/depparser', Json::encode(Arrays::some($this->_options, [
            'text',
            'mode',
        ])), [
            'access_token' => $this->_token,
            'charset' => 'UTF-8',
        ]));
        // Array view of the response: the 'word' column of the 'items' list
        // (an empty list when 'items' is absent).
        $this->_toArrayCall = function($result) {
            return Arrays::column((array)I::get($result, 'items', []), 'word');
        };

        return $this;
    }

    /**
     * Word similarity
     *
     * - Takes two words and yields the similarity result for them
     * - setOptions(): word_1, word_2
     *
     * @return static
     *
     * @throws Exception when word_1 or word_2 is longer than 64 bytes
     */
    public function wordSim()
    {
        if (Strings::byteLength($this->_options['word_1']) > 64) {
            throw new Exception('词 1 太长,不允许超过 64 字节');
        }
        if (Strings::byteLength($this->_options['word_2']) > 64) {
            throw new Exception('词 2 太长,不允许超过 64 字节');
        }
        $this->requestToken();
        // Cast to array exactly as lexer() and depparser() do, so that $_result
        // is always assigned an array regardless of what Json::decode returns.
        $this->_result = (array)Json::decode(Http::body('https://aip.baidubce.com/rpc/2.0/nlp/v2/word_emb_sim', Json::encode(Arrays::some($this->_options, [
            'word_1',
            'word_2',
        ])), [
            'access_token' => $this->_token,
            'charset' => 'UTF-8',
        ]));
        // Array view of the response: the value stored under 'score'.
        $this->_toArrayCall = function($result) {
            return I::get($result, 'score');
        };

        return $this;
    }
}
143
|
|
|
|
Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.
For example, imagine you have a variable `$accountId` that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the `id` property of an instance of the `Account` class. This class holds a proper account, so the id value must no longer be false. Either this assignment is in error or a type check should be added for that assignment.