Passed
Push — master ( 368a55...684ad3 )
by Ekin
04:13
created

BrillTagger::isAccusativePronoun()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
crap 1
1
<?php
2
/**
3
 * Part Of Speech Tagging
4
 * Brill Tagger
5
 *
6
 * @category   BrillTagger
7
 * @author     Ekin H. Bayar <[email protected]>
8
 * @version    0.1.0
9
 */
10
11
namespace BrillTagger;
12
13
class BrillTagger
{
    /**
     * Lexicon used for tag lookups, taken from the LEXICON constant.
     *
     * As used by this class, each entry maps a token to a list of tags: the
     * first tag is the initial guess and the remaining ones are alternatives
     * consulted by the contextual rules.
     *
     * @var array
     */
    private $dictionary = LEXICON;

    /**
     * Split a text into tokens and assign a part-of-speech tag to each one.
     *
     * Every token starts out as a common noun ('NN'); the lexicon and the
     * morphological / contextual rules below then refine that guess.
     *
     * @param string $text
     * @return array list of ['token' => string, 'tag' => string] pairs, in text order
     */
    public function tag($text)
    {
        # No tokens (or a regex failure) means there is nothing to tag
        if (!preg_match_all("/[\w\d\.'%@]+/", $text, $matches)) {
            return [];
        }

        $tags = [];

        foreach ($matches[0] as $i => $rawToken) {
            # default to a common noun; the reported token keeps its original form
            $tags[$i] = ['token' => $rawToken, 'tag' => 'NN'];

            # remove trailing full stops before any lookup
            $token = rtrim($rawToken, '.');

            # get from dictionary if set
            if ($this->tokenExists($token)) {
                $tags[$i]['tag'] = $this->dictionary[$token][0];
            }

            $tags[$i]['tag'] = $this->transformNumerics($tags[$i]['tag'], $token);

            # Anything that ends 'ly' is an adverb
            if ($this->isAdverb($token)) {
                $tags[$i]['tag'] = 'RB';
            }

            # Refine nouns by suffix, but leave proper nouns alone
            if ($this->isNoun($tags[$i]['tag']) && !$this->isProperNoun($tags[$i]['tag'])) {
                $tags[$i]['tag'] = $this->transformNoun($tags[$i]['tag'], $token);
            }

            # Contextual rules need a preceding token
            if ($i > 0) {
                $tags[$i]['tag'] = $this->transformNounToVerb($tags, $i, $token);
                $tags[$i]['tag'] = $this->transformVerbToNoun($tags, $i);
            }
        }

        return $tags;
    }

    /**
     * Tell whether the lexicon has an entry for the token.
     *
     * @param string $token
     * @return bool
     */
    public function tokenExists($token)
    {
        return isset($this->dictionary[$token]);
    }

    /**
     * Any tag starting with 'N'.
     *
     * @param string $tag
     * @return bool
     */
    public function isNoun($tag)
    {
        return substr(trim($tag), 0, 1) === 'N';
    }

    /**
     * Any tag starting with 'NP'.
     *
     * @param string $tag
     * @return bool
     */
    public function isProperNoun($tag)
    {
        return substr(trim($tag), 0, 2) === 'NP';
    }

    /**
     * Exactly the 'NN' tag.
     *
     * @param string $tag
     * @return bool
     */
    public function isSingularNoun($tag)
    {
        return $tag === 'NN';
    }

    /**
     * A noun tag whose token ends with 's'.
     *
     * @param string $tag
     * @param string $token
     * @return bool
     */
    public function isPluralNoun($tag, $token)
    {
        return $this->isNoun($tag) && substr($token, -1) === 's';
    }

    /**
     * Any tag starting with 'VB'.
     *
     * @param string $tag
     * @return bool
     */
    public function isVerb($tag)
    {
        return substr(trim($tag), 0, 2) === 'VB';
    }

    /**
     * Any tag starting with 'P'.
     *
     * @param string $tag
     * @return bool
     */
    public function isPronoun($tag)
    {
        return substr(trim($tag), 0, 1) === 'P';
    }

    /**
     * Tell whether the lexicon lists 'VBN' among the token's tags.
     * Tokens missing from the lexicon yield false.
     *
     * @param string $token
     * @return bool
     */
    public function isPastTenseVerb($token)
    {
        return in_array('VBN', $this->lexiconTags($token), true);
    }

    /**
     * Tell whether the lexicon lists 'VBZ' among the token's tags.
     * Tokens missing from the lexicon yield false.
     *
     * @param string $token
     * @return bool
     */
    public function isPresentTenseVerb($token)
    {
        return in_array('VBZ', $this->lexiconTags($token), true);
    }

    /**
     * Tags starting with 'PPO', e.g. it him me us you 'em thee we'uns
     *
     * @param string $tag
     * @return bool
     */
    public function isAccusativePronoun($tag)
    {
        return substr(trim($tag), 0, 3) === 'PPO';
    }

    /**
     * Tags starting with 'PPS', e.g. it he she thee
     *
     * This is a prefix match, so 'PPSS' tags satisfy this check as well.
     *
     * @param string $tag
     * @return bool
     */
    public function isThirdPersonPronoun($tag)
    {
        return substr(trim($tag), 0, 3) === 'PPS';
    }

    /**
     * Tags starting with 'PPSS', e.g. they we I you ye thou you'uns
     *
     * @param string $tag
     * @return bool
     */
    public function isSingularPersonalPronoun($tag)
    {
        return substr(trim($tag), 0, 4) === 'PPSS';
    }

    /**
     * Tags starting with 'PPL', e.g. itself himself myself yourself herself oneself ownself
     *
     * This is a prefix match, so 'PPLS' tags satisfy this check as well.
     *
     * @param string $tag
     * @return bool
     */
    public function isSingularReflexivePronoun($tag)
    {
        return substr(trim($tag), 0, 3) === 'PPL';
    }

    /**
     * Tags starting with 'PPLS', e.g. themselves ourselves yourselves
     *
     * @param string $tag
     * @return bool
     */
    public function isPluralReflexivePronoun($tag)
    {
        return substr(trim($tag), 0, 4) === 'PPLS';
    }

    /**
     * Exactly 'PP$$' or 'PP$', e.g. ours mine his her/hers their/theirs our its my your/yours thy thine
     *
     * Unlike the other pronoun checks this one compares the whole, untrimmed tag.
     *
     * @param string $tag
     * @return bool
     */
    public function isPossessivePronoun($tag)
    {
        return in_array($tag, ['PP$$', 'PP$'], true);
    }

    /**
     * A token is treated as an adjective when it ends with 'al' or when the
     * lexicon lists 'JJ' among its tags. Tokens missing from the lexicon are
     * judged on the suffix alone.
     *
     * @param string $token
     * @return bool
     */
    public function isAdjective($token)
    {
        return substr($token, -2) === 'al'
            || in_array('JJ', $this->lexiconTags($token), true);
    }

    /**
     * Token ends with 'ing'.
     *
     * @param string $token
     * @return bool
     */
    public function isGerund($token)
    {
        return substr($token, -3) === 'ing';
    }

    /**
     * Token ends with 'ed'.
     *
     * @param string $token
     * @return bool
     */
    public function isPastParticiple($token)
    {
        return substr($token, -2) === 'ed';
    }

    /**
     * Token ends with 'ly'.
     *
     * @param string $token
     * @return bool
     */
    public function isAdverb($token)
    {
        return substr($token, -2) === 'ly';
    }

    /**
     * Refine a common-noun tag from the shape of the token: adjective if it
     * ends with 'al' (or the lexicon says so), gerund if 'ing', past
     * participle if 'ed', 'PPSS' for the word 'I', plural noun if it ends
     * with 's'. The first matching rule wins.
     *
     * Independently of those rules, any token containing a '.' ends up
     * tagged as a number ('CD').
     *
     * @param string $tag
     * @param string $token
     * @return string
     */
    public function transformNoun($tag, $token)
    {
        if ($this->isAdjective($token)) {
            $tag = 'JJ';
        } elseif ($this->isGerund($token)) {
            $tag = 'VBG';
        } elseif ($this->isPastParticiple($token)) {
            $tag = 'VBN';
        } elseif ($token === 'I') {
            $tag = 'PPSS';
        } elseif ($this->isPluralNoun($tag, $token)) {
            $tag = 'NNS';
        }

        # Convert noun to number if . appears
        if (strpos($token, '.') !== false) {
            $tag = 'CD';
        }

        return $tag;
    }

    /**
     * Contextual rules that may turn the noun at position $i into a verb.
     * Requires $i > 0 because both rules look at the preceding entry.
     *
     * @param array  $tags  entries produced so far, each with 'token' and 'tag'
     * @param int    $i     index of the entry to re-evaluate
     * @param string $token token at $i with trailing full stops removed
     * @return string the (possibly changed) tag for position $i
     */
    public function transformNounToVerb($tags, $i, $token)
    {
        $tag      = $tags[$i]['tag'];
        $previous = $tags[$i - 1];

        # Noun to verb if the word before is 'would'
        if ($this->isSingularNoun($tag) && $previous['token'] === 'would') {
            $tag = 'VB';
        }

        # If we get noun noun, and the 2nd can be a verb, convert to verb
        if ($this->isNoun($tag) && $this->isNoun($previous['tag']) && $this->tokenExists($token)) {
            if ($this->isPastTenseVerb($token)) {
                $tag = 'VBN';
            } elseif ($this->isPresentTenseVerb($token)) {
                $tag = 'VBZ';
            }
        }

        return $tag;
    }

    /**
     * Contextual rule: a verb directly after a determiner ('DT') becomes a
     * common noun. Requires $i > 0 because it looks at the preceding entry.
     *
     * @param array $tags entries produced so far, each with 'token' and 'tag'
     * @param int   $i    index of the entry to re-evaluate
     * @return string the (possibly changed) tag for position $i
     */
    public function transformVerbToNoun($tags, $i)
    {
        $tag = $tags[$i]['tag'];

        # Converts verbs after 'the' to nouns
        if ($tags[$i - 1]['tag'] === 'DT' && $this->isVerb($tag)) {
            return 'NN';
        }

        return $tag;
    }

    /**
     * Re-tag tokens matching the NUMERAL, YEAR or PERCENTAGE patterns.
     *
     * The three checks run in that order and a later match overrides an
     * earlier one. The patterns are constants defined outside this class.
     *
     * @param string $tag
     * @param string $token
     * @return string
     */
    public function transformNumerics($tag, $token)
    {
        # tag numerals, cardinals, money (NNS)
        if (preg_match(NUMERAL, $token)) {
            $tag = 'NNS';
        }

        # tag years
        if (preg_match(YEAR, $token, $matches)) {
            # NOTE(review): presumably YEAR has a named group 'nns' for plural
            # forms such as decades - confirm against the pattern definition
            $tag = isset($matches['nns']) ? 'NNS' : 'CD';
        }

        # tag percentages
        if (preg_match(PERCENTAGE, $token)) {
            $tag = 'NN';
        }

        return $tag;
    }

    /**
     * Tags the lexicon lists for a token, or an empty list when the token is
     * unknown, so callers can search the result without checking first.
     *
     * @param string $token
     * @return array
     */
    private function lexiconTags($token)
    {
        return isset($this->dictionary[$token]) ? $this->dictionary[$token] : [];
    }
}
341