Completed
Push — master ( 4a5bc5...c5c112 )
by Ekin
03:01
created

BrillTagger::isPastParticiple()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 3
ccs 2
cts 2
cp 1
rs 10
cc 1
eloc 2
nc 1
nop 1
crap 1
1
<?php
2
/**
3
 * Part Of Speech Tagging
4
 * Brill Tagger
5
 *
6
 * @category   BrillTagger
7
 * @author     Ekin H. Bayar <[email protected]>
8
 * @version    0.1.0
9
 */
10
11
namespace BrillTagger;
12
13
class BrillTagger
{
    /**
     * Lexicon used for every lookup, taken from the LEXICON constant.
     *
     * It is read as a map of lower-cased token => list of tags; the first
     * tag of a list is used as the word's default tag.
     */
    private $dictionary = LEXICON;

    /**
     * Splits the text into tokens and assigns a part-of-speech tag to each.
     *
     * Every token starts as a common noun ('NN') unless the lexicon knows it,
     * then the numeric, suffix and neighbour-based rules below are applied
     * in a fixed order.
     *
     * @param string $text
     * @return array list of ['token' => string, 'tag' => string] in text order
     */
    public function tag($text) {
        preg_match_all("/[\w\d\.'%@]+/", $text, $matches);

        $tags = [];

        foreach ($matches[0] as $i => $rawToken) {
            // The reported token keeps its trailing full stops; they are only
            // stripped from the copy used for lookups and suffix rules.
            $token = rtrim($rawToken, '.');

            // Lexicon's first tag if the word is known, common noun otherwise
            $tag = $this->tokenExists($token)
                ? $this->dictionary[strtolower($token)][0]
                : 'NN';

            $tag = $this->transformNumerics($tag, $token);

            // Anything that ends in 'ly' is treated as an adverb
            if ($this->isAdverb($token)) {
                $tag = 'RB';
            }

            if ($this->isNoun($tag)) {
                $tag = $this->transformNoun($tag, $token);
            }

            $tags[$i] = ['token' => $rawToken, 'tag' => $tag];

            // Context rules need a preceding token
            if ($i > 0) {
                $tags[$i]['tag'] = $this->transformNounToVerb($tags, $i, $token);
                $tags[$i]['tag'] = $this->transformVerbToNoun($tags, $i);
            }
        }

        return $tags;
    }

    /**
     * Tells whether the lexicon has an entry for the token (case-insensitive).
     *
     * @param string $token
     * @return bool
     */
    public function tokenExists($token) {
        return isset($this->dictionary[strtolower($token)]);
    }

    /**
     * Returns the tags the lexicon lists for a token, or an empty list when
     * the token is unknown, so callers can search the result safely.
     *
     * @param string $token
     * @return array
     */
    private function lexiconTags($token) {
        $key = strtolower($token);

        return isset($this->dictionary[$key]) ? $this->dictionary[$key] : [];
    }

    /**
     * True when the tag starts with 'N'.
     *
     * @param string $tag
     * @return bool
     */
    public function isNoun($tag) {
        return substr(trim($tag), 0, 1) === 'N';
    }

    /**
     * True when the tag is exactly 'NN'.
     *
     * @param string $tag
     * @return bool
     */
    public function isSingularNoun($tag) {
        return $tag === 'NN';
    }

    /**
     * True when the tag is a noun tag and the token ends in 's'.
     *
     * @param string $tag
     * @param string $token
     * @return bool
     */
    public function isPluralNoun($tag, $token) {
        return $this->isNoun($tag) && substr($token, -1) === 's';
    }

    /**
     * True when the tag starts with 'VB'.
     *
     * @param string $tag
     * @return bool
     */
    public function isVerb($tag) {
        return substr(trim($tag), 0, 2) === 'VB';
    }

    /**
     * True when the tag starts with 'P'.
     *
     * @param string $tag
     * @return bool
     */
    public function isPronoun($tag) {
        return substr(trim($tag), 0, 1) === 'P';
    }

    /**
     * True when the lexicon lists 'VBN' among the token's tags.
     * Unknown tokens yield false.
     *
     * @param string $token
     * @return bool
     */
    public function isPastTenseVerb($token) {
        return in_array('VBN', $this->lexiconTags($token), true);
    }

    /**
     * True when the lexicon lists 'VBZ' among the token's tags.
     * Unknown tokens yield false.
     *
     * @param string $token
     * @return bool
     */
    public function isPresentTenseVerb($token) {
        return in_array('VBZ', $this->lexiconTags($token), true);
    }

    /**
     * True when the tag is 'PPO' (e.g. it, him, me, us, you, 'em, thee, we'uns).
     *
     * @param string $tag
     * @return bool
     */
    public function isAccusativePronoun($tag) {
        return $tag === 'PPO';
    }

    /**
     * True when the tag is 'PPS' (e.g. it, he, she, thee).
     *
     * @param string $tag
     * @return bool
     */
    public function isThirdPersonPronoun($tag) {
        return $tag === 'PPS';
    }

    /**
     * True when the tag is 'PPSS' (e.g. they, we, I, you, ye, thou, you'uns).
     *
     * @param string $tag
     * @return bool
     */
    public function isSingularPersonalPronoun($tag) {
        return $tag === 'PPSS';
    }

    /**
     * True when the tag is 'PPL'
     * (e.g. itself, himself, myself, yourself, herself, oneself, ownself).
     *
     * @param string $tag
     * @return bool
     */
    public function isSingularReflexivePronoun($tag) {
        return $tag === 'PPL';
    }

    /**
     * True when the tag is 'PPLS' (e.g. themselves, ourselves, yourselves).
     *
     * @param string $tag
     * @return bool
     */
    public function isPluralReflexivePronoun($tag) {
        return $tag === 'PPLS';
    }

    /**
     * True when the tag is 'PP$' or 'PP$$' (e.g. ours, mine, his, her/hers,
     * their/theirs, our, its, my, your/yours, thy, thine).
     *
     * @param string $tag
     * @return bool
     */
    public function isPossessivePronoun($tag) {
        return in_array($tag, ['PP$$', 'PP$'], true);
    }

    /**
     * True when the token ends in 'al' or the lexicon lists 'JJ' for it.
     * Unknown tokens are judged on the suffix alone.
     *
     * @param string $token
     * @return bool
     */
    public function isAdjective($token) {
        return substr($token, -2) === 'al'
            || in_array('JJ', $this->lexiconTags($token), true);
    }

    /**
     * True when the token ends in 'ing'.
     *
     * @param string $token
     * @return bool
     */
    public function isGerund($token) {
        return substr($token, -3) === 'ing';
    }

    /**
     * True when the token ends in 'ed'.
     *
     * @param string $token
     * @return bool
     */
    public function isPastParticiple($token) {
        return substr($token, -2) === 'ed';
    }

    /**
     * True when the token ends in 'ly'.
     *
     * @param string $token
     * @return bool
     */
    public function isAdverb($token) {
        return substr($token, -2) === 'ly';
    }

    /**
     * Refines a noun tag from the shape of the token.
     *
     * The first matching rule wins: adjective ('JJ'), 'ing' ending ('VBG'),
     * 'ed' ending ('VBN'), the word 'I' ('PPSS'), trailing 's' ('NNS').
     * Independently of those, a token that contains a '.' ends up as 'CD'.
     *
     * @param string $tag
     * @param string $token
     * @return string
     */
    public function transformNoun($tag, $token) {
        if ($this->isAdjective($token)) {
            $tag = 'JJ';
        } elseif ($this->isGerund($token)) {
            $tag = 'VBG';
        } elseif ($this->isPastParticiple($token)) {
            $tag = 'VBN';
        } elseif ($token === 'I') {
            $tag = 'PPSS';
        } elseif ($this->isPluralNoun($tag, $token)) {
            $tag = 'NNS';
        }

        // A dot inside the token overrides the rules above: treat it as a number
        if (strpos($token, '.') !== false) {
            $tag = 'CD';
        }

        return $tag;
    }

    /**
     * Re-tags the noun at position $i as a verb when its left neighbour
     * suggests so, and returns the resulting tag for that position.
     *
     * @param array $tags entries of ['token' => string, 'tag' => string]
     * @param int $i position of the entry to inspect
     * @param string $token lookup form of the token at position $i
     * @return string
     */
    public function transformNounToVerb($tags, $i, $token) {
        // Without a left neighbour there is no context to apply
        if (!isset($tags[$i - 1])) {
            return $tags[$i]['tag'];
        }

        $previous = $tags[$i - 1];

        // Noun to verb if the word before is 'would'
        if ($this->isSingularNoun($tags[$i]['tag']) && strtolower($previous['token']) === 'would') {
            $tags[$i]['tag'] = 'VB';
        }

        // Noun followed by noun: if the lexicon allows the second to be a verb, make it one
        if ($this->isNoun($tags[$i]['tag']) &&
            $this->isNoun($previous['tag']) &&
            $this->tokenExists($token)
        ) {
            if ($this->isPastTenseVerb($token)) {
                $tags[$i]['tag'] = 'VBN';
            } elseif ($this->isPresentTenseVerb($token)) {
                $tags[$i]['tag'] = 'VBZ';
            }
        }

        return $tags[$i]['tag'];
    }

    /**
     * Re-tags the verb at position $i as 'NN' when the entry before it is
     * tagged 'DT', and returns the resulting tag for that position.
     *
     * @param array $tags entries of ['token' => string, 'tag' => string]
     * @param int $i position of the entry to inspect
     * @return string
     */
    public function transformVerbToNoun($tags, $i) {
        // Without a left neighbour there is no context to apply
        if (!isset($tags[$i - 1])) {
            return $tags[$i]['tag'];
        }

        // Converts verbs after a determiner (e.g. 'the') to nouns
        if ($tags[$i - 1]['tag'] === 'DT' && $this->isVerb($tags[$i]['tag'])) {
            $tags[$i]['tag'] = 'NN';
        }

        return $tags[$i]['tag'];
    }

    /**
     * Overrides the tag for tokens matching the NUMERAL, YEAR or PERCENTAGE
     * patterns. The checks run in that order and a later match wins.
     *
     * NOTE(review): the three patterns are constants defined outside this
     * class; what exactly each one accepts should be confirmed there.
     *
     * @param string $tag
     * @param string $token
     * @return string
     */
    public function transformNumerics($tag, $token) {
        // tag numerals, cardinals, money (NNS)
        if (preg_match(NUMERAL, $token)) {
            $tag = 'NNS';
        }

        // tag years: 'NNS' when the pattern's 'nns' group is present, 'CD' otherwise
        if (preg_match(YEAR, $token, $matches)) {
            $tag = isset($matches['nns']) ? 'NNS' : 'CD';
        }

        // tag percentages
        if (preg_match(PERCENTAGE, $token)) {
            $tag = 'NN';
        }

        return $tag;
    }
}
303