@@ -21,93 +21,93 @@ |
||
21 | 21 | |
22 | 22 | preg_match_all("/[\w\d\.']+/", $text, $matches); |
23 | 23 | |
24 | - $nouns = ['NN', 'NNS']; |
|
24 | + $nouns = ['NN', 'NNS']; |
|
25 | 25 | $tags = []; |
26 | 26 | $i = 0; |
27 | 27 | |
28 | - foreach($matches[0] as $token) { |
|
28 | + foreach ($matches[0] as $token) { |
|
29 | 29 | # default to a common noun |
30 | 30 | $tags[$i] = ['token' => $token, 'tag' => 'NN']; |
31 | 31 | |
32 | 32 | # remove trailing full stops |
33 | - if(substr($token, -1) == '.') { |
|
33 | + if (substr($token, -1) == '.') { |
|
34 | 34 | $token = preg_replace('/\.+$/', '', $token); |
35 | 35 | } |
36 | 36 | |
37 | 37 | # get from dictionary if set |
38 | - if(isset($this->dictionary[strtolower($token)])) { |
|
38 | + if (isset($this->dictionary[strtolower($token)])) { |
|
39 | 39 | $tags[$i]['tag'] = $this->dictionary[strtolower($token)][0]; |
40 | 40 | } |
41 | 41 | |
42 | 42 | # Converts verbs after 'the' to nouns |
43 | - if($i > 0) { |
|
44 | - if($tags[$i - 1]['tag'] == 'DT' && in_array($tags[$i]['tag'], ['VBD', 'VBP', 'VB'])) { |
|
43 | + if ($i > 0) { |
|
44 | + if ($tags[$i-1]['tag'] == 'DT' && in_array($tags[$i]['tag'], ['VBD', 'VBP', 'VB'])) { |
|
45 | 45 | $tags[$i]['tag'] = 'NN'; |
46 | 46 | } |
47 | 47 | } |
48 | 48 | |
49 | 49 | # Convert noun to number if . appears |
50 | - if($tags[$i]['tag'][0] == 'N' && strpos($token, '.') !== false) { |
|
50 | + if ($tags[$i]['tag'][0] == 'N' && strpos($token, '.') !== false) { |
|
51 | 51 | $tags[$i]['tag'] = 'CD'; |
52 | 52 | } |
53 | 53 | |
54 | 54 | # manually tag numerals (years/money too) (NNS) |
55 | - if(preg_match(NUMERAL, $token)){ |
|
55 | + if (preg_match(NUMERAL, $token)) { |
|
56 | 56 | $tags[$i]['tag'] = 'NNS'; |
57 | 57 | } |
58 | 58 | |
59 | 59 | # years like: '80s (NNS) | '73 (CD) |
60 | - if(preg_match(YEAR, $token, $matches)){ |
|
60 | + if (preg_match(YEAR, $token, $matches)) { |
|
61 | 61 | $tags[$i]['tag'] = (isset($matches['nns'])) ? 'NNS' : 'CD'; |
62 | 62 | } |
63 | 63 | |
64 | 64 | # 80% NN |
65 | - if(preg_match(PERCENTAGE, $token)){ |
|
65 | + if (preg_match(PERCENTAGE, $token)) { |
|
66 | 66 | $tags[$i]['tag'] = 'NN'; |
67 | 67 | } |
68 | 68 | |
69 | 69 | # Convert noun to past participle if ends with 'ed' |
70 | - if($tags[$i]['tag'][0] == 'N' && substr($token, -2) == 'ed') { |
|
70 | + if ($tags[$i]['tag'][0] == 'N' && substr($token, -2) == 'ed') { |
|
71 | 71 | $tags[$i]['tag'] = 'VBN'; |
72 | 72 | } |
73 | 73 | |
74 | 74 | # Anything that ends 'ly' is an adverb |
75 | - if(substr($token, -2) == 'ly') { |
|
75 | + if (substr($token, -2) == 'ly') { |
|
76 | 76 | $tags[$i]['tag'] = 'RB'; |
77 | 77 | } |
78 | 78 | |
79 | 79 | # Common noun to adjective if it ends with 'al' |
80 | - if(in_array($tags[$i]['tag'], $nouns) && substr($token, -2) == 'al') { |
|
80 | + if (in_array($tags[$i]['tag'], $nouns) && substr($token, -2) == 'al') { |
|
81 | 81 | $tags[$i]['tag'] = 'JJ'; |
82 | 82 | } |
83 | 83 | |
84 | 84 | # Noun to verb if the word before is 'would' |
85 | - if($i > 0) { |
|
86 | - if($tags[$i]['tag'] == 'NN' && strtolower($tags[$i-1]['token']) == 'would') { |
|
85 | + if ($i > 0) { |
|
86 | + if ($tags[$i]['tag'] == 'NN' && strtolower($tags[$i-1]['token']) == 'would') { |
|
87 | 87 | $tags[$i]['tag'] = 'VB'; |
88 | 88 | } |
89 | 89 | } |
90 | 90 | |
91 | 91 | # Noun to plural if it ends with an 's' |
92 | - if($tags[$i]['tag'] == 'NN' && substr($token, -1) == 's') { |
|
92 | + if ($tags[$i]['tag'] == 'NN' && substr($token, -1) == 's') { |
|
93 | 93 | $tags[$i]['tag'] = 'NNS'; |
94 | 94 | } |
95 | 95 | |
96 | 96 | # Common noun to gerund |
97 | - if(in_array($tags[$i]['tag'], $nouns) && substr($token, -3) == 'ing') { |
|
97 | + if (in_array($tags[$i]['tag'], $nouns) && substr($token, -3) == 'ing') { |
|
98 | 98 | $tags[$i]['tag'] = 'VBG'; |
99 | 99 | } |
100 | 100 | |
101 | 101 | # If we get noun noun, and the 2nd can be a verb, convert to verb |
102 | - if($i > 0) { |
|
102 | + if ($i > 0) { |
|
103 | 103 | |
104 | - if( in_array($tags[$i]['tag'], $nouns) |
|
104 | + if (in_array($tags[$i]['tag'], $nouns) |
|
105 | 105 | && in_array($tags[$i-1]['tag'], $nouns) |
106 | 106 | && isset($this->dictionary[strtolower($token)]) |
107 | 107 | ) { |
108 | - if(in_array('VBN', $this->dictionary[strtolower($token)])) { |
|
108 | + if (in_array('VBN', $this->dictionary[strtolower($token)])) { |
|
109 | 109 | $tags[$i]['tag'] = 'VBN'; |
110 | - } else if(in_array('VBZ', $this->dictionary[strtolower($token)])) { |
|
110 | + } else if (in_array('VBZ', $this->dictionary[strtolower($token)])) { |
|
111 | 111 | $tags[$i]['tag'] = 'VBZ'; |
112 | 112 | } |
113 | 113 | } |