@@ -21,93 +21,93 @@ |
||
| 21 | 21 | |
| 22 | 22 | preg_match_all("/[\w\d\.']+/", $text, $matches); |
| 23 | 23 | |
| 24 | - $nouns = ['NN', 'NNS']; |
|
| 24 | + $nouns = ['NN', 'NNS']; |
|
| 25 | 25 | $tags = []; |
| 26 | 26 | $i = 0; |
| 27 | 27 | |
| 28 | - foreach($matches[0] as $token) { |
|
| 28 | + foreach ($matches[0] as $token) { |
|
| 29 | 29 | # default to a common noun |
| 30 | 30 | $tags[$i] = ['token' => $token, 'tag' => 'NN']; |
| 31 | 31 | |
| 32 | 32 | # remove trailing full stops |
| 33 | - if(substr($token, -1) == '.') { |
|
| 33 | + if (substr($token, -1) == '.') { |
|
| 34 | 34 | $token = preg_replace('/\.+$/', '', $token); |
| 35 | 35 | } |
| 36 | 36 | |
| 37 | 37 | # get from dictionary if set |
| 38 | - if(isset($this->dictionary[strtolower($token)])) { |
|
| 38 | + if (isset($this->dictionary[strtolower($token)])) { |
|
| 39 | 39 | $tags[$i]['tag'] = $this->dictionary[strtolower($token)][0]; |
| 40 | 40 | } |
| 41 | 41 | |
| 42 | 42 | # Converts verbs after 'the' to nouns |
| 43 | - if($i > 0) { |
|
| 44 | - if($tags[$i - 1]['tag'] == 'DT' && in_array($tags[$i]['tag'], ['VBD', 'VBP', 'VB'])) { |
|
| 43 | + if ($i > 0) { |
|
| 44 | + if ($tags[$i-1]['tag'] == 'DT' && in_array($tags[$i]['tag'], ['VBD', 'VBP', 'VB'])) { |
|
| 45 | 45 | $tags[$i]['tag'] = 'NN'; |
| 46 | 46 | } |
| 47 | 47 | } |
| 48 | 48 | |
| 49 | 49 | # Convert noun to number if . appears |
| 50 | - if($tags[$i]['tag'][0] == 'N' && strpos($token, '.') !== false) { |
|
| 50 | + if ($tags[$i]['tag'][0] == 'N' && strpos($token, '.') !== false) { |
|
| 51 | 51 | $tags[$i]['tag'] = 'CD'; |
| 52 | 52 | } |
| 53 | 53 | |
| 54 | 54 | # manually tag numerals (years/money too) (NNS) |
| 55 | - if(preg_match(NUMERAL, $token)){ |
|
| 55 | + if (preg_match(NUMERAL, $token)) { |
|
| 56 | 56 | $tags[$i]['tag'] = 'NNS'; |
| 57 | 57 | } |
| 58 | 58 | |
| 59 | 59 | # years like: '80s (NNS) | '73 (CD) |
| 60 | - if(preg_match(YEAR, $token, $matches)){ |
|
| 60 | + if (preg_match(YEAR, $token, $matches)) { |
|
| 61 | 61 | $tags[$i]['tag'] = (isset($matches['nns'])) ? 'NNS' : 'CD'; |
| 62 | 62 | } |
| 63 | 63 | |
| 64 | 64 | # 80% NN |
| 65 | - if(preg_match(PERCENTAGE, $token)){ |
|
| 65 | + if (preg_match(PERCENTAGE, $token)) { |
|
| 66 | 66 | $tags[$i]['tag'] = 'NN'; |
| 67 | 67 | } |
| 68 | 68 | |
| 69 | 69 | # Convert noun to past participle if ends with 'ed' |
| 70 | - if($tags[$i]['tag'][0] == 'N' && substr($token, -2) == 'ed') { |
|
| 70 | + if ($tags[$i]['tag'][0] == 'N' && substr($token, -2) == 'ed') { |
|
| 71 | 71 | $tags[$i]['tag'] = 'VBN'; |
| 72 | 72 | } |
| 73 | 73 | |
| 74 | 74 | # Anything that ends 'ly' is an adverb |
| 75 | - if(substr($token, -2) == 'ly') { |
|
| 75 | + if (substr($token, -2) == 'ly') { |
|
| 76 | 76 | $tags[$i]['tag'] = 'RB'; |
| 77 | 77 | } |
| 78 | 78 | |
| 79 | 79 | # Common noun to adjective if it ends with 'al' |
| 80 | - if(in_array($tags[$i]['tag'], $nouns) && substr($token, -2) == 'al') { |
|
| 80 | + if (in_array($tags[$i]['tag'], $nouns) && substr($token, -2) == 'al') { |
|
| 81 | 81 | $tags[$i]['tag'] = 'JJ'; |
| 82 | 82 | } |
| 83 | 83 | |
| 84 | 84 | # Noun to verb if the word before is 'would' |
| 85 | - if($i > 0) { |
|
| 86 | - if($tags[$i]['tag'] == 'NN' && strtolower($tags[$i-1]['token']) == 'would') { |
|
| 85 | + if ($i > 0) { |
|
| 86 | + if ($tags[$i]['tag'] == 'NN' && strtolower($tags[$i-1]['token']) == 'would') { |
|
| 87 | 87 | $tags[$i]['tag'] = 'VB'; |
| 88 | 88 | } |
| 89 | 89 | } |
| 90 | 90 | |
| 91 | 91 | # Noun to plural if it ends with an 's' |
| 92 | - if($tags[$i]['tag'] == 'NN' && substr($token, -1) == 's') { |
|
| 92 | + if ($tags[$i]['tag'] == 'NN' && substr($token, -1) == 's') { |
|
| 93 | 93 | $tags[$i]['tag'] = 'NNS'; |
| 94 | 94 | } |
| 95 | 95 | |
| 96 | 96 | # Common noun to gerund |
| 97 | - if(in_array($tags[$i]['tag'], $nouns) && substr($token, -3) == 'ing') { |
|
| 97 | + if (in_array($tags[$i]['tag'], $nouns) && substr($token, -3) == 'ing') { |
|
| 98 | 98 | $tags[$i]['tag'] = 'VBG'; |
| 99 | 99 | } |
| 100 | 100 | |
| 101 | 101 | # If we get noun noun, and the 2nd can be a verb, convert to verb |
| 102 | - if($i > 0) { |
|
| 102 | + if ($i > 0) { |
|
| 103 | 103 | |
| 104 | - if( in_array($tags[$i]['tag'], $nouns) |
|
| 104 | + if (in_array($tags[$i]['tag'], $nouns) |
|
| 105 | 105 | && in_array($tags[$i-1]['tag'], $nouns) |
| 106 | 106 | && isset($this->dictionary[strtolower($token)]) |
| 107 | 107 | ) { |
| 108 | - if(in_array('VBN', $this->dictionary[strtolower($token)])) { |
|
| 108 | + if (in_array('VBN', $this->dictionary[strtolower($token)])) { |
|
| 109 | 109 | $tags[$i]['tag'] = 'VBN'; |
| 110 | - } else if(in_array('VBZ', $this->dictionary[strtolower($token)])) { |
|
| 110 | + } else if (in_array('VBZ', $this->dictionary[strtolower($token)])) { |
|
| 111 | 111 | $tags[$i]['tag'] = 'VBZ'; |
| 112 | 112 | } |
| 113 | 113 | } |