BrillTagger::isPluralNoun() - Code Metrics - Inspection of "Refactor" - ekinhbayar/BrillTagger - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Branch — unit-tests (e061ab)

by Ekin

created 2016-11-01 13:22 UTC

BrillTagger::isPluralNoun() A

↳ Parent: BrillTagger

Complexity

Conditions	2
Paths	2

Size

Total Lines	3
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	2
CRAP Score	2

Importance

Changes

Metric	Value
c	0
b	0
f	0
dl	0
loc	3
ccs	2
cts	2
cp	1
rs	10
cc	2
eloc	2
nc	2
nop	2
crap	2

<?php
/**
 * Part Of Speech Tagging
 * Brill Tagger
 *
 * @category   BrillTagger
 * @author     Ekin H. Bayar <[email protected]>
 * @version    0.1.0
 */

namespace BrillTagger;

/**
 * Rule-based part-of-speech tagger.
 *
 * Each token first receives a tag from the lexicon (or 'NN' when the token is
 * unknown); a fixed sequence of pattern, suffix and context rules then
 * overrides that tag. The rules run in order, so a later rule wins over an
 * earlier one.
 */
class BrillTagger
{
    /**
     * Lexicon taken from the LEXICON constant, which is defined outside this class.
     *
     * NOTE(review): the code below indexes it by lower-cased token and treats
     * every entry as a list of tags whose first element is the preferred tag;
     * confirm against the lexicon definition.
     */
    private $dictionary = LEXICON;

    /**
     * Splits $text into tokens and assigns one tag to each token.
     *
     * A token is any run of word characters, digits, '.', "'", '%' or '@'.
     * Trailing full stops are ignored by the rules but kept in the reported token.
     *
     * @param string $text Text to tag.
     * @return array List of ['token' => string, 'tag' => string] in input order.
     */
    public function tag($text) {

        preg_match_all("/[\w\d\.'%@]+/", $text, $matches);

        $tags = [];

        # preg_match_all yields a 0-based list, so the key doubles as the position
        foreach ($matches[0] as $i => $rawToken) {

            # remove trailing full stops (tokens never contain whitespace)
            $token = rtrim($rawToken, '.');
            $key   = strtolower($token);

            # get from dictionary if set, otherwise default to a common noun
            $tag = isset($this->dictionary[$key]) ? $this->dictionary[$key][0] : 'NN';

            # tag numerals, cardinals, money (NNS)
            if (preg_match(NUMERAL, $token)) {
                $tag = 'NNS';
            }

            # tag years; a dedicated output variable keeps the token list intact
            # NOTE(review): preg_match fills unmatched non-trailing named groups
            # with '', so isset() only distinguishes the cases if 'nns' is the
            # last group of YEAR - confirm against the pattern definition.
            if (preg_match(YEAR, $token, $yearParts)) {
                $tag = isset($yearParts['nns']) ? 'NNS' : 'CD';
            }

            # tag percentages
            if (preg_match(PERCENTAGE, $token)) {
                $tag = 'NN';
            }

            # Anything that ends 'ly' is an adverb
            if ($this->isAdverb($token)) {
                $tag = 'RB';
            }

            # Common noun to adj. if it ends with 'al', to gerund if 'ing', to past tense if 'ed'
            if ($this->isNoun($tag)) {

                if ($this->isAdjective($token)) {
                    $tag = 'JJ';
                } elseif ($this->isGerund($token)) {
                    $tag = 'VBG';
                } elseif ($this->isPastParticiple($token)) {
                    $tag = 'VBN';
                } elseif ($token === 'I') {
                    $tag = 'PPSS';
                }

                # Convert noun to number if . appears (only inner dots remain here)
                if (strpos($token, '.') !== false) {
                    $tag = 'CD';
                }
            }

            # Noun to plural if it ends with an 's'
            if ($this->isPluralNoun($tag, $token)) {
                $tag = 'NNS';
            }

            if ($i > 0) {
                $previous = $tags[$i - 1];

                # Converts verbs after 'the' to nouns
                if ($previous['tag'] === 'DT' && $this->isVerb($tag)) {
                    $tag = 'NN';
                }

                # Noun to verb if the word before is 'would'
                if ($this->isSingularNoun($tag) && strtolower($previous['token']) === 'would') {
                    $tag = 'VB';
                }

                # If we get noun noun, and the 2nd can be a verb, convert to verb
                if ($this->isNoun($tag) &&
                    $this->isNoun($previous['tag']) &&
                    $this->tokenExists($token)
                ) {
                    if ($this->isPastTenseVerb($token)) {
                        $tag = 'VBN';
                    } elseif ($this->isPresentTenseVerb($token)) {
                        $tag = 'VBZ';
                    }
                }
            }

            $tags[$i] = ['token' => $rawToken, 'tag' => $tag];
        }

        return $tags;
    }

    /**
     * @param string $token
     * @return bool True when the lower-cased token is a key of the lexicon.
     */
    public function tokenExists($token){
        return isset($this->dictionary[strtolower($token)]);
    }

    /**
     * @param string $tag
     * @return bool True when the trimmed tag starts with 'N'.
     */
    public function isNoun($tag) {
        return substr(trim($tag), 0, 1) === 'N';
    }

    /**
     * @param string $tag
     * @return bool True when the tag is exactly 'NN'.
     */
    public function isSingularNoun($tag){
        return $tag === 'NN';
    }

    /**
     * @param string $tag
     * @param string $token
     * @return bool True when the tag is a noun tag and the token ends with 's'.
     */
    public function isPluralNoun($tag, $token) {
        return $this->isNoun($tag) && substr($token, -1) === 's';
    }

    /**
     * @param string $tag
     * @return bool True when the trimmed tag starts with 'VB'.
     */
    public function isVerb($tag) {
        return substr(trim($tag), 0, 2) === 'VB';
    }

    /**
     * @param string $tag
     * @return bool True when the trimmed tag starts with 'P'.
     */
    public function isPronoun($tag) {
        return substr(trim($tag), 0, 1) === 'P';
    }

    /**
     * @param string $token
     * @return bool True when the lexicon lists 'VBN' for the token; false for unknown tokens.
     */
    public function isPastTenseVerb($token) {
        return $this->lexiconHasTag($token, 'VBN');
    }

    /**
     * @param string $token
     * @return bool True when the lexicon lists 'VBZ' for the token; false for unknown tokens.
     */
    public function isPresentTenseVerb($token) {
        return $this->lexiconHasTag($token, 'VBZ');
    }

    # it him me us you 'em thee we'uns
    public function isAccusativePronoun($tag) {
        return $tag === 'PPO';
    }

    # it he she thee
    public function isThirdPersonPronoun($tag) {
        return $tag === 'PPS';
    }

    # they we I you ye thou you'uns
    public function isSingularPersonalPronoun($tag) {
        return $tag === 'PPSS';
    }

    # itself himself myself yourself herself oneself ownself
    public function isSingularReflexivePronoun($tag) {
        return $tag === 'PPL';
    }

    # themselves ourselves yourselves
    public function isPluralReflexivePronoun($tag) {
        return $tag === 'PPLS';
    }

    #  ours mine his her/hers their/theirs our its my your/yours out thy thine
    public function isPossessivePronoun($tag) {
        return in_array($tag, ['PP$$', 'PP$'], true);
    }

    /**
     * Suffix heuristic: the token ends with 'al'.
     *
     * @param string $token
     * @return bool
     */
    public function isAdjective($token) {
        return substr($token, -2) === 'al';
    }

    /**
     * Suffix heuristic: the token ends with 'ing'.
     *
     * @param string $token
     * @return bool
     */
    public function isGerund($token) {
        return substr($token, -3) === 'ing';
    }

    /**
     * Suffix heuristic: the token ends with 'ed'.
     *
     * @param string $token
     * @return bool
     */
    public function isPastParticiple($token) {
        return substr($token, -2) === 'ed';
    }

    /**
     * Suffix heuristic: the token ends with 'ly'.
     *
     * @param string $token
     * @return bool
     */
    public function isAdverb($token){
        return substr($token, -2) === 'ly';
    }

    /**
     * Checks whether the lexicon entry of a token contains the given tag.
     *
     * Unknown tokens yield false instead of handing a missing entry to in_array().
     *
     * @param string $token
     * @param string $tag
     * @return bool
     */
    private function lexiconHasTag($token, $tag) {
        $key = strtolower($token);

        return isset($this->dictionary[$key])
            && in_array($tag, $this->dictionary[$key], true);
    }
}


1		<?php
2		/**
3		* Part Of Speech Tagging
4		* Brill Tagger
5		*
6		* @category BrillTagger
7		* @author Ekin H. Bayar <[email protected]>
8		* @version 0.1.0
9		*/
10
11		namespace BrillTagger;
12
13		class BrillTagger
14		{
15		private $dictionary = LEXICON;
16
17	12	public function tag($text) {
18
19	12	preg_match_all("/[\w\d\.'%@]+/", $text, $matches);
20
21	12	$tags = [];
22	12	$i = 0;
23
24	12	foreach ($matches[0] as $token) {
25		# default to a common noun
26	12	$tags[$i] = ['token' => $token, 'tag' => 'NN'];
27
28		# remove trailing full stops
29	12	if (substr(trim($token), -1) == '.') {
30	12	$token = preg_replace('/\.+$/', '', $token);
31		}
32
33		# get from dictionary if set
34	12	if (isset($this->dictionary[strtolower($token)])) {
35	12	$tags[$i]['tag'] = $this->dictionary[strtolower($token)][0];
36		}
37
38		# tag numerals, cardinals, money (NNS)
39	12	if (preg_match(NUMERAL, $token)) {
40		$tags[$i]['tag'] = 'NNS';
41		}
42
43		# tag years
44	12	if (preg_match(YEAR, $token, $matches)) {
45		$tags[$i]['tag'] = (isset($matches['nns'])) ? 'NNS' : 'CD';
46		}
47
48		# tag percentages
49	12	if (preg_match(PERCENTAGE, $token)) {
50	1	$tags[$i]['tag'] = 'NN';
51		}
52
53		# Anything that ends 'ly' is an adverb
54	12	if ($this->isAdverb($token)) {
55	1	$tags[$i]['tag'] = 'RB';
56		}
57
58		# Common noun to adj. if it ends with 'al', to gerund if 'ing', to past tense if 'ed'
59	12	if ($this->isNoun($tags[$i]['tag'])) {
60
61	11	if ($this->isAdjective($token)) {
62		$tags[$i]['tag'] = 'JJ';
63	11	} elseif ($this->isGerund($token)) {
64	1	$tags[$i]['tag'] = 'VBG';
65	11	} elseif ($this->isPastParticiple($token)) {
66		$tags[$i]['tag'] = 'VBN';
67	11	} elseif ($token === 'I') {
68	4	$tags[$i]['tag'] = 'PPSS';
69		}
70		# Convert noun to number if . appears
71	11	if(strpos($token, '.') !== false) {
72		$tags[$i]['tag'] = 'CD';
73		}
74		}
75
76		# Noun to plural if it ends with an 's'
77	12	if ($this->isPluralNoun($tags[$i]['tag'], $token)) {
78	4	$tags[$i]['tag'] = 'NNS';
79		}
80
81	12	if ($i > 0) {
82
83		# Converts verbs after 'the' to nouns
84	12	if ($tags[$i-1]['tag'] == 'DT' && $this->isVerb($tags[$i]['tag'])) {
85		$tags[$i]['tag'] = 'NN';
86		}
87
88		# Noun to verb if the word before is 'would'
89	12	if ($this->isSingularNoun($tags[$i]['tag']) && strtolower($tags[$i-1]['token']) == 'would') {
90		$tags[$i]['tag'] = 'VB';
91		}
92
93		# If we get noun noun, and the 2nd can be a verb, convert to verb
94	12	if ($this->isNoun($tags[$i]['tag']) &&
95	12	$this->isNoun($tags[$i-1]['tag']) &&
96	12	$this->tokenExists($token)
97		) {
98	3	if ($this->isPastTenseVerb($token)) {
99		$tags[$i]['tag'] = 'VBN';
100	3	} elseif ($this->isPresentTenseVerb($token)) {
101	3	$tags[$i]['tag'] = 'VBZ';
102		}
103		}
104		}
105
106	12	$i++;
107		}
108
109	12	return $tags;
110		}
111
112	3	public function tokenExists($token){
113	3	return isset($this->dictionary[strtolower($token)]);
114		}
115
116	12	public function isNoun($tag) {
117	12	return substr(trim($tag), 0, 1) == 'N';
118		}
119
120	12	public function isSingularNoun($tag){
121	12	return $tag == 'NN';
122		}
123
124	12	public function isPluralNoun($tag, $token) {
125	12	return ($this->isNoun($tag) && substr($token, -1) == 's');
126		}
127
128	1	public function isVerb($tag) {
129	1	return substr(trim($tag), 0, 2) == 'VB';
130		}
131
132		public function isPronoun($tag) {
133		return substr(trim($tag), 0, 1) == 'P';
134		}
135
136	3	public function isPastTenseVerb($token) {
137	3	return in_array('VBN', $this->dictionary[strtolower($token)]);
138		}
139
140	3	public function isPresentTenseVerb($token) {
141	3	return in_array('VBZ', $this->dictionary[strtolower($token)]);
142		}
143
144		# it him me us you 'em thee we'uns
145		public function isAccusativePronoun($tag) {
146		return $tag === 'PPO';
147		}
148
149		# it he she thee
150		public function isThirdPersonPronoun($tag) {
151		return $tag === 'PPS';
152		}
153
154		# they we I you ye thou you'uns
155		public function isSingularPersonalPronoun($tag) {
156		return $tag === 'PPSS';
157		}
158
159		# itself himself myself yourself herself oneself ownself
160		public function isSingularReflexivePronoun($tag) {
161		return $tag === 'PPL';
162		}
163
164		# themselves ourselves yourselves
165		public function isPluralReflexivePronoun($tag) {
166		return $tag === 'PPLS';
167		}
168
169		# ours mine his her/hers their/theirs our its my your/yours out thy thine
170		public function isPossessivePronoun($tag) {
171		return in_array($tag,['PP$$', 'PP$']);
172		}
173
174	11	public function isAdjective($token) {
175	11	return substr($token, -2) == 'al';
176		}
177
178	11	public function isGerund($token) {
179	11	return substr($token, -3) == 'ing';
180		}
181
182	11	public function isPastParticiple($token) {
183	11	return substr($token, -2) == 'ed';
184		}
185
186	12	public function isAdverb($token){
187	12	return substr($token, -2) == 'ly';
188		}
189		}
190

ekinhbayar / BrillTagger

Branch — unit-tests (e061ab)

BrillTagger::isPluralNoun() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like