Text::letterCount() - Code Metrics - DaveChild/Text-Statistics - Measure and Improve Code Quality continuously with Scrutinizer

Text::letterCount() B
last analyzed 2018-09-12 13:05 UTC

↳ Parent: Text

Complexity

Conditions	6
Paths	11

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	13
CRAP Score	6.7717

Importance

Changes

Metric	Value
dl	0
loc	30
ccs	13
cts	18
cp	0.7221
rs	8.8177
c	0
b	0
f	0
cc	6
nc	11
nop	2
crap	6.7717

<?php

namespace DaveChild\TextStatistics;

/**
 * Static text helpers: cleaning raw text and counting characters, letters,
 * words and sentences. Multibyte (mbstring) functions are used when the
 * extension is loaded; otherwise the byte-wise equivalents are used.
 */
class Text
{

    /**
     * @var boolean|null $blnMbstring Efficiency: Is the MB String extension
     *                                loaded? Null until first checked.
     */
    protected static $blnMbstring = null;

    /**
     * @var array $clean Efficiency: Store strings once processed. Keyed by
     *                   the sha1 of the raw text; never evicted.
     */
    protected static $clean = array();

    /**
     * Reports whether the mbstring extension is loaded, checking only once
     * and remembering the answer in self::$blnMbstring.
     * @return  boolean
     */
    protected static function hasMbstring()
    {
        if (is_null(self::$blnMbstring)) {
            self::$blnMbstring = extension_loaded('mbstring');
        }

        return self::$blnMbstring;
    }

    /**
     * Tells whether the text is empty once surrounding whitespace is removed.
     * The string cast keeps null from triggering a deprecation in trim().
     * @param   string  $strText  Text to be checked
     * @return  boolean
     */
    protected static function isBlank($strText)
    {
        return trim((string) $strText) === '';
    }

    /**
     * Converts UTF-8 text to ISO-8859-1. Characters with no ISO-8859-1
     * equivalent become "?". utf8_decode() is deprecated as of PHP 8.2, so it
     * is only used as a fallback when mbstring is not available.
     * @param   string  $strText  UTF-8 text
     * @return  string
     */
    protected static function utf8ToLatin1($strText)
    {
        if (self::hasMbstring()) {
            return mb_convert_encoding($strText, 'ISO-8859-1', 'UTF-8');
        }

        return utf8_decode($strText);
    }

    /**
     * Trims, removes line breaks, multiple spaces and generally cleans text
     * before processing. The result is plain text in which every sentence
     * ends with a single "." followed by one space (none after the last).
     * Results are cached per input string.
     * @param   string|boolean  $strText      Text to be transformed
     * @return  string          Cleaned text; '' when a boolean is passed
     */
    public static function cleanText($strText)
    {

        // Check for boolean before processing as string
        if (is_bool($strText)) {
            return '';
        }

        $strText = (string) $strText;

        // Check to see if we already processed this text. If we did, don't
        // re-process it.
        $key = sha1($strText);
        if (isset(self::$clean[$key])) {
            return self::$clean[$key];
        }

        // Typographic punctuation and its plain ASCII replacement. The three
        // lists are index-aligned: left/right single quote, left/right double
        // quote, en dash, em dash, ellipsis.
        $arrUtf8Punctuation = array(
            "\xe2\x80\x98",
            "\xe2\x80\x99",
            "\xe2\x80\x9c",
            "\xe2\x80\x9d",
            "\xe2\x80\x93",
            "\xe2\x80\x94",
            "\xe2\x80\xa6"
        );
        $arrSingleBytePunctuation = array(
            chr(145),
            chr(146),
            chr(147),
            chr(148),
            chr(150),
            chr(151),
            chr(133)
        );
        $arrAsciiPunctuation = array(
            "'",
            "'",
            '"',
            '"',
            '-',
            '--',
            '...'
        );

        // Curly quotes etc. These must be replaced BEFORE the conversion to
        // ISO-8859-1: they have no equivalent there and would turn into "?",
        // which is later treated as a sentence terminator.
        $strText = str_replace($arrUtf8Punctuation, $arrAsciiPunctuation, $strText);

        $strText = self::utf8ToLatin1($strText);

        // Second pass catches double-encoded input, where the UTF-8 byte
        // sequences only appear once the text has been decoded.
        $strText = str_replace($arrUtf8Punctuation, $arrAsciiPunctuation, $strText);
        $strText = str_replace($arrSingleBytePunctuation, $arrAsciiPunctuation, $strText);

        // Replace periods within numbers
        $strText = preg_replace('`([^0-9][0-9]+)\.([0-9]+[^0-9])`mis', '${1}0$2', $strText);

        // Handle HTML. Treat block level elements as sentence terminators and
        // remove all other tags.
        $strText = preg_replace('`<script(.*?)>(.*?)</script>`is', '', $strText);
        $strText = preg_replace('`\</?(address|blockquote|center|dir|div|dl|dd|dt|fieldset|form|h1|h2|h3|h4|h5|h6|menu|noscript|ol|p|pre|table|ul|li)[^>]*>`is', '.', $strText);
        $strText = html_entity_decode($strText);
        $strText = strip_tags($strText);

        // Assume blank lines (i.e., paragraph breaks) end sentences (useful
        // for titles in plain text documents) and replace remaining new
        // lines with spaces
        $strText = preg_replace('`(\r\n|\n\r)`is', "\n", $strText);
        $strText = preg_replace('`(\r|\n){2,}`is', ".\n\n", $strText);
        $strText = preg_replace('`[ ]*(\n|\r\n|\r)[ ]*`', ' ', $strText);

        // Replace commas, hyphens, quotes etc (count as spaces)
        $strText = preg_replace('`[",:;()/\`-]`', ' ', $strText);

        // Unify terminators and spaces
        $strText = trim($strText, '. ') . '.'; // Add final terminator.
        $strText = preg_replace('`[\.!?]`', '.', $strText); // Unify terminators
        $strText = preg_replace('`([\.\s]*\.[\.\s]*)`mis', '. ', $strText); // Merge terminators separated by whitespace.
        $strText = preg_replace('`[ ]+`', ' ', $strText); // Remove multiple spaces
        $strText = preg_replace('`([\.])[\. ]+`', '$1', $strText); // Check for duplicated terminators
        $strText = trim(preg_replace('`[ ]*([\.])`', '$1 ', $strText)); // Pad sentence terminators

        // Lower case all words following terminators (for gunning fog score)
        $strText = preg_replace_callback('`\. [^\. ]`', function($matches) { return strtolower($matches[0]); }, $strText);

        $strText = trim($strText);

        // Cache it and return
        self::$clean[$key] = $strText;
        return $strText;
    }

    /**
     * Converts string to lower case. Uses mb_strtolower when mbstring is
     * loaded, regular strtolower otherwise.
     * @param   string  $strText      Text to be transformed
     * @param   string  $strEncoding  Encoding of text ('' = mbstring default)
     * @return  string
     */
    public static function lowerCase($strText, $strEncoding = '')
    {
        if (!self::hasMbstring()) {
            return strtolower($strText);
        }

        if ($strEncoding == '') {
            return mb_strtolower($strText);
        }

        return mb_strtolower($strText, $strEncoding);
    }

    /**
     * Converts string to upper case. Uses mb_strtoupper when mbstring is
     * loaded, regular strtoupper otherwise.
     * @param   string  $strText      Text to be transformed
     * @param   string  $strEncoding  Encoding of text ('' = mbstring default)
     * @return  string
     */
    public static function upperCase($strText, $strEncoding = '')
    {
        if (!self::hasMbstring()) {
            return strtoupper($strText);
        }

        if ($strEncoding == '') {
            return mb_strtoupper($strText);
        }

        return mb_strtoupper($strText, $strEncoding);
    }

    /**
     * Gets portion of string. Uses mb_substr when mbstring is loaded, regular
     * substr otherwise.
     * @param   string  $strText      Text to be cut up
     * @param   int     $intStart     Start character
     * @param   int     $intLength    Length
     * @param   string  $strEncoding  Encoding of text ('' = mbstring default)
     * @return  string
     */
    public static function substring($strText, $intStart, $intLength, $strEncoding = '')
    {
        if (!self::hasMbstring()) {
            return substr($strText, $intStart, $intLength);
        }

        if ($strEncoding == '') {
            return mb_substr($strText, $intStart, $intLength);
        }

        return mb_substr($strText, $intStart, $intLength, $strEncoding);
    }

    /**
     * Gives string length. Uses mb_strlen when mbstring is loaded (counting
     * characters), regular strlen otherwise (counting bytes).
     * @param   string  $strText      Text to be measured
     * @param   string  $strEncoding  Encoding of text ('' = mbstring default)
     * @return  int
     */
    public static function textLength($strText, $strEncoding = '')
    {
        if (!self::hasMbstring()) {
            return strlen($strText);
        }

        if ($strEncoding == '') {
            return mb_strlen($strText);
        }

        return mb_strlen($strText, $strEncoding);
    }

    /**
     * Alias for textLength, as "letterCount", "wordCount" etc also used
     * @param   string  $strText      Text to be measured
     * @param   string  $strEncoding  Encoding of text
     * @return  int
     */
    public static function characterCount($strText, $strEncoding = '')
    {
        return self::textLength($strText, $strEncoding);
    }

    /**
     * Gives letter count (ignores all non-letters). The text is cleaned
     * first and only the characters A-Z and a-z are counted.
     * @param   string  $strText      Text to be measured
     * @param   string  $strEncoding  Encoding of text
     * @return  int
     */
    public static function letterCount($strText, $strEncoding = '')
    {
        if (self::isBlank($strText)) {
            return 0;
        }

        $strText = self::cleanText($strText); // To clear out newlines etc
        $strLetters = preg_replace('`[^A-Za-z]+`', '', $strText);

        // textLength already picks mb_strlen or strlen as appropriate, so no
        // exception-based fallback is needed here.
        return self::textLength($strLetters, $strEncoding);
    }

    /**
     * Returns word count for text. Words are taken to be runs of characters
     * separated by whitespace; whitespace at either end is ignored.
     * @param   string  $strText      Text to be measured
     * @param   string  $strEncoding  Encoding of text
     * @return  int
     */
    public static function wordCount($strText, $strEncoding = '')
    {
        if (self::isBlank($strText)) {
            return 0;
        }

        // Collapse every whitespace run to one space, then drop the spaces at
        // either end so they are not counted as word separators.
        $strCollapsed = trim(preg_replace('`\s+`', ' ', $strText), ' ');

        // Will be tripped by em dashes with spaces either side, among other similar characters
        $strSpaces = preg_replace('`[^ ]`', '', $strCollapsed);

        return 1 + self::textLength($strSpaces, $strEncoding); // Space count + 1 is word count
    }

    /**
     * Returns sentence count for text: the number of ".", "!" and "?"
     * characters, with a minimum of 1 for any non-blank text.
     * @param   string  $strText      Text to be measured
     * @param   string  $strEncoding  Encoding of text
     * @return  int
     */
    public static function sentenceCount($strText, $strEncoding = '')
    {
        if (self::isBlank($strText)) {
            return 0;
        }

        // Will be tripped up by "Mr." or "U.K.". Not a major concern at this point.
        $strTerminators = preg_replace('`[^\.!?]`', '', $strText);

        return max(1, self::textLength($strTerminators, $strEncoding));
    }

    /**
     * Returns average words per sentence for text.
     * NOTE(review): for blank text both counts are 0; the outcome of the
     * division then depends on Maths::bcCalc, which is defined elsewhere.
     * @param   string  $strText      Text to be measured
     * @param   string  $strEncoding  Encoding of text
     * @return  int|float
     */
    public static function averageWordsPerSentence($strText, $strEncoding = '')
    {
        $intSentenceCount = self::sentenceCount($strText, $strEncoding);
        $intWordCount = self::wordCount($strText, $strEncoding);

        return Maths::bcCalc($intWordCount, '/', $intSentenceCount);
    }
}


1			<?php
2
3			namespace DaveChild\TextStatistics;
4
5			class Text
6			{
7
8			/**
9			* @var boolean $blnMbstring Efficiency: Is the MB String extension loaded?
10			*/
11			protected static $blnMbstring = null;
12
13			/**
14			* @var array $clean Efficiency: Store strings once processed.
15			*/
16			protected static $clean = array();
17
18			/**
19			* Trims, removes line breaks, multiple spaces and generally cleans text
20			* before processing.
21			* @param string|boolean $strText Text to be transformed
22			* @return string
23			*/
24	39		public static function cleanText($strText)
25			{
26
27			// Check for boolean before processing as string
28	39		if (is_bool($strText)) {
29	1		return '';
30			}
31
32			// Check to see if we already processed this text. If we did, don't
33			// re-process it.
34	39		$key = sha1($strText);
35	39		if (isset(self::$clean[$key])) {
36	34		return self::$clean[$key];
37			}
38
39	19		$strText = utf8_decode($strText);
40
41			// Curly quotes etc
42	19		$strText = str_replace(
43			array(
44	19		"\xe2\x80\x98",
45	19		"\xe2\x80\x99",
46	19		"\xe2\x80\x9c",
47	19		"\xe2\x80\x9d",
48	19		"\xe2\x80\x93",
49	19		"\xe2\x80\x94",
50			"\xe2\x80\xa6"
51	19		),
52			array(
53	19		"'",
54	19		"'",
55	19		'"',
56	19		'"',
57	19		'-',
58	19		'--',
59			'...'
60	19		),
61			$strText
62	19		);
63	19		$strText = str_replace(
64			array(
65	19		chr(145),
66	19		chr(146),
67	19		chr(147),
68	19		chr(148),
69	19		chr(150),
70	19		chr(151),
71	19		chr(133)
72	19		),
73			array(
74	19		"'",
75	19		"'",
76	19		'"',
77	19		'"',
78	19		'-',
79	19		'--',
80			'...'
81	19		),
82			$strText
83	19		);
84
85			// Replace periods within numbers
86	19		$strText = preg_replace('`([^0-9][0-9]+)\.([0-9]+[^0-9])`mis', '${1}0$2', $strText);
87
88			// Handle HTML. Treat block level elements as sentence terminators and
89			// remove all other tags.
90	19		$strText = preg_replace('`<script(.*?)>(.*?)</script>`is', '', $strText);
91	19		$strText = preg_replace('`\</?(address|blockquote|center|dir|div|dl|dd|dt|fieldset|form|h1|h2|h3|h4|h5|h6|menu|noscript|ol|p|pre|table|ul|li)[^>]*>`is', '.', $strText);
92	19		$strText = html_entity_decode($strText);
93	19		$strText = strip_tags($strText);
94
95			// Assume blank lines (i.e., paragraph breaks) end sentences (useful
96			// for titles in plain text documents) and replace remaining new
97			// lines with spaces
98	19		$strText = preg_replace('`(\r\n|\n\r)`is', "\n", $strText);
99	19		$strText = preg_replace('`(\r|\n){2,}`is', ".\n\n", $strText);
100	19		$strText = preg_replace('`[ ]*(\n|\r\n|\r)[ ]*`', ' ', $strText);
101
102			// Replace commas, hyphens, quotes etc (count as spaces)
103	19		$strText = preg_replace('`[",:;()/\`-]`', ' ', $strText);
104
105			// Unify terminators and spaces
106	19		$strText = trim($strText, '. ') . '.'; // Add final terminator.
107	19		$strText = preg_replace('`[\.!?]`', '.', $strText); // Unify terminators
108	19		$strText = preg_replace('`([\.\s]*\.[\.\s]*)`mis', '. ', $strText); // Merge terminators separated by whitespace.
109	19		$strText = preg_replace('`[ ]+`', ' ', $strText); // Remove multiple spaces
110	19		$strText = preg_replace('`([\.])[\. ]+`', '$1', $strText); // Check for duplicated terminators
111	19		$strText = trim(preg_replace('`[ ]*([\.])`', '$1 ', $strText)); // Pad sentence terminators
112
113			// Lower case all words following terminators (for gunning fog score)
114	19		$strText = preg_replace_callback('`\. [^\. ]`', function($matches) { return strtolower($matches[0]); }, $strText);
115
116	19		$strText = trim($strText);
117
118			// Cache it and return
119	19		self::$clean[$key] = $strText;
120	19		return $strText;
121			}
122
123			/**
124			* Converts string to lower case. Tries mb_strtolower and if that fails uses regular strtolower.
125			* @param string $strText Text to be transformed
126			* @param string $strEncoding Encoding of text
127			* @return string
128			*/
129	22	View Code Duplication	public static function lowerCase($strText, $strEncoding = '')
			0 ignored issues – show Duplication introduced 2016-02-03 13:43 UTC by Report Bug Copy Issue Report This method seems to be duplicated in your project. Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. Loading history...
130			{
131
132	22		if (is_null(self::$blnMbstring)) {
133			self::$blnMbstring = extension_loaded('mbstring');
134			}
135
136	22		if (!self::$blnMbstring) {
137			$strLowerCaseText = strtolower($strText);
138			} else {
139	22		if ($strEncoding == '') {
140	22		$strLowerCaseText = mb_strtolower($strText);
141	22		} else {
142			$strLowerCaseText = mb_strtolower($strText, $strEncoding);
143			}
144			}
145
146	22		return $strLowerCaseText;
147			}
148
149			/**
150			* Converts string to upper case. Tries mb_strtoupper and if that fails uses regular strtoupper.
151			* @param string $strText Text to be transformed
152			* @param string $strEncoding Encoding of text
153			* @return string
154			*/
155	6	View Code Duplication	public static function upperCase($strText, $strEncoding = '')
			0 ignored issues – show Duplication introduced 2016-02-03 13:43 UTC by Report Bug Copy Issue Report This method seems to be duplicated in your project. Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. Loading history...
156			{
157
158	6		if (is_null(self::$blnMbstring)) {
159			self::$blnMbstring = extension_loaded('mbstring');
160			}
161
162	6		if (!self::$blnMbstring) {
163			$strUpperCaseText = strtoupper($strText);
164			} else {
165	6		if ($strEncoding == '') {
166	6		$strUpperCaseText = mb_strtoupper($strText);
167	6		} else {
168			$strUpperCaseText = mb_strtoupper($strText, $strEncoding);
169			}
170			}
171
172	6		return $strUpperCaseText;
173			}
174
175			/**
176			* Gets portion of string. Tries mb_substr and if that fails uses regular substr.
177			* @param string $strText Text to be cut up
178			* @param int $intStart Start character
179			* @param int $intLength Length
180			* @param string $strEncoding Encoding of text
181			* @return string
182			*/
183	5		public static function substring($strText, $intStart, $intLength, $strEncoding = '')
184			{
185
186	5		if (is_null(self::$blnMbstring)) {
187			self::$blnMbstring = extension_loaded('mbstring');
188			}
189
190	5		if (!self::$blnMbstring) {
191			$strSubstring = substr($strText, $intStart, $intLength);
192			} else {
193	5		if ($strEncoding == '') {
194	5		$strSubstring = mb_substr($strText, $intStart, $intLength);
195	5		} else {
196			$strSubstring = mb_substr($strText, $intStart, $intLength, $strEncoding);
197			}
198			}
199
200	5		return $strSubstring;
201			}
202
203			/**
204			* Gives string length. Tries mb_strlen and if that fails uses regular strlen.
205			* @param string $strText Text to be measured
206			* @param string $strEncoding Encoding of text
207			* @return int
208			*/
209	29	View Code Duplication	public static function textLength($strText, $strEncoding = '')
			0 ignored issues – show Duplication introduced 2016-02-03 13:43 UTC by Report Bug Copy Issue Report This method seems to be duplicated in your project. Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. Loading history...
210			{
211
212	29		if (is_null(self::$blnMbstring)) {
213			self::$blnMbstring = extension_loaded('mbstring');
214			}
215
216	29		if (!self::$blnMbstring) {
217			$intTextLength = strlen($strText);
218			} else {
219	29		if ($strEncoding == '') {
220	29		$intTextLength = mb_strlen($strText);
221	29		} else {
222			$intTextLength = mb_strlen($strText, $strEncoding);
223			}
224			}
225
226	29		return $intTextLength;
227			}
228
229			/**
230			* Alias for textLength, as "letterCount", "wordCount" etc also used
231			* @param string $strText Text to be measured
232			* @param string $strEncoding Encoding of text
233			* @return int
234			*/
235	1		public static function characterCount($strText, $strEncoding = '')
236			{
237	1		return self::textLength($strText, $strEncoding);
238			}
239
240			/**
241			* Gives letter count (ignores all non-letters). Tries mb_strlen and if
242			* that fails uses regular strlen.
243			* @param string $strText Text to be measured
244			* @param string $strEncoding Encoding of text
245			* @return int
246			*/
247	31		public static function letterCount($strText, $strEncoding = '')
248			{
249	31		if (strlen(trim($strText)) == 0) {
250	1		return 0;
251			}
252
253	31		if (is_null(self::$blnMbstring)) {
254			self::$blnMbstring = extension_loaded('mbstring');
255			}
256
257	31		$strText = self::cleanText($strText); // To clear out newlines etc
258	31		$intTextLength = 0;
259	31		$strText = preg_replace('`[^A-Za-z]+`', '', $strText);
260			try {
261
262	31		if (!self::$blnMbstring) {
263			throw new \Exception('The extension mbstring is not loaded.');
264			}
265
266	31		if ($strEncoding == '') {
267	31		$intTextLength = mb_strlen($strText);
268	31		} else {
269			$intTextLength = mb_strlen($strText, $strEncoding);
270			}
271	31		} catch (\Exception $e) {
272			$intTextLength = strlen($strText);
273			}
274
275	31		return $intTextLength;
276			}
277
278			/**
279			* Returns word count for text.
280			* @param string $strText Text to be measured
281			* @param string $strEncoding Encoding of text
282			* @return int
283			*/
284	26	View Code Duplication	public static function wordCount($strText, $strEncoding = '')
			0 ignored issues – show Duplication introduced 2016-02-03 13:43 UTC by Report Bug Copy Issue Report This method seems to be duplicated in your project. Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. Loading history...
285			{
286	26		if (strlen(trim($strText)) == 0) {
287	1		return 0;
288			}
289
290			// Will be tripped by em dashes with spaces either side, among other similar characters
291	25		$intWords = 1 + self::textLength(preg_replace('`[^ ]`', '', preg_replace('`\s+`', ' ', $strText)), $strEncoding); // Space count + 1 is word count
292
293	25		return $intWords;
294			}
295
296			/**
297			* Returns sentence count for text.
298			* @param string $strText Text to be measured
299			* @param string $strEncoding Encoding of text
300			* @return int
301			*/
302	23	View Code Duplication	public static function sentenceCount($strText, $strEncoding = '')
			0 ignored issues – show Duplication introduced 2016-02-03 13:43 UTC by Report Bug Copy Issue Report This method seems to be duplicated in your project. Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. Loading history...
303			{
304	23		if (strlen(trim($strText)) == 0) {
305	1		return 0;
306			}
307
308			// Will be tripped up by "Mr." or "U.K.". Not a major concern at this point.
309	22		$intSentences = max(1, self::textLength(preg_replace('`[^\.!?]`', '', $strText), $strEncoding));
310
311	22		return $intSentences;
312			}
313
314			/**
315			* Returns average words per sentence for text.
316			* @param string $strText Text to be measured
317			* @param string $strEncoding Encoding of text
318			* @return int|float
319			*/
320	10		public static function averageWordsPerSentence($strText, $strEncoding = '')
321			{
322	10		$intSentenceCount = self::sentenceCount($strText, $strEncoding);
323	10		$intWordCount = self::wordCount($strText, $strEncoding);
324
325	10		$averageWords = (Maths::bcCalc($intWordCount, '/', $intSentenceCount));
326	10		return $averageWords;
327			}
328			}
329

DaveChild / Text-Statistics

GitHub Access Token became invalid

Text::letterCount() B last analyzed 2018-09-12 13:05 UTC

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like

Text::letterCount() B
last analyzed 2018-09-12 13:05 UTC