Completed
Push — master ( 747f1c...419887 )
by Martijn
15s queued 12s
created
src/Sentence.php 1 patch
Indentation   +409 added lines, -409 removed lines patch added patch discarded remove patch
@@ -17,415 +17,415 @@
 block discarded – undo
17 17
 class Sentence
18 18
 {
19 19
 
20
-    /**
21
-     * Specify this flag with the split method to trim whitespace.
22
-     */
23
-    const SPLIT_TRIM = 0x1;
24
-
25
-    /**
26
-     * List of characters used to terminate sentences.
27
-     *
28
-     * @var string[]
29
-     */
30
-    private $terminals = ['.', '!', '?'];
31
-
32
-    /**
33
-     * List of characters used for abbreviations.
34
-     *
35
-     * @var string[]
36
-     */
37
-    private $abbreviators = ['.'];
38
-
39
-    /**
40
-     * List of float numbers in the text
41
-     *
42
-     * @var string[]
43
-     */
44
-    private $floatNumbers = [];
45
-
46
-    /**
47
-     * Clean floating point numbers by replacing them with their md5 hash
48
-     *
49
-     * @param string $text
50
-     *
51
-     * @return string
52
-     */
53
-    private function floatNumberClean(string $text)
54
-    {
55
-        preg_match_all('!\d+(?:\.\d+)?!', $text, $matches);
56
-
57
-        foreach ($matches[0] as $floatNumber) {
58
-            $this->floatNumbers[$floatNumber] = md5($floatNumber);
59
-
60
-            $text = str_replace($floatNumber, md5($floatNumber), $text);
61
-        }
62
-
63
-        return $text;
64
-    }
65
-
66
-    /**
67
-     * Revert the hashed floating number back
68
-     *
69
-     * @param string[] $text
70
-     *
71
-     * @return string[]
72
-     */
73
-    private function floatNumberRevert($text)
74
-    {
20
+	/**
21
+	 * Specify this flag with the split method to trim whitespace.
22
+	 */
23
+	const SPLIT_TRIM = 0x1;
24
+
25
+	/**
26
+	 * List of characters used to terminate sentences.
27
+	 *
28
+	 * @var string[]
29
+	 */
30
+	private $terminals = ['.', '!', '?'];
31
+
32
+	/**
33
+	 * List of characters used for abbreviations.
34
+	 *
35
+	 * @var string[]
36
+	 */
37
+	private $abbreviators = ['.'];
38
+
39
+	/**
40
+	 * List of float numbers in the text
41
+	 *
42
+	 * @var string[]
43
+	 */
44
+	private $floatNumbers = [];
45
+
46
+	/**
47
+	 * Clean floating point numbers by replacing them with their md5 hash
48
+	 *
49
+	 * @param string $text
50
+	 *
51
+	 * @return string
52
+	 */
53
+	private function floatNumberClean(string $text)
54
+	{
55
+		preg_match_all('!\d+(?:\.\d+)?!', $text, $matches);
56
+
57
+		foreach ($matches[0] as $floatNumber) {
58
+			$this->floatNumbers[$floatNumber] = md5($floatNumber);
59
+
60
+			$text = str_replace($floatNumber, md5($floatNumber), $text);
61
+		}
62
+
63
+		return $text;
64
+	}
65
+
66
+	/**
67
+	 * Revert the hashed floating number back
68
+	 *
69
+	 * @param string[] $text
70
+	 *
71
+	 * @return string[]
72
+	 */
73
+	private function floatNumberRevert($text)
74
+	{
75 75
         
76
-        return array_map(function($value) {
77
-            foreach ($this->floatNumbers as $number => $hash) {
78
-                $value = str_replace($hash, $number, $value);
79
-            }
80
-            return $value;
81
-        }, $text);
82
-    }
83
-
84
-    /**
85
-     * Breaks a piece of text into lines by linebreak.
86
-     * Eats up any linebreak characters as if one.
87
-     *
88
-     * Multibyte.php safe
89
-     *
90
-     * @param string $text
91
-     * @return string[]
92
-     */
93
-    private static function linebreakSplit($text)
94
-    {
95
-        $lines = [];
96
-        $line = '';
97
-
98
-        foreach (Multibyte::split('([\r\n]+)', $text, -1, PREG_SPLIT_DELIM_CAPTURE) as $part) {
99
-            $line .= $part;
100
-            if (Multibyte::trim($part) === '') {
101
-                $lines[] = $line;
102
-                $line = '';
103
-            }
104
-        }
105
-        $lines[] = $line;
106
-
107
-        return $lines;
108
-    }
109
-
110
-    /**
111
-     * Splits an array of lines by (consecutive sequences of)
112
-     * terminals, keeping terminals.
113
-     *
114
-     * Multibyte.php safe (at least for UTF-8)
115
-     *
116
-     * For example:
117
-     *    "There ... is. More!"
118
-     *        ... becomes ...
119
-     *    [ "There ", "...", " is", ".", " More", "!" ]
120
-     *
121
-     * @param string $line
122
-     * @return string[]
123
-     */
124
-    private function punctuationSplit($line)
125
-    {
126
-        $parts = [];
127
-
128
-        $chars = preg_split('//u', $line, -1, PREG_SPLIT_NO_EMPTY); // This is UTF8 multibyte safe!
129
-        $is_terminal = in_array($chars[0], $this->terminals);
130
-
131
-        $part = '';
132
-        foreach ($chars as $index => $char) {
133
-            if (in_array($char, $this->terminals) !== $is_terminal) {
134
-                $parts[] = $part;
135
-                $part = '';
136
-                $is_terminal = !$is_terminal;
137
-            }
138
-            $part .= $char;
139
-        }
140
-
141
-        if (!empty($part)) {
142
-            $parts[] = $part;
143
-        }
144
-
145
-        return $parts;
146
-    }
147
-
148
-    /**
149
-     * Appends each terminal item after its preceding
150
-     * non-terminals.
151
-     *
152
-     * Multibyte.php safe (at least for UTF-8)
153
-     *
154
-     * For example:
155
-     *    [ "There ", "...", " is", ".", " More", "!" ]
156
-     *        ... becomes ...
157
-     *    [ "There ... is.", "More!" ]
158
-     *
159
-     * @param string[] $punctuations
160
-     * @return string[]
161
-     */
162
-    private function punctuationMerge($punctuations)
163
-    {
164
-        $definite_terminals = array_diff($this->terminals, $this->abbreviators);
165
-
166
-        $merges = [];
167
-        $merge = '';
168
-
169
-        $filtered = array_filter($punctuations, function ($p) {
170
-            return $p !== '';
171
-        });
172
-
173
-        foreach ($filtered as $punctuation) {
174
-            $merge .= $punctuation;
175
-            if (mb_strlen($punctuation) === 1
176
-                && in_array($punctuation, $this->terminals)) {
177
-                $merges[] = $merge;
178
-                $merge = '';
179
-            } else {
180
-                foreach ($definite_terminals as $terminal) {
181
-                    if (mb_strpos($punctuation, $terminal) !== false) {
182
-                        $merges[] = $merge;
183
-                        $merge = '';
184
-                        break;
185
-                    }
186
-                }
187
-            }
188
-        }
189
-        if (!empty($merge)) {
190
-            $merges[] = $merge;
191
-        }
192
-
193
-        return $merges;
194
-    }
195
-
196
-    /**
197
-     * Looks for capitalized abbreviations & includes them with the following fragment.
198
-     *
199
-     * For example:
200
-     *    [ "Last week, former director of the F.B.I. James B. Comey was fired. Mr. Comey was not available for comment." ]
201
-     *        ... becomes ...
202
-     *    [ "Last week, former director of the F.B.I. James B. Comey was fired." ]
203
-     *  [ "Mr. Comey was not available for comment." ]
204
-     *
205
-     * @param string[] $fragments
206
-     * @return string[]
207
-     */
208
-    private function abbreviationMerge($fragments)
209
-    {
210
-        $return_fragment = [];
211
-
212
-        $previous_fragment = '';
213
-        $previous_is_abbreviation = false;
214
-        $i = 0;
215
-        foreach ($fragments as $fragment) {
216
-            $is_abbreviation = self::isAbreviation($fragment);
217
-
218
-            // merge previous fragment with this
219
-            if ($previous_is_abbreviation) {
220
-                $fragment = $previous_fragment . $fragment;
221
-            }
222
-            $return_fragment[$i] = $fragment;
223
-
224
-            $previous_is_abbreviation = $is_abbreviation;
225
-            $previous_fragment = $fragment;
226
-
227
-            // only increment if this isn't an abbreviation
228
-            if (!$is_abbreviation) {
229
-                $i++;
230
-            }
231
-        }
232
-        return $return_fragment;
233
-    }
234
-
235
-    /**
236
-     * Check if the last word of fragment starts with a Capital, ends in "." & has at most 3 characters.
237
-     *
238
-     * @param $fragment
239
-     * @return bool
240
-     */
241
-    private static function isAbreviation($fragment)
242
-    {
243
-        $words = mb_split('\s+', Multibyte::trim($fragment));
244
-
245
-        $word_count = count($words);
246
-
247
-        $last_word = Multibyte::trim($words[$word_count - 1]);
248
-        $last_is_capital = preg_match('#^\p{Lu}#u', $last_word);
249
-        $last_is_abbreviation = mb_substr(Multibyte::trim($fragment), -1) === '.';
250
-
251
-        return $last_is_capital > 0
252
-            && $last_is_abbreviation > 0
253
-            && mb_strlen($last_word) <= 3;
254
-    }
255
-
256
-    /**
257
-     * Merges any part starting with a closing parenthesis ')' to the previous
258
-     * part.
259
-     *
260
-     * @param string[] $parts
261
-     * @return string[]
262
-     */
263
-    private function parenthesesMerge($parts)
264
-    {
265
-        $subsentences = [];
266
-
267
-        foreach ($parts as $part) {
268
-            if ($part[0] === ')') {
269
-                $subsentences[count($subsentences) - 1] .= $part;
270
-            } else {
271
-                $subsentences[] = $part;
272
-            }
273
-        }
274
-
275
-        return $subsentences;
276
-    }
277
-
278
-    /**
279
-     * Looks for closing quotes to include them with the previous statement.
280
-     * "That was very interesting," he said.
281
-     * "That was very interesting."
282
-     *
283
-     * @param string[] $statements
284
-     * @return string[]
285
-     */
286
-    private function closeQuotesMerge($statements)
287
-    {
288
-        $i = 0;
289
-        $previous_statement = '';
290
-        $return = [];
291
-        foreach ($statements as $statement) {
292
-            if (self::isEndQuote($statement)) {
293
-                $statement = $previous_statement . $statement;
294
-            } else {
295
-                $i++;
296
-            }
297
-
298
-            $return[$i] = $statement;
299
-            $previous_statement = $statement;
300
-        }
301
-
302
-        return $return;
303
-    }
304
-
305
-    /**
306
-     * Check if the entire string is a quotation mark or quote, then space, then lowercase.
307
-     *
308
-     * @param $statement
309
-     * @return bool
310
-     */
311
-    private static function isEndQuote($statement)
312
-    {
313
-        $trimmed = Multibyte::trim($statement);
314
-        $first = mb_substr($statement, 0, 1);
315
-
316
-        return in_array($trimmed, ['"', '\''])
317
-            || (
318
-                in_array($first, ['"', '\''])
319
-                && mb_substr($statement, 1, 1) === ' '
320
-                && ctype_lower(mb_substr($statement, 2, 1)) === true
321
-            );
322
-    }
323
-
324
-    /**
325
-     * Merges items into larger sentences.
326
-     * Multibyte.php safe
327
-     *
328
-     * @param string[] $shorts
329
-     * @return string[]
330
-     */
331
-    private function sentenceMerge($shorts)
332
-    {
333
-        $non_abbreviating_terminals = array_diff($this->terminals, $this->abbreviators);
334
-
335
-        $sentences = [];
336
-
337
-        $sentence = '';
338
-        $has_words = false;
339
-        $previous_word_ending = null;
340
-        foreach ($shorts as $short) {
341
-            $word_count = count(mb_split('\s+', Multibyte::trim($short)));
342
-            $after_non_abbreviating_terminal = in_array($previous_word_ending, $non_abbreviating_terminals);
343
-
344
-            if ($after_non_abbreviating_terminal
345
-                || ($has_words && $word_count > 1)) {
346
-
347
-                $sentences[] = $sentence;
348
-
349
-                $sentence = '';
350
-                $has_words = false;
351
-            }
352
-
353
-            $has_words = $has_words
354
-                || $word_count > 1;
355
-
356
-            $sentence .= $short;
357
-            $previous_word_ending = mb_substr($short, -1);
358
-        }
359
-
360
-        if (!empty($sentence)) {
361
-            $sentences[] = $sentence;
362
-        }
363
-
364
-        return $sentences;
365
-    }
366
-
367
-    /**
368
-     * Return the sentences detected in the provided text.
369
-     * Set the Sentence::SPLIT_TRIM flag to trim whitespace.
370
-     * @param string $text
371
-     * @param integer $flags
372
-     * @return string[]
373
-     */
374
-    public function split($text, $flags = 0)
375
-    {
376
-        static $pipeline = [
377
-            'floatNumberClean',
378
-            'punctuationSplit',
379
-            'parenthesesMerge', // also works after punctuationMerge or abbreviationMerge
380
-            'punctuationMerge',
381
-            'abbreviationMerge',
382
-            'closeQuotesMerge',
383
-            'sentenceMerge',
384
-            'floatNumberRevert'
385
-        ];
386
-
387
-        // clean funny quotes
388
-        $text = Multibyte::cleanUnicode($text);
389
-
390
-        // Split
391
-        $sentences = [];
392
-        foreach (self::linebreakSplit($text) as $input) {
393
-            if (Multibyte::trim($input) !== '') {
394
-                foreach ($pipeline as $method) {
395
-                    $input = $this->$method($input);
396
-                }
397
-                $sentences = array_merge($sentences, $input);
398
-            }
399
-        }
400
-
401
-        // Post process
402
-        if ($flags & self::SPLIT_TRIM) {
403
-            return self::trimSentences($sentences);
404
-        }
405
-
406
-        return $sentences;
407
-    }
408
-
409
-    /**
410
-     * Multibyte.php trim each string in an array.
411
-     * @param string[] $sentences
412
-     * @return string[]
413
-     */
414
-    private static function trimSentences($sentences)
415
-    {
416
-        return array_map(function ($sentence) {
417
-            return Multibyte::trim($sentence);
418
-        }, $sentences);
419
-    }
420
-
421
-    /**
422
-     * Return the number of sentences detected in the provided text.
423
-     * @param string $text
424
-     * @return integer
425
-     */
426
-    public function count($text)
427
-    {
428
-        return count($this->split($text));
429
-    }
76
+		return array_map(function($value) {
77
+			foreach ($this->floatNumbers as $number => $hash) {
78
+				$value = str_replace($hash, $number, $value);
79
+			}
80
+			return $value;
81
+		}, $text);
82
+	}
83
+
84
+	/**
85
+	 * Breaks a piece of text into lines by linebreak.
86
+	 * Eats up any linebreak characters as if one.
87
+	 *
88
+	 * Multibyte.php safe
89
+	 *
90
+	 * @param string $text
91
+	 * @return string[]
92
+	 */
93
+	private static function linebreakSplit($text)
94
+	{
95
+		$lines = [];
96
+		$line = '';
97
+
98
+		foreach (Multibyte::split('([\r\n]+)', $text, -1, PREG_SPLIT_DELIM_CAPTURE) as $part) {
99
+			$line .= $part;
100
+			if (Multibyte::trim($part) === '') {
101
+				$lines[] = $line;
102
+				$line = '';
103
+			}
104
+		}
105
+		$lines[] = $line;
106
+
107
+		return $lines;
108
+	}
109
+
110
+	/**
111
+	 * Splits an array of lines by (consecutive sequences of)
112
+	 * terminals, keeping terminals.
113
+	 *
114
+	 * Multibyte.php safe (at least for UTF-8)
115
+	 *
116
+	 * For example:
117
+	 *    "There ... is. More!"
118
+	 *        ... becomes ...
119
+	 *    [ "There ", "...", " is", ".", " More", "!" ]
120
+	 *
121
+	 * @param string $line
122
+	 * @return string[]
123
+	 */
124
+	private function punctuationSplit($line)
125
+	{
126
+		$parts = [];
127
+
128
+		$chars = preg_split('//u', $line, -1, PREG_SPLIT_NO_EMPTY); // This is UTF8 multibyte safe!
129
+		$is_terminal = in_array($chars[0], $this->terminals);
130
+
131
+		$part = '';
132
+		foreach ($chars as $index => $char) {
133
+			if (in_array($char, $this->terminals) !== $is_terminal) {
134
+				$parts[] = $part;
135
+				$part = '';
136
+				$is_terminal = !$is_terminal;
137
+			}
138
+			$part .= $char;
139
+		}
140
+
141
+		if (!empty($part)) {
142
+			$parts[] = $part;
143
+		}
144
+
145
+		return $parts;
146
+	}
147
+
148
+	/**
149
+	 * Appends each terminal item after its preceding
150
+	 * non-terminals.
151
+	 *
152
+	 * Multibyte.php safe (at least for UTF-8)
153
+	 *
154
+	 * For example:
155
+	 *    [ "There ", "...", " is", ".", " More", "!" ]
156
+	 *        ... becomes ...
157
+	 *    [ "There ... is.", "More!" ]
158
+	 *
159
+	 * @param string[] $punctuations
160
+	 * @return string[]
161
+	 */
162
+	private function punctuationMerge($punctuations)
163
+	{
164
+		$definite_terminals = array_diff($this->terminals, $this->abbreviators);
165
+
166
+		$merges = [];
167
+		$merge = '';
168
+
169
+		$filtered = array_filter($punctuations, function ($p) {
170
+			return $p !== '';
171
+		});
172
+
173
+		foreach ($filtered as $punctuation) {
174
+			$merge .= $punctuation;
175
+			if (mb_strlen($punctuation) === 1
176
+				&& in_array($punctuation, $this->terminals)) {
177
+				$merges[] = $merge;
178
+				$merge = '';
179
+			} else {
180
+				foreach ($definite_terminals as $terminal) {
181
+					if (mb_strpos($punctuation, $terminal) !== false) {
182
+						$merges[] = $merge;
183
+						$merge = '';
184
+						break;
185
+					}
186
+				}
187
+			}
188
+		}
189
+		if (!empty($merge)) {
190
+			$merges[] = $merge;
191
+		}
192
+
193
+		return $merges;
194
+	}
195
+
196
+	/**
197
+	 * Looks for capitalized abbreviations & includes them with the following fragment.
198
+	 *
199
+	 * For example:
200
+	 *    [ "Last week, former director of the F.B.I. James B. Comey was fired. Mr. Comey was not available for comment." ]
201
+	 *        ... becomes ...
202
+	 *    [ "Last week, former director of the F.B.I. James B. Comey was fired." ]
203
+	 *  [ "Mr. Comey was not available for comment." ]
204
+	 *
205
+	 * @param string[] $fragments
206
+	 * @return string[]
207
+	 */
208
+	private function abbreviationMerge($fragments)
209
+	{
210
+		$return_fragment = [];
211
+
212
+		$previous_fragment = '';
213
+		$previous_is_abbreviation = false;
214
+		$i = 0;
215
+		foreach ($fragments as $fragment) {
216
+			$is_abbreviation = self::isAbreviation($fragment);
217
+
218
+			// merge previous fragment with this
219
+			if ($previous_is_abbreviation) {
220
+				$fragment = $previous_fragment . $fragment;
221
+			}
222
+			$return_fragment[$i] = $fragment;
223
+
224
+			$previous_is_abbreviation = $is_abbreviation;
225
+			$previous_fragment = $fragment;
226
+
227
+			// only increment if this isn't an abbreviation
228
+			if (!$is_abbreviation) {
229
+				$i++;
230
+			}
231
+		}
232
+		return $return_fragment;
233
+	}
234
+
235
+	/**
236
+	 * Check if the last word of fragment starts with a Capital, ends in "." & has at most 3 characters.
237
+	 *
238
+	 * @param $fragment
239
+	 * @return bool
240
+	 */
241
+	private static function isAbreviation($fragment)
242
+	{
243
+		$words = mb_split('\s+', Multibyte::trim($fragment));
244
+
245
+		$word_count = count($words);
246
+
247
+		$last_word = Multibyte::trim($words[$word_count - 1]);
248
+		$last_is_capital = preg_match('#^\p{Lu}#u', $last_word);
249
+		$last_is_abbreviation = mb_substr(Multibyte::trim($fragment), -1) === '.';
250
+
251
+		return $last_is_capital > 0
252
+			&& $last_is_abbreviation > 0
253
+			&& mb_strlen($last_word) <= 3;
254
+	}
255
+
256
+	/**
257
+	 * Merges any part starting with a closing parenthesis ')' to the previous
258
+	 * part.
259
+	 *
260
+	 * @param string[] $parts
261
+	 * @return string[]
262
+	 */
263
+	private function parenthesesMerge($parts)
264
+	{
265
+		$subsentences = [];
266
+
267
+		foreach ($parts as $part) {
268
+			if ($part[0] === ')') {
269
+				$subsentences[count($subsentences) - 1] .= $part;
270
+			} else {
271
+				$subsentences[] = $part;
272
+			}
273
+		}
274
+
275
+		return $subsentences;
276
+	}
277
+
278
+	/**
279
+	 * Looks for closing quotes to include them with the previous statement.
280
+	 * "That was very interesting," he said.
281
+	 * "That was very interesting."
282
+	 *
283
+	 * @param string[] $statements
284
+	 * @return string[]
285
+	 */
286
+	private function closeQuotesMerge($statements)
287
+	{
288
+		$i = 0;
289
+		$previous_statement = '';
290
+		$return = [];
291
+		foreach ($statements as $statement) {
292
+			if (self::isEndQuote($statement)) {
293
+				$statement = $previous_statement . $statement;
294
+			} else {
295
+				$i++;
296
+			}
297
+
298
+			$return[$i] = $statement;
299
+			$previous_statement = $statement;
300
+		}
301
+
302
+		return $return;
303
+	}
304
+
305
+	/**
306
+	 * Check if the entire string is a quotation mark or quote, then space, then lowercase.
307
+	 *
308
+	 * @param $statement
309
+	 * @return bool
310
+	 */
311
+	private static function isEndQuote($statement)
312
+	{
313
+		$trimmed = Multibyte::trim($statement);
314
+		$first = mb_substr($statement, 0, 1);
315
+
316
+		return in_array($trimmed, ['"', '\''])
317
+			|| (
318
+				in_array($first, ['"', '\''])
319
+				&& mb_substr($statement, 1, 1) === ' '
320
+				&& ctype_lower(mb_substr($statement, 2, 1)) === true
321
+			);
322
+	}
323
+
324
+	/**
325
+	 * Merges items into larger sentences.
326
+	 * Multibyte.php safe
327
+	 *
328
+	 * @param string[] $shorts
329
+	 * @return string[]
330
+	 */
331
+	private function sentenceMerge($shorts)
332
+	{
333
+		$non_abbreviating_terminals = array_diff($this->terminals, $this->abbreviators);
334
+
335
+		$sentences = [];
336
+
337
+		$sentence = '';
338
+		$has_words = false;
339
+		$previous_word_ending = null;
340
+		foreach ($shorts as $short) {
341
+			$word_count = count(mb_split('\s+', Multibyte::trim($short)));
342
+			$after_non_abbreviating_terminal = in_array($previous_word_ending, $non_abbreviating_terminals);
343
+
344
+			if ($after_non_abbreviating_terminal
345
+				|| ($has_words && $word_count > 1)) {
346
+
347
+				$sentences[] = $sentence;
348
+
349
+				$sentence = '';
350
+				$has_words = false;
351
+			}
352
+
353
+			$has_words = $has_words
354
+				|| $word_count > 1;
355
+
356
+			$sentence .= $short;
357
+			$previous_word_ending = mb_substr($short, -1);
358
+		}
359
+
360
+		if (!empty($sentence)) {
361
+			$sentences[] = $sentence;
362
+		}
363
+
364
+		return $sentences;
365
+	}
366
+
367
+	/**
368
+	 * Return the sentences detected in the provided text.
369
+	 * Set the Sentence::SPLIT_TRIM flag to trim whitespace.
370
+	 * @param string $text
371
+	 * @param integer $flags
372
+	 * @return string[]
373
+	 */
374
+	public function split($text, $flags = 0)
375
+	{
376
+		static $pipeline = [
377
+			'floatNumberClean',
378
+			'punctuationSplit',
379
+			'parenthesesMerge', // also works after punctuationMerge or abbreviationMerge
380
+			'punctuationMerge',
381
+			'abbreviationMerge',
382
+			'closeQuotesMerge',
383
+			'sentenceMerge',
384
+			'floatNumberRevert'
385
+		];
386
+
387
+		// clean funny quotes
388
+		$text = Multibyte::cleanUnicode($text);
389
+
390
+		// Split
391
+		$sentences = [];
392
+		foreach (self::linebreakSplit($text) as $input) {
393
+			if (Multibyte::trim($input) !== '') {
394
+				foreach ($pipeline as $method) {
395
+					$input = $this->$method($input);
396
+				}
397
+				$sentences = array_merge($sentences, $input);
398
+			}
399
+		}
400
+
401
+		// Post process
402
+		if ($flags & self::SPLIT_TRIM) {
403
+			return self::trimSentences($sentences);
404
+		}
405
+
406
+		return $sentences;
407
+	}
408
+
409
+	/**
410
+	 * Multibyte.php trim each string in an array.
411
+	 * @param string[] $sentences
412
+	 * @return string[]
413
+	 */
414
+	private static function trimSentences($sentences)
415
+	{
416
+		return array_map(function ($sentence) {
417
+			return Multibyte::trim($sentence);
418
+		}, $sentences);
419
+	}
420
+
421
+	/**
422
+	 * Return the number of sentences detected in the provided text.
423
+	 * @param string $text
424
+	 * @return integer
425
+	 */
426
+	public function count($text)
427
+	{
428
+		return count($this->split($text));
429
+	}
430 430
 
431 431
 }
Please login to merge, or discard this patch.