Passed
Push — master ( 8e4ab6...831065 )
by Martijn
03:06
created
src/Sentence.php 1 patch
Indentation   +413 added lines, -413 removed lines patch added patch discarded remove patch
@@ -17,418 +17,418 @@
 block discarded – undo
17 17
 class Sentence
18 18
 {
19 19
 
20
-    /**
21
-     * Specify this flag with the split method to trim whitespace.
22
-     */
23
-    const SPLIT_TRIM = 0x1;
24
-
25
-    /**
26
-     * List of characters used to terminate sentences.
27
-     *
28
-     * @var string[]
29
-     */
30
-    private $terminals = ['.', '!', '?'];
31
-
32
-    /**
33
-     * List of characters used for abbreviations.
34
-     *
35
-     * @var string[]
36
-     */
37
-    private $abbreviators = ['.'];
38
-
39
-    /**
40
-     * List of float numbers in the text
41
-     *
42
-     * @var string[]
43
-     */
44
-    private $floatNumbers = [];
45
-
46
-    /**
47
-     * Clean floating point numbers by replace them with their md5 hash
48
-     *
49
-     * @param string $text
50
-     *
51
-     * @return string
52
-     */
53
-    private function floatNumberClean(string $text)
54
-    {
55
-        preg_match_all('!\d+(?:\.\d+)?!', $text, $matches);
56
-
57
-        foreach ($matches[0] as $floatNumber) {
58
-            if (isset($this->floatNumbers[$floatNumber])) {
59
-                continue;
60
-            }
61
-
62
-            $hash = md5($floatNumber);
63
-            $this->floatNumbers[$floatNumber] = $hash;
64
-            $text = str_replace($floatNumber, $hash, $text);
65
-        }
66
-
67
-        return $text;
68
-    }
69
-
70
-    /**
71
-     * Revert the hashed floating number back
72
-     *
73
-     * @param string[] $text
74
-     *
75
-     * @return string[]
76
-     */
77
-    private function floatNumberRevert($text)
78
-    {
79
-        return array_map(function($value) {
80
-            foreach ($this->floatNumbers as $number => $hash) {
81
-                $value = str_replace($hash, $number, $value);
82
-            }
83
-            return $value;
84
-        }, $text);
85
-    }
86
-
87
-    /**
88
-     * Breaks a piece of text into lines by linebreak.
89
-     * Eats up any linebreak characters as if one.
90
-     *
91
-     * Multibyte.php safe
92
-     *
93
-     * @param string $text
94
-     * @return string[]
95
-     */
96
-    private static function linebreakSplit($text)
97
-    {
98
-        $lines = [];
99
-        $line = '';
100
-
101
-        foreach (Multibyte::split('([\r\n]+)', $text, -1, PREG_SPLIT_DELIM_CAPTURE) as $part) {
102
-            $line .= $part;
103
-            if (Multibyte::trim($part) === '') {
104
-                $lines[] = $line;
105
-                $line = '';
106
-            }
107
-        }
108
-        $lines[] = $line;
109
-
110
-        return $lines;
111
-    }
112
-
113
-    /**
114
-     * Splits an array of lines by (consecutive sequences of)
115
-     * terminals, keeping terminals.
116
-     *
117
-     * Multibyte.php safe (atleast for UTF-8)
118
-     *
119
-     * For example:
120
-     *    "There ... is. More!"
121
-     *        ... becomes ...
122
-     *    [ "There ", "...", " is", ".", " More", "!" ]
123
-     *
124
-     * @param string $line
125
-     * @return string[]
126
-     */
127
-    private function punctuationSplit($line)
128
-    {
129
-        $parts = [];
130
-
131
-        $chars = preg_split('//u', $line, -1, PREG_SPLIT_NO_EMPTY); // This is UTF8 multibyte safe!
132
-        $is_terminal = in_array($chars[0], $this->terminals);
133
-
134
-        $part = '';
135
-        foreach ($chars as $index => $char) {
136
-            if (in_array($char, $this->terminals) !== $is_terminal) {
137
-                $parts[] = $part;
138
-                $part = '';
139
-                $is_terminal = !$is_terminal;
140
-            }
141
-            $part .= $char;
142
-        }
143
-
144
-        if (!empty($part)) {
145
-            $parts[] = $part;
146
-        }
147
-
148
-        return $parts;
149
-    }
150
-
151
-    /**
152
-     * Appends each terminal item after it's preceding
153
-     * non-terminals.
154
-     *
155
-     * Multibyte.php safe (atleast for UTF-8)
156
-     *
157
-     * For example:
158
-     *    [ "There ", "...", " is", ".", " More", "!" ]
159
-     *        ... becomes ...
160
-     *    [ "There ... is.", "More!" ]
161
-     *
162
-     * @param string[] $punctuations
163
-     * @return string[]
164
-     */
165
-    private function punctuationMerge($punctuations)
166
-    {
167
-        $definite_terminals = array_diff($this->terminals, $this->abbreviators);
168
-
169
-        $merges = [];
170
-        $merge = '';
171
-
172
-        $filtered = array_filter($punctuations, function ($p) {
173
-            return $p !== '';
174
-        });
175
-
176
-        foreach ($filtered as $punctuation) {
177
-            $merge .= $punctuation;
178
-            if (mb_strlen($punctuation) === 1
179
-                && in_array($punctuation, $this->terminals)) {
180
-                $merges[] = $merge;
181
-                $merge = '';
182
-            } else {
183
-                foreach ($definite_terminals as $terminal) {
184
-                    if (mb_strpos($punctuation, $terminal) !== false) {
185
-                        $merges[] = $merge;
186
-                        $merge = '';
187
-                        break;
188
-                    }
189
-                }
190
-            }
191
-        }
192
-        if (!empty($merge)) {
193
-            $merges[] = $merge;
194
-        }
195
-
196
-        return $merges;
197
-    }
198
-
199
-    /**
200
-     * Looks for capitalized abbreviations & includes them with the following fragment.
201
-     *
202
-     * For example:
203
-     *    [ "Last week, former director of the F.B.I. James B. Comey was fired. Mr. Comey was not available for comment." ]
204
-     *        ... becomes ...
205
-     *    [ "Last week, former director of the F.B.I. James B. Comey was fired." ]
206
-     *  [ "Mr. Comey was not available for comment." ]
207
-     *
208
-     * @param string[] $fragments
209
-     * @return string[]
210
-     */
211
-    private function abbreviationMerge($fragments)
212
-    {
213
-        $return_fragment = [];
214
-
215
-        $previous_fragment = '';
216
-        $previous_is_abbreviation = false;
217
-        $i = 0;
218
-        foreach ($fragments as $fragment) {
219
-            $is_abbreviation = self::isAbreviation($fragment);
220
-
221
-            // merge previous fragment with this
222
-            if ($previous_is_abbreviation) {
223
-                $fragment = $previous_fragment . $fragment;
224
-            }
225
-            $return_fragment[$i] = $fragment;
226
-
227
-            $previous_is_abbreviation = $is_abbreviation;
228
-            $previous_fragment = $fragment;
229
-
230
-            // only increment if this isn't an abbreviation
231
-            if (!$is_abbreviation) {
232
-                $i++;
233
-            }
234
-        }
235
-        return $return_fragment;
236
-    }
237
-
238
-    /**
239
-     * Check if the last word of fragment starts with a Capital, ends in "." & has less than 3 characters.
240
-     *
241
-     * @param $fragment
242
-     * @return bool
243
-     */
244
-    private static function isAbreviation($fragment)
245
-    {
246
-        $words = mb_split('\s+', Multibyte::trim($fragment));
247
-
248
-        $word_count = count($words);
249
-
250
-        $last_word = Multibyte::trim($words[$word_count - 1]);
251
-        $last_is_capital = preg_match('#^\p{Lu}#u', $last_word);
252
-        $last_is_abbreviation = mb_substr(Multibyte::trim($fragment), -1) === '.';
253
-
254
-        return $last_is_capital > 0
255
-            && $last_is_abbreviation > 0
256
-            && mb_strlen($last_word) <= 3;
257
-    }
258
-
259
-    /**
260
-     * Merges any part starting with a closing parenthesis ')' to the previous
261
-     * part.
262
-     *
263
-     * @param string[] $parts
264
-     * @return string[]
265
-     */
266
-    private function parenthesesMerge($parts)
267
-    {
268
-        $subsentences = [];
269
-
270
-        foreach ($parts as $part) {
271
-            if ($part[0] === ')') {
272
-                $subsentences[count($subsentences) - 1] .= $part;
273
-            } else {
274
-                $subsentences[] = $part;
275
-            }
276
-        }
277
-
278
-        return $subsentences;
279
-    }
280
-
281
-    /**
282
-     * Looks for closing quotes to include them with the previous statement.
283
-     * "That was very interesting," he said.
284
-     * "That was very interesting."
285
-     *
286
-     * @param string[] $statements
287
-     * @return string[]
288
-     */
289
-    private function closeQuotesMerge($statements)
290
-    {
291
-        $i = 0;
292
-        $previous_statement = '';
293
-        $return = [];
294
-        foreach ($statements as $statement) {
295
-            if (self::isEndQuote($statement)) {
296
-                $statement = $previous_statement . $statement;
297
-            } else {
298
-                $i++;
299
-            }
300
-
301
-            $return[$i] = $statement;
302
-            $previous_statement = $statement;
303
-        }
304
-
305
-        return $return;
306
-    }
307
-
308
-    /**
309
-     * Check if the entire string is a quotation mark or quote, then space, then lowercase.
310
-     *
311
-     * @param $statement
312
-     * @return bool
313
-     */
314
-    private static function isEndQuote($statement)
315
-    {
316
-        $trimmed = Multibyte::trim($statement);
317
-        $first = mb_substr($statement, 0, 1);
318
-
319
-        return in_array($trimmed, ['"', '\''])
320
-            || (
321
-                in_array($first, ['"', '\''])
322
-                && mb_substr($statement, 1, 1) === ' '
323
-                && ctype_lower(mb_substr($statement, 2, 1)) === true
324
-            );
325
-    }
326
-
327
-    /**
328
-     * Merges items into larger sentences.
329
-     * Multibyte.php safe
330
-     *
331
-     * @param string[] $shorts
332
-     * @return string[]
333
-     */
334
-    private function sentenceMerge($shorts)
335
-    {
336
-        $non_abbreviating_terminals = array_diff($this->terminals, $this->abbreviators);
337
-
338
-        $sentences = [];
339
-
340
-        $sentence = '';
341
-        $has_words = false;
342
-        $previous_word_ending = null;
343
-        foreach ($shorts as $short) {
344
-            $word_count = count(mb_split('\s+', Multibyte::trim($short)));
345
-            $after_non_abbreviating_terminal = in_array($previous_word_ending, $non_abbreviating_terminals);
346
-
347
-            if ($after_non_abbreviating_terminal
348
-                || ($has_words && $word_count > 1)) {
349
-
350
-                $sentences[] = $sentence;
351
-
352
-                $sentence = '';
353
-                $has_words = false;
354
-            }
355
-
356
-            $has_words = $has_words
357
-                || $word_count > 1;
358
-
359
-            $sentence .= $short;
360
-            $previous_word_ending = mb_substr($short, -1);
361
-        }
362
-
363
-        if (!empty($sentence)) {
364
-            $sentences[] = $sentence;
365
-        }
366
-
367
-        return $sentences;
368
-    }
369
-
370
-    /**
371
-     * Return the sentences sentences detected in the provided text.
372
-     * Set the Sentence::SPLIT_TRIM flag to trim whitespace.
373
-     * @param string $text
374
-     * @param integer $flags
375
-     * @return string[]
376
-     */
377
-    public function split($text, $flags = 0)
378
-    {
379
-        static $pipeline = [
380
-            'floatNumberClean',
381
-            'punctuationSplit',
382
-            'parenthesesMerge', // also works after punctuationMerge or abbreviationMerge
383
-            'punctuationMerge',
384
-            'abbreviationMerge',
385
-            'closeQuotesMerge',
386
-            'sentenceMerge',
387
-            'floatNumberRevert'
388
-        ];
389
-
390
-        // clean funny quotes
391
-        $text = Multibyte::cleanUnicode($text);
392
-
393
-        // Split
394
-        $sentences = [];
395
-        foreach (self::linebreakSplit($text) as $input) {
396
-            if (Multibyte::trim($input) !== '') {
397
-                foreach ($pipeline as $method) {
398
-                    $input = $this->$method($input);
399
-                }
400
-                $sentences = array_merge($sentences, $input);
401
-            }
402
-        }
403
-
404
-        // Post process
405
-        if ($flags & self::SPLIT_TRIM) {
406
-            return self::trimSentences($sentences);
407
-        }
408
-
409
-        return $sentences;
410
-    }
411
-
412
-    /**
413
-     * Multibyte.php trim each string in an array.
414
-     * @param string[] $sentences
415
-     * @return string[]
416
-     */
417
-    private static function trimSentences($sentences)
418
-    {
419
-        return array_map(function ($sentence) {
420
-            return Multibyte::trim($sentence);
421
-        }, $sentences);
422
-    }
423
-
424
-    /**
425
-     * Return the number of sentences detected in the provided text.
426
-     * @param string $text
427
-     * @return integer
428
-     */
429
-    public function count($text)
430
-    {
431
-        return count($this->split($text));
432
-    }
20
+	/**
21
+	 * Specify this flag with the split method to trim whitespace.
22
+	 */
23
+	const SPLIT_TRIM = 0x1;
24
+
25
+	/**
26
+	 * List of characters used to terminate sentences.
27
+	 *
28
+	 * @var string[]
29
+	 */
30
+	private $terminals = ['.', '!', '?'];
31
+
32
+	/**
33
+	 * List of characters used for abbreviations.
34
+	 *
35
+	 * @var string[]
36
+	 */
37
+	private $abbreviators = ['.'];
38
+
39
+	/**
40
+	 * List of float numbers in the text
41
+	 *
42
+	 * @var string[]
43
+	 */
44
+	private $floatNumbers = [];
45
+
46
+	/**
47
+	 * Clean floating point numbers by replacing them with their md5 hash
48
+	 *
49
+	 * @param string $text
50
+	 *
51
+	 * @return string
52
+	 */
53
+	private function floatNumberClean(string $text)
54
+	{
55
+		preg_match_all('!\d+(?:\.\d+)?!', $text, $matches);
56
+
57
+		foreach ($matches[0] as $floatNumber) {
58
+			if (isset($this->floatNumbers[$floatNumber])) {
59
+				continue;
60
+			}
61
+
62
+			$hash = md5($floatNumber);
63
+			$this->floatNumbers[$floatNumber] = $hash;
64
+			$text = str_replace($floatNumber, $hash, $text);
65
+		}
66
+
67
+		return $text;
68
+	}
69
+
70
+	/**
71
+	 * Revert the hashed floating number back
72
+	 *
73
+	 * @param string[] $text
74
+	 *
75
+	 * @return string[]
76
+	 */
77
+	private function floatNumberRevert($text)
78
+	{
79
+		return array_map(function($value) {
80
+			foreach ($this->floatNumbers as $number => $hash) {
81
+				$value = str_replace($hash, $number, $value);
82
+			}
83
+			return $value;
84
+		}, $text);
85
+	}
86
+
87
+	/**
88
+	 * Breaks a piece of text into lines by linebreak.
89
+	 * Eats up any linebreak characters as if one.
90
+	 *
91
+	 * Multibyte.php safe
92
+	 *
93
+	 * @param string $text
94
+	 * @return string[]
95
+	 */
96
+	private static function linebreakSplit($text)
97
+	{
98
+		$lines = [];
99
+		$line = '';
100
+
101
+		foreach (Multibyte::split('([\r\n]+)', $text, -1, PREG_SPLIT_DELIM_CAPTURE) as $part) {
102
+			$line .= $part;
103
+			if (Multibyte::trim($part) === '') {
104
+				$lines[] = $line;
105
+				$line = '';
106
+			}
107
+		}
108
+		$lines[] = $line;
109
+
110
+		return $lines;
111
+	}
112
+
113
+	/**
114
+	 * Splits an array of lines by (consecutive sequences of)
115
+	 * terminals, keeping terminals.
116
+	 *
117
+	 * Multibyte.php safe (at least for UTF-8)
118
+	 *
119
+	 * For example:
120
+	 *    "There ... is. More!"
121
+	 *        ... becomes ...
122
+	 *    [ "There ", "...", " is", ".", " More", "!" ]
123
+	 *
124
+	 * @param string $line
125
+	 * @return string[]
126
+	 */
127
+	private function punctuationSplit($line)
128
+	{
129
+		$parts = [];
130
+
131
+		$chars = preg_split('//u', $line, -1, PREG_SPLIT_NO_EMPTY); // This is UTF8 multibyte safe!
132
+		$is_terminal = in_array($chars[0], $this->terminals);
133
+
134
+		$part = '';
135
+		foreach ($chars as $index => $char) {
136
+			if (in_array($char, $this->terminals) !== $is_terminal) {
137
+				$parts[] = $part;
138
+				$part = '';
139
+				$is_terminal = !$is_terminal;
140
+			}
141
+			$part .= $char;
142
+		}
143
+
144
+		if (!empty($part)) {
145
+			$parts[] = $part;
146
+		}
147
+
148
+		return $parts;
149
+	}
150
+
151
+	/**
152
+	 * Appends each terminal item after its preceding
153
+	 * non-terminals.
154
+	 *
155
+	 * Multibyte.php safe (at least for UTF-8)
156
+	 *
157
+	 * For example:
158
+	 *    [ "There ", "...", " is", ".", " More", "!" ]
159
+	 *        ... becomes ...
160
+	 *    [ "There ... is.", "More!" ]
161
+	 *
162
+	 * @param string[] $punctuations
163
+	 * @return string[]
164
+	 */
165
+	private function punctuationMerge($punctuations)
166
+	{
167
+		$definite_terminals = array_diff($this->terminals, $this->abbreviators);
168
+
169
+		$merges = [];
170
+		$merge = '';
171
+
172
+		$filtered = array_filter($punctuations, function ($p) {
173
+			return $p !== '';
174
+		});
175
+
176
+		foreach ($filtered as $punctuation) {
177
+			$merge .= $punctuation;
178
+			if (mb_strlen($punctuation) === 1
179
+				&& in_array($punctuation, $this->terminals)) {
180
+				$merges[] = $merge;
181
+				$merge = '';
182
+			} else {
183
+				foreach ($definite_terminals as $terminal) {
184
+					if (mb_strpos($punctuation, $terminal) !== false) {
185
+						$merges[] = $merge;
186
+						$merge = '';
187
+						break;
188
+					}
189
+				}
190
+			}
191
+		}
192
+		if (!empty($merge)) {
193
+			$merges[] = $merge;
194
+		}
195
+
196
+		return $merges;
197
+	}
198
+
199
+	/**
200
+	 * Looks for capitalized abbreviations & includes them with the following fragment.
201
+	 *
202
+	 * For example:
203
+	 *    [ "Last week, former director of the F.B.I. James B. Comey was fired. Mr. Comey was not available for comment." ]
204
+	 *        ... becomes ...
205
+	 *    [ "Last week, former director of the F.B.I. James B. Comey was fired." ]
206
+	 *  [ "Mr. Comey was not available for comment." ]
207
+	 *
208
+	 * @param string[] $fragments
209
+	 * @return string[]
210
+	 */
211
+	private function abbreviationMerge($fragments)
212
+	{
213
+		$return_fragment = [];
214
+
215
+		$previous_fragment = '';
216
+		$previous_is_abbreviation = false;
217
+		$i = 0;
218
+		foreach ($fragments as $fragment) {
219
+			$is_abbreviation = self::isAbreviation($fragment);
220
+
221
+			// merge previous fragment with this
222
+			if ($previous_is_abbreviation) {
223
+				$fragment = $previous_fragment . $fragment;
224
+			}
225
+			$return_fragment[$i] = $fragment;
226
+
227
+			$previous_is_abbreviation = $is_abbreviation;
228
+			$previous_fragment = $fragment;
229
+
230
+			// only increment if this isn't an abbreviation
231
+			if (!$is_abbreviation) {
232
+				$i++;
233
+			}
234
+		}
235
+		return $return_fragment;
236
+	}
237
+
238
+	/**
239
+	 * Check if the last word of fragment starts with a capital letter, ends in "." & has at most 3 characters.
240
+	 *
241
+	 * @param $fragment
242
+	 * @return bool
243
+	 */
244
+	private static function isAbreviation($fragment)
245
+	{
246
+		$words = mb_split('\s+', Multibyte::trim($fragment));
247
+
248
+		$word_count = count($words);
249
+
250
+		$last_word = Multibyte::trim($words[$word_count - 1]);
251
+		$last_is_capital = preg_match('#^\p{Lu}#u', $last_word);
252
+		$last_is_abbreviation = mb_substr(Multibyte::trim($fragment), -1) === '.';
253
+
254
+		return $last_is_capital > 0
255
+			&& $last_is_abbreviation > 0
256
+			&& mb_strlen($last_word) <= 3;
257
+	}
258
+
259
+	/**
260
+	 * Merges any part starting with a closing parenthesis ')' to the previous
261
+	 * part.
262
+	 *
263
+	 * @param string[] $parts
264
+	 * @return string[]
265
+	 */
266
+	private function parenthesesMerge($parts)
267
+	{
268
+		$subsentences = [];
269
+
270
+		foreach ($parts as $part) {
271
+			if ($part[0] === ')') {
272
+				$subsentences[count($subsentences) - 1] .= $part;
273
+			} else {
274
+				$subsentences[] = $part;
275
+			}
276
+		}
277
+
278
+		return $subsentences;
279
+	}
280
+
281
+	/**
282
+	 * Looks for closing quotes to include them with the previous statement.
283
+	 * "That was very interesting," he said.
284
+	 * "That was very interesting."
285
+	 *
286
+	 * @param string[] $statements
287
+	 * @return string[]
288
+	 */
289
+	private function closeQuotesMerge($statements)
290
+	{
291
+		$i = 0;
292
+		$previous_statement = '';
293
+		$return = [];
294
+		foreach ($statements as $statement) {
295
+			if (self::isEndQuote($statement)) {
296
+				$statement = $previous_statement . $statement;
297
+			} else {
298
+				$i++;
299
+			}
300
+
301
+			$return[$i] = $statement;
302
+			$previous_statement = $statement;
303
+		}
304
+
305
+		return $return;
306
+	}
307
+
308
+	/**
309
+	 * Check if the string is only a quotation mark, or starts with a quotation mark followed by a space and a lowercase letter.
310
+	 *
311
+	 * @param $statement
312
+	 * @return bool
313
+	 */
314
+	private static function isEndQuote($statement)
315
+	{
316
+		$trimmed = Multibyte::trim($statement);
317
+		$first = mb_substr($statement, 0, 1);
318
+
319
+		return in_array($trimmed, ['"', '\''])
320
+			|| (
321
+				in_array($first, ['"', '\''])
322
+				&& mb_substr($statement, 1, 1) === ' '
323
+				&& ctype_lower(mb_substr($statement, 2, 1)) === true
324
+			);
325
+	}
326
+
327
+	/**
328
+	 * Merges items into larger sentences.
329
+	 * Multibyte.php safe
330
+	 *
331
+	 * @param string[] $shorts
332
+	 * @return string[]
333
+	 */
334
+	private function sentenceMerge($shorts)
335
+	{
336
+		$non_abbreviating_terminals = array_diff($this->terminals, $this->abbreviators);
337
+
338
+		$sentences = [];
339
+
340
+		$sentence = '';
341
+		$has_words = false;
342
+		$previous_word_ending = null;
343
+		foreach ($shorts as $short) {
344
+			$word_count = count(mb_split('\s+', Multibyte::trim($short)));
345
+			$after_non_abbreviating_terminal = in_array($previous_word_ending, $non_abbreviating_terminals);
346
+
347
+			if ($after_non_abbreviating_terminal
348
+				|| ($has_words && $word_count > 1)) {
349
+
350
+				$sentences[] = $sentence;
351
+
352
+				$sentence = '';
353
+				$has_words = false;
354
+			}
355
+
356
+			$has_words = $has_words
357
+				|| $word_count > 1;
358
+
359
+			$sentence .= $short;
360
+			$previous_word_ending = mb_substr($short, -1);
361
+		}
362
+
363
+		if (!empty($sentence)) {
364
+			$sentences[] = $sentence;
365
+		}
366
+
367
+		return $sentences;
368
+	}
369
+
370
+	/**
371
+	 * Return the sentences detected in the provided text.
372
+	 * Set the Sentence::SPLIT_TRIM flag to trim whitespace.
373
+	 * @param string $text
374
+	 * @param integer $flags
375
+	 * @return string[]
376
+	 */
377
+	public function split($text, $flags = 0)
378
+	{
379
+		static $pipeline = [
380
+			'floatNumberClean',
381
+			'punctuationSplit',
382
+			'parenthesesMerge', // also works after punctuationMerge or abbreviationMerge
383
+			'punctuationMerge',
384
+			'abbreviationMerge',
385
+			'closeQuotesMerge',
386
+			'sentenceMerge',
387
+			'floatNumberRevert'
388
+		];
389
+
390
+		// clean funny quotes
391
+		$text = Multibyte::cleanUnicode($text);
392
+
393
+		// Split
394
+		$sentences = [];
395
+		foreach (self::linebreakSplit($text) as $input) {
396
+			if (Multibyte::trim($input) !== '') {
397
+				foreach ($pipeline as $method) {
398
+					$input = $this->$method($input);
399
+				}
400
+				$sentences = array_merge($sentences, $input);
401
+			}
402
+		}
403
+
404
+		// Post process
405
+		if ($flags & self::SPLIT_TRIM) {
406
+			return self::trimSentences($sentences);
407
+		}
408
+
409
+		return $sentences;
410
+	}
411
+
412
+	/**
413
+	 * Multibyte.php trim each string in an array.
414
+	 * @param string[] $sentences
415
+	 * @return string[]
416
+	 */
417
+	private static function trimSentences($sentences)
418
+	{
419
+		return array_map(function ($sentence) {
420
+			return Multibyte::trim($sentence);
421
+		}, $sentences);
422
+	}
423
+
424
+	/**
425
+	 * Return the number of sentences detected in the provided text.
426
+	 * @param string $text
427
+	 * @return integer
428
+	 */
429
+	public function count($text)
430
+	{
431
+		return count($this->split($text));
432
+	}
433 433
 
434 434
 }
Please login to merge, or discard this patch.