Passed
Push — master ( 831065...bd113f )
by Martijn
01:58
created
src/Sentence.php 1 patch
Indentation   +414 added lines, -414 removed lines patch added patch discarded remove patch
@@ -17,419 +17,419 @@
 block discarded – undo
17 17
 class Sentence
18 18
 {
19 19
 
20
-    /**
21
-     * Specify this flag with the split method to trim whitespace.
22
-     */
23
-    const SPLIT_TRIM = 0x1;
24
-
25
-    /**
26
-     * List of characters used to terminate sentences.
27
-     *
28
-     * @var string[]
29
-     */
30
-    private $terminals = ['.', '!', '?'];
31
-
32
-    /**
33
-     * List of characters used for abbreviations.
34
-     *
35
-     * @var string[]
36
-     */
37
-    private $abbreviators = ['.'];
38
-
39
-    /**
40
-     * List of float numbers in the text
41
-     *
42
-     * @var string[]
43
-     */
44
-    private $floatNumbers = [];
45
-
46
-    /**
47
-     * Clean floating point numbers by replacing them with their md5 hash
48
-     *
49
-     * @param string $text
50
-     *
51
-     * @return string
52
-     */
53
-    private function floatNumberClean(string $text)
54
-    {
55
-        preg_match_all('!\d+(?:\.\d+)?!', $text, $matches);
56
-
57
-        $this->floatNumbers = [];
58
-        foreach ($matches[0] as $floatNumber) {
59
-            if (isset($this->floatNumbers[$floatNumber])) {
60
-                continue;
61
-            }
62
-
63
-            $hash = md5($floatNumber);
64
-            $this->floatNumbers[$floatNumber] = $hash;
65
-            $text = str_replace($floatNumber, $hash, $text);
66
-        }
67
-
68
-        return $text;
69
-    }
70
-
71
-    /**
72
-     * Revert the hashed floating number back
73
-     *
74
-     * @param string[] $text
75
-     *
76
-     * @return string[]
77
-     */
78
-    private function floatNumberRevert($text)
79
-    {
80
-        return array_map(function($value) {
81
-            foreach ($this->floatNumbers as $number => $hash) {
82
-                $value = str_replace($hash, $number, $value);
83
-            }
84
-            return $value;
85
-        }, $text);
86
-    }
87
-
88
-    /**
89
-     * Breaks a piece of text into lines by linebreak.
90
-     * Eats up any linebreak characters as if one.
91
-     *
92
-     * Multibyte.php safe
93
-     *
94
-     * @param string $text
95
-     * @return string[]
96
-     */
97
-    private static function linebreakSplit($text)
98
-    {
99
-        $lines = [];
100
-        $line = '';
101
-
102
-        foreach (Multibyte::split('([\r\n]+)', $text, -1, PREG_SPLIT_DELIM_CAPTURE) as $part) {
103
-            $line .= $part;
104
-            if (Multibyte::trim($part) === '') {
105
-                $lines[] = $line;
106
-                $line = '';
107
-            }
108
-        }
109
-        $lines[] = $line;
110
-
111
-        return $lines;
112
-    }
113
-
114
-    /**
115
-     * Splits a line by (consecutive sequences of)
116
-     * terminals, keeping terminals.
117
-     *
118
-     * Multibyte.php safe (at least for UTF-8)
119
-     *
120
-     * For example:
121
-     *    "There ... is. More!"
122
-     *        ... becomes ...
123
-     *    [ "There ", "...", " is", ".", " More", "!" ]
124
-     *
125
-     * @param string $line
126
-     * @return string[]
127
-     */
128
-    private function punctuationSplit($line)
129
-    {
130
-        $parts = [];
131
-
132
-        $chars = preg_split('//u', $line, -1, PREG_SPLIT_NO_EMPTY); // This is UTF8 multibyte safe!
133
-        $is_terminal = in_array($chars[0], $this->terminals);
134
-
135
-        $part = '';
136
-        foreach ($chars as $index => $char) {
137
-            if (in_array($char, $this->terminals) !== $is_terminal) {
138
-                $parts[] = $part;
139
-                $part = '';
140
-                $is_terminal = !$is_terminal;
141
-            }
142
-            $part .= $char;
143
-        }
144
-
145
-        if (!empty($part)) {
146
-            $parts[] = $part;
147
-        }
148
-
149
-        return $parts;
150
-    }
151
-
152
-    /**
153
-     * Appends each terminal item after its preceding
154
-     * non-terminals.
155
-     *
156
-     * Multibyte.php safe (at least for UTF-8)
157
-     *
158
-     * For example:
159
-     *    [ "There ", "...", " is", ".", " More", "!" ]
160
-     *        ... becomes ...
161
-     *    [ "There ... is.", "More!" ]
162
-     *
163
-     * @param string[] $punctuations
164
-     * @return string[]
165
-     */
166
-    private function punctuationMerge($punctuations)
167
-    {
168
-        $definite_terminals = array_diff($this->terminals, $this->abbreviators);
169
-
170
-        $merges = [];
171
-        $merge = '';
172
-
173
-        $filtered = array_filter($punctuations, function ($p) {
174
-            return $p !== '';
175
-        });
176
-
177
-        foreach ($filtered as $punctuation) {
178
-            $merge .= $punctuation;
179
-            if (mb_strlen($punctuation) === 1
180
-                && in_array($punctuation, $this->terminals)) {
181
-                $merges[] = $merge;
182
-                $merge = '';
183
-            } else {
184
-                foreach ($definite_terminals as $terminal) {
185
-                    if (mb_strpos($punctuation, $terminal) !== false) {
186
-                        $merges[] = $merge;
187
-                        $merge = '';
188
-                        break;
189
-                    }
190
-                }
191
-            }
192
-        }
193
-        if (!empty($merge)) {
194
-            $merges[] = $merge;
195
-        }
196
-
197
-        return $merges;
198
-    }
199
-
200
-    /**
201
-     * Looks for capitalized abbreviations & includes them with the following fragment.
202
-     *
203
-     * For example:
204
-     *    [ "Last week, former director of the F.B.I. James B. Comey was fired. Mr. Comey was not available for comment." ]
205
-     *        ... becomes ...
206
-     *    [ "Last week, former director of the F.B.I. James B. Comey was fired." ]
207
-     *  [ "Mr. Comey was not available for comment." ]
208
-     *
209
-     * @param string[] $fragments
210
-     * @return string[]
211
-     */
212
-    private function abbreviationMerge($fragments)
213
-    {
214
-        $return_fragment = [];
215
-
216
-        $previous_fragment = '';
217
-        $previous_is_abbreviation = false;
218
-        $i = 0;
219
-        foreach ($fragments as $fragment) {
220
-            $is_abbreviation = self::isAbreviation($fragment);
221
-
222
-            // merge previous fragment with this
223
-            if ($previous_is_abbreviation) {
224
-                $fragment = $previous_fragment . $fragment;
225
-            }
226
-            $return_fragment[$i] = $fragment;
227
-
228
-            $previous_is_abbreviation = $is_abbreviation;
229
-            $previous_fragment = $fragment;
230
-
231
-            // only increment if this isn't an abbreviation
232
-            if (!$is_abbreviation) {
233
-                $i++;
234
-            }
235
-        }
236
-        return $return_fragment;
237
-    }
238
-
239
-    /**
240
-     * Check if the last word of fragment starts with a capital letter, ends in "." & has at most 3 characters (period included).
241
-     *
242
-     * @param $fragment
243
-     * @return bool
244
-     */
245
-    private static function isAbreviation($fragment)
246
-    {
247
-        $words = mb_split('\s+', Multibyte::trim($fragment));
248
-
249
-        $word_count = count($words);
250
-
251
-        $last_word = Multibyte::trim($words[$word_count - 1]);
252
-        $last_is_capital = preg_match('#^\p{Lu}#u', $last_word);
253
-        $last_is_abbreviation = mb_substr(Multibyte::trim($fragment), -1) === '.';
254
-
255
-        return $last_is_capital > 0
256
-            && $last_is_abbreviation > 0
257
-            && mb_strlen($last_word) <= 3;
258
-    }
259
-
260
-    /**
261
-     * Merges any part starting with a closing parenthesis ')' to the previous
262
-     * part.
263
-     *
264
-     * @param string[] $parts
265
-     * @return string[]
266
-     */
267
-    private function parenthesesMerge($parts)
268
-    {
269
-        $subsentences = [];
270
-
271
-        foreach ($parts as $part) {
272
-            if ($part[0] === ')') {
273
-                $subsentences[count($subsentences) - 1] .= $part;
274
-            } else {
275
-                $subsentences[] = $part;
276
-            }
277
-        }
278
-
279
-        return $subsentences;
280
-    }
281
-
282
-    /**
283
-     * Looks for closing quotes to include them with the previous statement.
284
-     * "That was very interesting," he said.
285
-     * "That was very interesting."
286
-     *
287
-     * @param string[] $statements
288
-     * @return string[]
289
-     */
290
-    private function closeQuotesMerge($statements)
291
-    {
292
-        $i = 0;
293
-        $previous_statement = '';
294
-        $return = [];
295
-        foreach ($statements as $statement) {
296
-            if (self::isEndQuote($statement)) {
297
-                $statement = $previous_statement . $statement;
298
-            } else {
299
-                $i++;
300
-            }
301
-
302
-            $return[$i] = $statement;
303
-            $previous_statement = $statement;
304
-        }
305
-
306
-        return $return;
307
-    }
308
-
309
-    /**
310
-     * Check if the entire string is a quotation mark or quote, then space, then lowercase.
311
-     *
312
-     * @param $statement
313
-     * @return bool
314
-     */
315
-    private static function isEndQuote($statement)
316
-    {
317
-        $trimmed = Multibyte::trim($statement);
318
-        $first = mb_substr($statement, 0, 1);
319
-
320
-        return in_array($trimmed, ['"', '\''])
321
-            || (
322
-                in_array($first, ['"', '\''])
323
-                && mb_substr($statement, 1, 1) === ' '
324
-                && ctype_lower(mb_substr($statement, 2, 1)) === true
325
-            );
326
-    }
327
-
328
-    /**
329
-     * Merges items into larger sentences.
330
-     * Multibyte.php safe
331
-     *
332
-     * @param string[] $shorts
333
-     * @return string[]
334
-     */
335
-    private function sentenceMerge($shorts)
336
-    {
337
-        $non_abbreviating_terminals = array_diff($this->terminals, $this->abbreviators);
338
-
339
-        $sentences = [];
340
-
341
-        $sentence = '';
342
-        $has_words = false;
343
-        $previous_word_ending = null;
344
-        foreach ($shorts as $short) {
345
-            $word_count = count(mb_split('\s+', Multibyte::trim($short)));
346
-            $after_non_abbreviating_terminal = in_array($previous_word_ending, $non_abbreviating_terminals);
347
-
348
-            if ($after_non_abbreviating_terminal
349
-                || ($has_words && $word_count > 1)) {
350
-
351
-                $sentences[] = $sentence;
352
-
353
-                $sentence = '';
354
-                $has_words = false;
355
-            }
356
-
357
-            $has_words = $has_words
358
-                || $word_count > 1;
359
-
360
-            $sentence .= $short;
361
-            $previous_word_ending = mb_substr($short, -1);
362
-        }
363
-
364
-        if (!empty($sentence)) {
365
-            $sentences[] = $sentence;
366
-        }
367
-
368
-        return $sentences;
369
-    }
370
-
371
-    /**
372
-     * Return the sentences detected in the provided text.
373
-     * Set the Sentence::SPLIT_TRIM flag to trim whitespace.
374
-     * @param string $text
375
-     * @param integer $flags
376
-     * @return string[]
377
-     */
378
-    public function split($text, $flags = 0)
379
-    {
380
-        static $pipeline = [
381
-            'floatNumberClean',
382
-            'punctuationSplit',
383
-            'parenthesesMerge', // also works after punctuationMerge or abbreviationMerge
384
-            'punctuationMerge',
385
-            'abbreviationMerge',
386
-            'closeQuotesMerge',
387
-            'sentenceMerge',
388
-            'floatNumberRevert'
389
-        ];
390
-
391
-        // clean funny quotes
392
-        $text = Multibyte::cleanUnicode($text);
393
-
394
-        // Split
395
-        $sentences = [];
396
-        foreach (self::linebreakSplit($text) as $input) {
397
-            if (Multibyte::trim($input) !== '') {
398
-                foreach ($pipeline as $method) {
399
-                    $input = $this->$method($input);
400
-                }
401
-                $sentences = array_merge($sentences, $input);
402
-            }
403
-        }
404
-
405
-        // Post process
406
-        if ($flags & self::SPLIT_TRIM) {
407
-            return self::trimSentences($sentences);
408
-        }
409
-
410
-        return $sentences;
411
-    }
412
-
413
-    /**
414
-     * Multibyte.php trim each string in an array.
415
-     * @param string[] $sentences
416
-     * @return string[]
417
-     */
418
-    private static function trimSentences($sentences)
419
-    {
420
-        return array_map(function ($sentence) {
421
-            return Multibyte::trim($sentence);
422
-        }, $sentences);
423
-    }
424
-
425
-    /**
426
-     * Return the number of sentences detected in the provided text.
427
-     * @param string $text
428
-     * @return integer
429
-     */
430
-    public function count($text)
431
-    {
432
-        return count($this->split($text));
433
-    }
20
+	/**
21
+	 * Specify this flag with the split method to trim whitespace.
22
+	 */
23
+	const SPLIT_TRIM = 0x1;
24
+
25
+	/**
26
+	 * List of characters used to terminate sentences.
27
+	 *
28
+	 * @var string[]
29
+	 */
30
+	private $terminals = ['.', '!', '?'];
31
+
32
+	/**
33
+	 * List of characters used for abbreviations.
34
+	 *
35
+	 * @var string[]
36
+	 */
37
+	private $abbreviators = ['.'];
38
+
39
+	/**
40
+	 * List of float numbers in the text
41
+	 *
42
+	 * @var string[]
43
+	 */
44
+	private $floatNumbers = [];
45
+
46
+	/**
47
+	 * Clean floating point numbers by replacing them with their md5 hash
48
+	 *
49
+	 * @param string $text
50
+	 *
51
+	 * @return string
52
+	 */
53
+	private function floatNumberClean(string $text)
54
+	{
55
+		preg_match_all('!\d+(?:\.\d+)?!', $text, $matches);
56
+
57
+		$this->floatNumbers = [];
58
+		foreach ($matches[0] as $floatNumber) {
59
+			if (isset($this->floatNumbers[$floatNumber])) {
60
+				continue;
61
+			}
62
+
63
+			$hash = md5($floatNumber);
64
+			$this->floatNumbers[$floatNumber] = $hash;
65
+			$text = str_replace($floatNumber, $hash, $text);
66
+		}
67
+
68
+		return $text;
69
+	}
70
+
71
+	/**
72
+	 * Revert the hashed floating number back
73
+	 *
74
+	 * @param string[] $text
75
+	 *
76
+	 * @return string[]
77
+	 */
78
+	private function floatNumberRevert($text)
79
+	{
80
+		return array_map(function($value) {
81
+			foreach ($this->floatNumbers as $number => $hash) {
82
+				$value = str_replace($hash, $number, $value);
83
+			}
84
+			return $value;
85
+		}, $text);
86
+	}
87
+
88
+	/**
89
+	 * Breaks a piece of text into lines by linebreak.
90
+	 * Eats up any linebreak characters as if one.
91
+	 *
92
+	 * Multibyte.php safe
93
+	 *
94
+	 * @param string $text
95
+	 * @return string[]
96
+	 */
97
+	private static function linebreakSplit($text)
98
+	{
99
+		$lines = [];
100
+		$line = '';
101
+
102
+		foreach (Multibyte::split('([\r\n]+)', $text, -1, PREG_SPLIT_DELIM_CAPTURE) as $part) {
103
+			$line .= $part;
104
+			if (Multibyte::trim($part) === '') {
105
+				$lines[] = $line;
106
+				$line = '';
107
+			}
108
+		}
109
+		$lines[] = $line;
110
+
111
+		return $lines;
112
+	}
113
+
114
+	/**
115
+	 * Splits a line by (consecutive sequences of)
116
+	 * terminals, keeping terminals.
117
+	 *
118
+	 * Multibyte.php safe (at least for UTF-8)
119
+	 *
120
+	 * For example:
121
+	 *    "There ... is. More!"
122
+	 *        ... becomes ...
123
+	 *    [ "There ", "...", " is", ".", " More", "!" ]
124
+	 *
125
+	 * @param string $line
126
+	 * @return string[]
127
+	 */
128
+	private function punctuationSplit($line)
129
+	{
130
+		$parts = [];
131
+
132
+		$chars = preg_split('//u', $line, -1, PREG_SPLIT_NO_EMPTY); // This is UTF8 multibyte safe!
133
+		$is_terminal = in_array($chars[0], $this->terminals);
134
+
135
+		$part = '';
136
+		foreach ($chars as $index => $char) {
137
+			if (in_array($char, $this->terminals) !== $is_terminal) {
138
+				$parts[] = $part;
139
+				$part = '';
140
+				$is_terminal = !$is_terminal;
141
+			}
142
+			$part .= $char;
143
+		}
144
+
145
+		if (!empty($part)) {
146
+			$parts[] = $part;
147
+		}
148
+
149
+		return $parts;
150
+	}
151
+
152
+	/**
153
+	 * Appends each terminal item after its preceding
154
+	 * non-terminals.
155
+	 *
156
+	 * Multibyte.php safe (at least for UTF-8)
157
+	 *
158
+	 * For example:
159
+	 *    [ "There ", "...", " is", ".", " More", "!" ]
160
+	 *        ... becomes ...
161
+	 *    [ "There ... is.", "More!" ]
162
+	 *
163
+	 * @param string[] $punctuations
164
+	 * @return string[]
165
+	 */
166
+	private function punctuationMerge($punctuations)
167
+	{
168
+		$definite_terminals = array_diff($this->terminals, $this->abbreviators);
169
+
170
+		$merges = [];
171
+		$merge = '';
172
+
173
+		$filtered = array_filter($punctuations, function ($p) {
174
+			return $p !== '';
175
+		});
176
+
177
+		foreach ($filtered as $punctuation) {
178
+			$merge .= $punctuation;
179
+			if (mb_strlen($punctuation) === 1
180
+				&& in_array($punctuation, $this->terminals)) {
181
+				$merges[] = $merge;
182
+				$merge = '';
183
+			} else {
184
+				foreach ($definite_terminals as $terminal) {
185
+					if (mb_strpos($punctuation, $terminal) !== false) {
186
+						$merges[] = $merge;
187
+						$merge = '';
188
+						break;
189
+					}
190
+				}
191
+			}
192
+		}
193
+		if (!empty($merge)) {
194
+			$merges[] = $merge;
195
+		}
196
+
197
+		return $merges;
198
+	}
199
+
200
+	/**
201
+	 * Looks for capitalized abbreviations & includes them with the following fragment.
202
+	 *
203
+	 * For example:
204
+	 *    [ "Last week, former director of the F.B.I. James B. Comey was fired. Mr. Comey was not available for comment." ]
205
+	 *        ... becomes ...
206
+	 *    [ "Last week, former director of the F.B.I. James B. Comey was fired." ]
207
+	 *  [ "Mr. Comey was not available for comment." ]
208
+	 *
209
+	 * @param string[] $fragments
210
+	 * @return string[]
211
+	 */
212
+	private function abbreviationMerge($fragments)
213
+	{
214
+		$return_fragment = [];
215
+
216
+		$previous_fragment = '';
217
+		$previous_is_abbreviation = false;
218
+		$i = 0;
219
+		foreach ($fragments as $fragment) {
220
+			$is_abbreviation = self::isAbreviation($fragment);
221
+
222
+			// merge previous fragment with this
223
+			if ($previous_is_abbreviation) {
224
+				$fragment = $previous_fragment . $fragment;
225
+			}
226
+			$return_fragment[$i] = $fragment;
227
+
228
+			$previous_is_abbreviation = $is_abbreviation;
229
+			$previous_fragment = $fragment;
230
+
231
+			// only increment if this isn't an abbreviation
232
+			if (!$is_abbreviation) {
233
+				$i++;
234
+			}
235
+		}
236
+		return $return_fragment;
237
+	}
238
+
239
+	/**
240
+	 * Check if the last word of fragment starts with a capital letter, ends in "." & has at most 3 characters (period included).
241
+	 *
242
+	 * @param $fragment
243
+	 * @return bool
244
+	 */
245
+	private static function isAbreviation($fragment)
246
+	{
247
+		$words = mb_split('\s+', Multibyte::trim($fragment));
248
+
249
+		$word_count = count($words);
250
+
251
+		$last_word = Multibyte::trim($words[$word_count - 1]);
252
+		$last_is_capital = preg_match('#^\p{Lu}#u', $last_word);
253
+		$last_is_abbreviation = mb_substr(Multibyte::trim($fragment), -1) === '.';
254
+
255
+		return $last_is_capital > 0
256
+			&& $last_is_abbreviation > 0
257
+			&& mb_strlen($last_word) <= 3;
258
+	}
259
+
260
+	/**
261
+	 * Merges any part starting with a closing parenthesis ')' to the previous
262
+	 * part.
263
+	 *
264
+	 * @param string[] $parts
265
+	 * @return string[]
266
+	 */
267
+	private function parenthesesMerge($parts)
268
+	{
269
+		$subsentences = [];
270
+
271
+		foreach ($parts as $part) {
272
+			if ($part[0] === ')') {
273
+				$subsentences[count($subsentences) - 1] .= $part;
274
+			} else {
275
+				$subsentences[] = $part;
276
+			}
277
+		}
278
+
279
+		return $subsentences;
280
+	}
281
+
282
+	/**
283
+	 * Looks for closing quotes to include them with the previous statement.
284
+	 * "That was very interesting," he said.
285
+	 * "That was very interesting."
286
+	 *
287
+	 * @param string[] $statements
288
+	 * @return string[]
289
+	 */
290
+	private function closeQuotesMerge($statements)
291
+	{
292
+		$i = 0;
293
+		$previous_statement = '';
294
+		$return = [];
295
+		foreach ($statements as $statement) {
296
+			if (self::isEndQuote($statement)) {
297
+				$statement = $previous_statement . $statement;
298
+			} else {
299
+				$i++;
300
+			}
301
+
302
+			$return[$i] = $statement;
303
+			$previous_statement = $statement;
304
+		}
305
+
306
+		return $return;
307
+	}
308
+
309
+	/**
310
+	 * Check if the entire string is a quotation mark or quote, then space, then lowercase.
311
+	 *
312
+	 * @param $statement
313
+	 * @return bool
314
+	 */
315
+	private static function isEndQuote($statement)
316
+	{
317
+		$trimmed = Multibyte::trim($statement);
318
+		$first = mb_substr($statement, 0, 1);
319
+
320
+		return in_array($trimmed, ['"', '\''])
321
+			|| (
322
+				in_array($first, ['"', '\''])
323
+				&& mb_substr($statement, 1, 1) === ' '
324
+				&& ctype_lower(mb_substr($statement, 2, 1)) === true
325
+			);
326
+	}
327
+
328
+	/**
329
+	 * Merges items into larger sentences.
330
+	 * Multibyte.php safe
331
+	 *
332
+	 * @param string[] $shorts
333
+	 * @return string[]
334
+	 */
335
+	private function sentenceMerge($shorts)
336
+	{
337
+		$non_abbreviating_terminals = array_diff($this->terminals, $this->abbreviators);
338
+
339
+		$sentences = [];
340
+
341
+		$sentence = '';
342
+		$has_words = false;
343
+		$previous_word_ending = null;
344
+		foreach ($shorts as $short) {
345
+			$word_count = count(mb_split('\s+', Multibyte::trim($short)));
346
+			$after_non_abbreviating_terminal = in_array($previous_word_ending, $non_abbreviating_terminals);
347
+
348
+			if ($after_non_abbreviating_terminal
349
+				|| ($has_words && $word_count > 1)) {
350
+
351
+				$sentences[] = $sentence;
352
+
353
+				$sentence = '';
354
+				$has_words = false;
355
+			}
356
+
357
+			$has_words = $has_words
358
+				|| $word_count > 1;
359
+
360
+			$sentence .= $short;
361
+			$previous_word_ending = mb_substr($short, -1);
362
+		}
363
+
364
+		if (!empty($sentence)) {
365
+			$sentences[] = $sentence;
366
+		}
367
+
368
+		return $sentences;
369
+	}
370
+
371
+	/**
372
+	 * Return the sentences detected in the provided text.
373
+	 * Set the Sentence::SPLIT_TRIM flag to trim whitespace.
374
+	 * @param string $text
375
+	 * @param integer $flags
376
+	 * @return string[]
377
+	 */
378
+	public function split($text, $flags = 0)
379
+	{
380
+		static $pipeline = [
381
+			'floatNumberClean',
382
+			'punctuationSplit',
383
+			'parenthesesMerge', // also works after punctuationMerge or abbreviationMerge
384
+			'punctuationMerge',
385
+			'abbreviationMerge',
386
+			'closeQuotesMerge',
387
+			'sentenceMerge',
388
+			'floatNumberRevert'
389
+		];
390
+
391
+		// clean funny quotes
392
+		$text = Multibyte::cleanUnicode($text);
393
+
394
+		// Split
395
+		$sentences = [];
396
+		foreach (self::linebreakSplit($text) as $input) {
397
+			if (Multibyte::trim($input) !== '') {
398
+				foreach ($pipeline as $method) {
399
+					$input = $this->$method($input);
400
+				}
401
+				$sentences = array_merge($sentences, $input);
402
+			}
403
+		}
404
+
405
+		// Post process
406
+		if ($flags & self::SPLIT_TRIM) {
407
+			return self::trimSentences($sentences);
408
+		}
409
+
410
+		return $sentences;
411
+	}
412
+
413
+	/**
414
+	 * Multibyte.php trim each string in an array.
415
+	 * @param string[] $sentences
416
+	 * @return string[]
417
+	 */
418
+	private static function trimSentences($sentences)
419
+	{
420
+		return array_map(function ($sentence) {
421
+			return Multibyte::trim($sentence);
422
+		}, $sentences);
423
+	}
424
+
425
+	/**
426
+	 * Return the number of sentences detected in the provided text.
427
+	 * @param string $text
428
+	 * @return integer
429
+	 */
430
+	public function count($text)
431
+	{
432
+		return count($this->split($text));
433
+	}
434 434
 
435 435
 }
Please login to merge, or discard this patch.