1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* Copyright (c) Andreas Heigl<[email protected]> |
4
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy |
5
|
|
|
* of this software and associated documentation files (the "Software"), to deal |
6
|
|
|
* in the Software without restriction, including without limitation the rights |
7
|
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
8
|
|
|
* copies of the Software, and to permit persons to whom the Software is |
9
|
|
|
* furnished to do so, subject to the following conditions: |
10
|
|
|
* The above copyright notice and this permission notice shall be included in |
11
|
|
|
* all copies or substantial portions of the Software. |
12
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
13
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
14
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
15
|
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
16
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
17
|
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
18
|
|
|
* THE SOFTWARE. |
19
|
|
|
* |
20
|
|
|
* @author Andreas Heigl<[email protected]> |
21
|
|
|
* @copyright Andreas Heigl |
22
|
|
|
* @license http://www.opensource.org/licenses/mit-license.php MIT-License |
23
|
|
|
* @since 12.10.2016 |
24
|
|
|
* @link http://github.com/heiglandreas/org.heigl.TextStatistics |
25
|
|
|
*/ |
26
|
|
|
|
27
|
|
|
namespace Org_Heigl\TextStatistics\Calculator; |
28
|
|
|
|
29
|
|
|
use Org\Heigl\Hyphenator\Tokenizer\PunctuationTokenizer; |
30
|
|
|
use Org\Heigl\Hyphenator\Tokenizer\TokenizerRegistry; |
31
|
|
|
use Org\Heigl\Hyphenator\Tokenizer\WhitespaceTokenizer; |
32
|
|
|
use Org\Heigl\Hyphenator\Tokenizer\WordToken; |
33
|
|
|
use Org_Heigl\TextStatistics\Text; |
34
|
|
|
|
35
|
|
|
/**
 * Calculator that reports the highest syllable count found in any single
 * word of a text.
 */
class WordMaxSyllablesCounter implements CalculatorInterface
{
    /**
     * Registry holding the tokenizers that split the plain text into tokens.
     *
     * @var TokenizerRegistry
     */
    protected $tokenizer;

    /**
     * Calculator used to determine the syllable count of one word.
     *
     * @var SyllableCounter
     */
    protected $syllableCounter;

    /**
     * Set up the tokenizer chain and the per-word syllable counter.
     *
     * The hyphenator type is written fully qualified on purpose: this file
     * does not import it, so an unqualified "Hyphenator" would be resolved
     * relative to the current namespace instead of the hyphenator library.
     *
     * @param \Org\Heigl\Hyphenator\Hyphenator $hyphenator Hyphenator handed
     *                                                     on to the syllable
     *                                                     counter
     */
    public function __construct(\Org\Heigl\Hyphenator\Hyphenator $hyphenator)
    {
        $this->tokenizer = new TokenizerRegistry();
        $this->tokenizer->add(new PunctuationTokenizer());
        $this->tokenizer->add(new WhitespaceTokenizer());

        $this->syllableCounter = new SyllableCounter($hyphenator);
    }

    /**
     * Determine the largest syllable count of any word in the given text.
     *
     * The plain text is tokenized and every word token is passed on its own
     * to the syllable counter; the largest reported value is kept.
     *
     * @param Text $text
     *
     * @return mixed 0 when the text yields no word tokens, otherwise the
     *               largest value reported by the syllable counter
     */
    public function calculate(Text $text)
    {
        $tokens = $this->tokenizer->tokenize($text->getPlainText());

        $maxSyllables = 0;

        foreach ($tokens as $token) {
            // Only word tokens carry syllables; skip everything else.
            if (! $token instanceof WordToken) {
                continue;
            }

            // Wrap the single word in its own Text so the syllable counter
            // can be reused unchanged.
            $syllables = $this->syllableCounter->calculate(new Text($token->getFilteredContent()));
            if ($syllables > $maxSyllables) {
                $maxSyllables = $syllables;
            }
        }

        return $maxSyllables;
    }
}
77
|
|
|
|