Dictionary   A
last analyzed

Complexity

Total Complexity 25

Size/Duplication

Total Lines 208
Duplicated Lines 2.88 %

Coupling/Cohesion

Components 1
Dependencies 2

Importance

Changes 0
Metric Value
wmc 25
lcom 1
cbo 2
dl 6
loc 208
rs 10
c 0
b 0
f 0

9 Methods

Rating   Name   Duplication   Size   Complexity  
A setFileLocation() 0 4 1
A factory() 0 7 1
A fromLocale() 0 7 1
A fromFile() 3 14 3
A load() 3 14 3
B parseFile() 0 43 8
A getPatternsForWord() 0 18 4
A addPattern() 0 6 1
A unifyLocale() 0 11 3

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
<?php
2
/**
3
 * Copyright (c) 2008-2011 Andreas Heigl<[email protected]>
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a copy
6
 * of this software and associated documentation files (the "Software"), to deal
7
 * in the Software without restriction, including without limitation the rights
8
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
 * copies of the Software, and to permit persons to whom the Software is
10
 * furnished to do so, subject to the following conditions:
11
 *
12
 * The above copyright notice and this permission notice shall be included in
13
 * all copies or substantial portions of the Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
 * THE SOFTWARE.
22
 *
23
 * @category   Hyphenation
24
 * @package    Org_Heigl_Hyphenator
25
 * @subpackage Dictionary
26
 * @author     Andreas Heigl <[email protected]>
27
 * @copyright  2008-2011 Andreas Heigl<[email protected]>
28
 * @license    http://www.opensource.org/licenses/mit-license.php MIT-License
29
 * @version    2.0.1
30
 * @link       http://github.com/heiglandreas/Hyphenator
31
 * @since      01.11.2011
32
 */
33
34
namespace Org\Heigl\Hyphenator\Dictionary;
35
36
use RuntimeException;
37
use function mb_substr;
38
use function parse_ini_file;
39
use function str_replace;
40
41
/**
42
 * This class provides a generic dictionary containing hyphenation-patterns
43
 *
44
 * @category   Hyphenation
45
 * @package    Org_Heigl_Hyphenator
46
 * @subpackage Dictionary
47
 * @author     Andreas Heigl <[email protected]>
48
 * @copyright  2008-2011 Andreas Heigl<[email protected]>
49
 * @license    http://www.opensource.org/licenses/mit-license.php MIT-License
50
 * @version    2.0.1
51
 * @link       http://github.com/heiglandreas/Hyphenator
52
 * @since      01.11.2011
53
 */
54
class Dictionary
{
    /**
     * The internal storage for the dictionary.
     *
     * Maps the string to be matched to its hyphenation-pattern. Lookups are
     * done with the lower-cased substrings of a word.
     *
     * @var array $dictionary
     */
    private $dictionary = [];

    /**
     * Where to look for the basic files.
     *
     * @var string $fileLocation
     */
    private static $fileLocation = '';

    /**
     * Set the file-location.
     *
     * The location is shared by all instances and is used to build the paths
     * of the "<locale>.ini" and "hyph_<locale>.dic" files.
     *
     * @param string $fileLocation The default file-location for ini-files
     *
     * @return void
     */
    public static function setFileLocation($fileLocation)
    {
        self::$fileLocation = $fileLocation;
    }

    /**
     * Create an instance for a given locale
     *
     * This does the same as fromLocale() and simply delegates to it.
     *
     * @param string $locale The locale to be set for this Dictionary
     *
     * @return Dictionary
     */
    public static function factory($locale)
    {
        return self::fromLocale($locale);
    }

    /**
     * Create an instance and load the ini-file of the given locale into it.
     *
     * When no ini-file exists for the locale the returned dictionary is empty.
     *
     * @param string $locale The locale to load the patterns for
     *
     * @return Dictionary
     */
    public static function fromLocale($locale): Dictionary
    {
        $dictionary = new self();
        $dictionary->load($locale);

        return $dictionary;
    }

    /**
     * Create an instance from an explicitly given ini-file.
     *
     * @param string $file Path to the ini-file containing the patterns
     *
     * @throws RuntimeException when the path is not a regular, readable file
     * @return Dictionary
     */
    public static function fromFile(string $file): Dictionary
    {
        if (! is_file($file) || ! is_readable($file)) {
            throw new RuntimeException(sprintf("The file \"%s\" is not readable", $file));
        }

        $dictionary             = new self();
        $dictionary->dictionary = self::readPatterns($file);

        return $dictionary;
    }

    /**
     * Load a given locale-file as base for the dictionary
     *
     * Any previously stored patterns are discarded. When there is no readable
     * ini-file for the (unified) locale the dictionary is left empty.
     *
     * @param string $locale Load the file for the given locale
     *
     * @return Dictionary
     */
    public function load($locale)
    {
        $locale = $this->unifyLocale($locale);
        $file   = self::$fileLocation . DIRECTORY_SEPARATOR . $locale . '.ini';

        $this->dictionary = [];
        if (is_file($file) && is_readable($file)) {
            $this->dictionary = self::readPatterns($file);
        }

        return $this;
    }

    /**
     * Parse a dictionary-file to create an ini-file from it.
     *
     * Reads "hyph_<locale>.dic" from the file-location and writes
     * "<locale>.ini" next to it. The first line of the dic-file names the
     * character set of the remaining lines.
     *
     * @param string $locale Parse the file for the given locale
     *
     * @throws \Org\Heigl\Hyphenator\Exception\PathNotFoundException
     * @throws RuntimeException when the dic-file can not be read or the
     *                          ini-file can not be opened for writing
     * @return string The path of the written ini-file
     */
    public static function parseFile($locale)
    {
        $path = self::$fileLocation . DIRECTORY_SEPARATOR;
        $file = $path . 'hyph_' . $locale . '.dic';
        if (! file_exists($file)) {
            throw new \Org\Heigl\Hyphenator\Exception\PathNotFoundException('The given Path does not exist');
        }

        $items = file($file);
        if (false === $items) {
            throw new RuntimeException(sprintf("The file \"%s\" is not readable", $file));
        }

        // An empty file yields an empty encoding and an empty ini-file.
        $source = trim((string) array_shift($items));
        if (0 === strpos($source, 'ISO8859')) {
            $source = str_replace('ISO8859', 'ISO-8859', $source);
        }

        $target = $path . $locale . '.ini';
        $fh     = fopen($target, 'w+');
        if (false === $fh) {
            throw new RuntimeException(sprintf("The file \"%s\" is not writable", $target));
        }

        try {
            foreach ($items as $item) {
                // Remove comment-lines starting with '#' or '%'.
                if (in_array(mb_substr($item, 0, 1), ['#', '%'], true)) {
                    continue;
                }
                // Ignore empty lines.
                if ('' === trim($item)) {
                    continue;
                }
                // Ignore lines containing an '=' sign as these are specific
                // instructions for non-standard-hyphenations. These will be
                // implemented later.
                if (false !== mb_strpos($item, '=')) {
                    continue;
                }
                $item = mb_convert_encoding($item, 'UTF-8', $source);
                // Remove all Upper-case items as they are OOo-specific. This
                // has to happen after the conversion to UTF-8 as the case of
                // non-ASCII characters can not be determined on the raw bytes.
                if ($item === mb_strtoupper($item)) {
                    continue;
                }
                $result = Pattern::factory($item);
                $string = '@:' . $result->getText() . ' = "' . $result->getPattern() . '"' . "\n";
                fwrite($fh, $string);
            }
        } finally {
            // Release the handle even when creating a pattern fails.
            fclose($fh);
        }

        return $target;
    }

    /**
     * Get all patterns for a given word.
     *
     * The word is wrapped in dots and every substring of at least two
     * characters is looked up in its lower-cased form. The returned array is
     * keyed by the substring as it appears in the wrapped word.
     *
     * @param string $word The word to get the patterns for.
     *
     * @return array
     */
    public function getPatternsForWord($word)
    {
        $return = [];
        $word   = '.' . $word . '.';
        $strlen = mb_strlen($word);
        // Start-offsets beyond $strlen - 2 can not yield a 2-character substring.
        for ($i = 0; $i <= $strlen - 2; $i++) {
            for ($j = 2; $j <= ($strlen - $i); $j++) {
                $substr         = mb_substr($word, $i, $j);
                $lowerSubstring = mb_strtolower($substr);
                if (! isset($this->dictionary[$lowerSubstring])) {
                    continue;
                }
                $return[$substr] = $this->dictionary[$lowerSubstring];
            }
        }

        return $return;
    }

    /**
     * Manually add or overwrite a pattern
     *
     * @param string $string  String to be matched
     * @param string $pattern Numerical hyphenation-pattern
     *
     * @return \Org\Heigl\Hyphenator\Dictionary\Dictionary
     */
    public function addPattern($string, $pattern)
    {
        $this->dictionary[$string] = $pattern;

        return $this;
    }

    /**
     * Read an ini-file and return its entries keyed by the string to match.
     *
     * Every occurrence of "@:" is removed from the keys of the ini-file. A file
     * that can not be parsed results in an empty array.
     *
     * @param string $file Path of the ini-file to read
     *
     * @return array
     */
    private static function readPatterns(string $file): array
    {
        $entries = parse_ini_file($file);
        if (false === $entries) {
            return [];
        }

        $patterns = [];
        foreach ($entries as $key => $val) {
            $patterns[str_replace('@:', '', $key)] = $val;
        }

        return $patterns;
    }

    /**
     * Unify the given locale to a default format.
     *
     * For that in a 2 by 2 format the whole string is split, the first part
     * lowercased, the second part uppercased and concatenated with an
     * underscore.
     *
     * a 2-letter locale will simply be lowercased.
     *
     * everything else will be returned AS IS
     *
     * @param string $locale The locale to unify
     *
     * @return string
     */
    private function unifyLocale($locale)
    {
        // Cast once so that the string-functions below never see a non-string.
        $locale = (string) $locale;

        if (2 === strlen($locale)) {
            return strtolower($locale);
        }
        if (preg_match('/([a-zA-Z]{2})[^a-zA-Z]+([a-zA-Z]{2})/i', $locale, $result)) {
            return strtolower($result[1]) . '_' . strtoupper($result[2]);
        }

        return $locale;
    }
}
262