Completed
Pull Request — master (#37)
by Andreas
02:36 queued 01:19
created

Dictionary::unifyLocale()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 11
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 11
rs 9.4285
cc 3
eloc 6
nc 3
nop 1
1
<?php
2
/**
3
 * Copyright (c) 2008-2011 Andreas Heigl<[email protected]>
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a copy
6
 * of this software and associated documentation files (the "Software"), to deal
7
 * in the Software without restriction, including without limitation the rights
8
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
 * copies of the Software, and to permit persons to whom the Software is
10
 * furnished to do so, subject to the following conditions:
11
 *
12
 * The above copyright notice and this permission notice shall be included in
13
 * all copies or substantial portions of the Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
 * THE SOFTWARE.
22
 *
23
 * @category   Hyphenation
24
 * @package    Org_Heigl_Hyphenator
25
 * @subpackage Dictionary
26
 * @author     Andreas Heigl <[email protected]>
27
 * @copyright  2008-2011 Andreas Heigl<[email protected]>
28
 * @license    http://www.opensource.org/licenses/mit-license.php MIT-License
29
 * @version    2.0.1
30
 * @link       http://github.com/heiglandreas/Hyphenator
31
 * @since      01.11.2011
32
 */
33
34
namespace Org\Heigl\Hyphenator\Dictionary;
35
36
/**
37
 * This class provides a generic dictionary containing hyphenation-patterns
38
 *
39
 * @category   Hyphenation
40
 * @package    Org_Heigl_Hyphenator
41
 * @subpackage Dictionary
42
 * @author     Andreas Heigl <[email protected]>
43
 * @copyright  2008-2011 Andreas Heigl<[email protected]>
44
 * @license    http://www.opensource.org/licenses/mit-license.php MIT-License
45
 * @version    2.0.1
46
 * @link       http://github.com/heiglandreas/Hyphenator
47
 * @since      01.11.2011
48
 */
49
class Dictionary
{
    /**
     * The internal storage for the dictionary.
     *
     * Maps the string to be matched to its hyphenation-pattern.
     *
     * @var array $dictionary
     */
    private $dictionary = array();

    /**
     * Where to look for the basic files.
     *
     * @var string $fileLocation
     */
    private static $fileLocation = '';

    /**
     * Set the file-location.
     *
     * The location is stored statically and is therefore shared by all
     * instances of this class.
     *
     * @param string $fileLocation The default file-location for ini-files
     *
     * @return void
     */
    public static function setFileLocation($fileLocation)
    {
        self::$fileLocation = $fileLocation;
    }

    /**
     * Create a new Instance of the Dictionary
     */
    public function __construct()
    {
    }

    /**
     * Create an instance for a given locale and load its ini-file
     *
     * @param string $locale The locale to be set for this Dictionary
     *
     * @return Dictionary
     */
    public static function factory($locale)
    {
        $dict = new self();
        $dict->load($locale);

        return $dict;
    }

    /**
     * Load a given locale-file as base for the dictionary
     *
     * The file "<fileLocation>/<unified locale>.ini" is read. Any patterns
     * already stored are discarded first. When the file is missing, is not a
     * regular readable file or can not be parsed, the dictionary stays empty.
     *
     * @param string $locale Load the file for the given locale
     *
     * @return Dictionary
     */
    public function load($locale)
    {
        $locale           = $this->unifyLocale($locale);
        $file             = self::$fileLocation . DIRECTORY_SEPARATOR . $locale . '.ini';
        $this->dictionary = array();

        // A directory or an unreadable file can not provide any patterns.
        if (! is_file($file) || ! is_readable($file)) {
            return $this;
        }

        // parse_ini_file() returns false on failure, which must not be iterated.
        $patterns = parse_ini_file($file);
        if (! is_array($patterns)) {
            return $this;
        }

        foreach ($patterns as $key => $val) {
            // Keys are written with a leading '@:' by parseFile(); strip it.
            $this->dictionary[str_replace('@:', '', $key)] = $val;
        }

        return $this;
    }

    /**
     * Parse a dictionary-file to create an ini-file from it.
     *
     * Reads "<fileLocation>/hyph_<locale>.dic", takes its first line as the
     * name of the source-encoding and writes one ini-entry per pattern-line
     * to "<fileLocation>/<locale>.ini". The locale is used as given, it is
     * not unified.
     *
     * @param string $locale Parse the file for the given locale
     *
     * @throws \Org\Heigl\Hyphenator\Exception\PathNotFoundException when the
     *         dictionary-file does not exist or can not be read
     * @throws \RuntimeException when the ini-file can not be opened for writing
     * @return string The path of the written ini-file
     */
    public static function parseFile($locale)
    {
        $path = self::$fileLocation . DIRECTORY_SEPARATOR;
        $file = $path . 'hyph_' . $locale . '.dic';
        if (! file_exists($file)) {
            throw new \Org\Heigl\Hyphenator\Exception\PathNotFoundException('The given Path does not exist');
        }

        $items = file($file);
        if (! is_array($items)) {
            throw new \Org\Heigl\Hyphenator\Exception\PathNotFoundException('The given Path could not be read');
        }

        // The first line names the encoding of the file. An empty file has
        // no such line and simply results in an empty ini-file.
        $source = isset($items[0]) ? trim($items[0]) : '';
        if (0 === strpos($source, 'ISO8859')) {
            // Turn e.g. 'ISO8859-1' into the spelling 'ISO-8859-1'.
            $source = str_replace('ISO8859', 'ISO-8859', $source);
        }
        unset($items[0]);

        $target = $path . $locale . '.ini';
        $fh     = fopen($target, 'w+');
        if (false === $fh) {
            throw new \RuntimeException('The ini-file could not be opened for writing');
        }

        try {
            foreach ($items as $item) {
                if (self::isIgnoredLine($item)) {
                    continue;
                }
                $item   = mb_convert_encoding($item, 'UTF-8', $source);
                $result = Pattern::factory($item);
                $string = '@:' . $result->getText() . ' = "' . $result->getPattern() . '"' . "\n";
                fwrite($fh, $string);
            }
        } catch (\Exception $e) {
            // Do not leak the file-handle when converting a line fails.
            fclose($fh);
            throw $e;
        }
        fclose($fh);

        return $target;
    }

    /**
     * Check whether a line of a dictionary-file has to be skipped.
     *
     * @param string $item The raw line as read from the dictionary-file
     *
     * @return boolean
     */
    private static function isIgnoredLine($item)
    {
        // Remove comment-lines starting with '#' or '%'.
        if (in_array(mb_substr($item, 0, 1), array('#', '%'), true)) {
            return true;
        }
        // Ignore empty lines.
        if ('' === trim($item)) {
            return true;
        }
        // Remove all Upper-case items as they are OOo-specific
        if ($item === mb_strtoupper($item)) {
            return true;
        }
        // Ignore lines containing an '=' sign as these are specific
        // instructions for non-standard-hyphenations. These will be
        // implemented later.
        return false !== mb_strpos($item, '=');
    }

    /**
     * Get all patterns for a given word.
     *
     * The word is wrapped in dots and every substring of at least two
     * characters is looked up in the dictionary.
     *
     * @param string $word The word to get the patterns for.
     *
     * @return array The matching entries, indexed by the matched substring
     */
    public function getPatternsForWord($word)
    {
        $return = array();
        $word   = '.' . $word . '.';
        $strlen = mb_strlen($word);
        // Substrings shorter than two characters are never looked up, so the
        // last two start-positions can be left out.
        for ($i = 0; $i <= $strlen - 2; $i++) {
            for ($j = 2; $j <= ($strlen - $i); $j++) {
                $substr = mb_substr($word, $i, $j);
                if (! isset($this->dictionary[$substr])) {
                    continue;
                }
                $return[$substr] = $this->dictionary[$substr];
            }
        }

        return $return;
    }

    /**
     * Manually add or overwrite a pattern
     *
     * @param string $string  String to be matched
     * @param string $pattern Numerical hyphenation-pattern
     *
     * @return \Org\Heigl\Hyphenator\Dictionary\Dictionary
     */
    public function addPattern($string, $pattern)
    {
        $this->dictionary[$string] = $pattern;

        return $this;
    }

    /**
     * Unify the given locale to a default format.
     *
     * A two-character locale is simply lowercased.
     *
     * Otherwise the first occurrence of two letters, followed by one or more
     * non-letters, followed by two letters is searched for. The first pair
     * is lowercased, the second pair uppercased and both are concatenated
     * with an underscore.
     *
     * Everything else is returned as is (cast to a string).
     *
     * @param string $locale The locale to unify
     *
     * @return string
     */
    private function unifyLocale($locale)
    {
        // Cast once so the string-functions never receive a non-string value.
        $locale = (string) $locale;

        if (2 === strlen($locale)) {
            return strtolower($locale);
        }
        if (preg_match('/([a-zA-Z]{2})[^a-zA-Z]+([a-zA-Z]{2})/', $locale, $result)) {
            return strtolower($result[1]) . '_' . strtoupper($result[2]);
        }

        return $locale;
    }
}
242