Completed
Push — main ( c35f7d...c26048 )
by Andreas
01:12
created

Dictionary::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 3
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
1
<?php
2
/**
3
 * Copyright (c) 2008-2011 Andreas Heigl<[email protected]>
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a copy
6
 * of this software and associated documentation files (the "Software"), to deal
7
 * in the Software without restriction, including without limitation the rights
8
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
 * copies of the Software, and to permit persons to whom the Software is
10
 * furnished to do so, subject to the following conditions:
11
 *
12
 * The above copyright notice and this permission notice shall be included in
13
 * all copies or substantial portions of the Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
 * THE SOFTWARE.
22
 *
23
 * @category   Hyphenation
24
 * @package    Org_Heigl_Hyphenator
25
 * @subpackage Dictionary
26
 * @author     Andreas Heigl <[email protected]>
27
 * @copyright  2008-2011 Andreas Heigl<[email protected]>
28
 * @license    http://www.opensource.org/licenses/mit-license.php MIT-License
29
 * @version    2.0.1
30
 * @link       http://github.com/heiglandreas/Hyphenator
31
 * @since      01.11.2011
32
 */
33
34
namespace Org\Heigl\Hyphenator\Dictionary;
35
36
use function mb_substr;
37
38
/**
 * This class provides a generic dictionary containing hyphenation-patterns.
 *
 * The patterns are held in memory as a map from the text of a pattern to
 * its numerical hyphenation-pattern. They are either loaded from an
 * ini-file named after a locale or added manually.
 *
 * @category   Hyphenation
 * @package    Org_Heigl_Hyphenator
 * @subpackage Dictionary
 * @author     Andreas Heigl <[email protected]>
 * @copyright  2008-2011 Andreas Heigl<[email protected]>
 * @license    http://www.opensource.org/licenses/mit-license.php MIT-License
 * @version    2.0.1
 * @link       http://github.com/heiglandreas/Hyphenator
 * @since      01.11.2011
 */
class Dictionary
{
    /**
     * The internal storage for the dictionary.
     *
     * Maps the text of a pattern to its numerical hyphenation-pattern.
     *
     * @var array $dictionary
     */
    private $dictionary = [];

    /**
     * Where to look for the basic files.
     *
     * Shared by all instances; both the '.ini' and the '.dic' files are
     * looked up directly inside this folder.
     *
     * @var string $fileLocation
     */
    private static $fileLocation = '';

    /**
     * Set the file-location.
     *
     * @param string $fileLocation The default file-location for ini-files
     *
     * @return void
     */
    public static function setFileLocation($fileLocation)
    {
        self::$fileLocation = $fileLocation;
    }

    /**
     * Create an instance for a given locale.
     *
     * @param string $locale The locale to be set for this Dictionary
     *
     * @return Dictionary
     */
    public static function factory($locale)
    {
        $dict = new self();
        $dict->load($locale);

        return $dict;
    }

    /**
     * Load a given locale-file as base for the dictionary.
     *
     * Any previously stored patterns are discarded. When no readable
     * ini-file exists for the (unified) locale, or when the file can not be
     * parsed, the dictionary simply stays empty.
     *
     * @param string $locale Load the file for the given locale
     *
     * @return Dictionary
     */
    public function load($locale)
    {
        $locale           = $this->unifyLocale($locale);
        $file             = self::$fileLocation . DIRECTORY_SEPARATOR . $locale . '.ini';
        $this->dictionary = [];

        // Only regular files can be parsed; a directory of that name is
        // treated like a missing file.
        if (! is_file($file)) {
            return $this;
        }

        // parse_ini_file() returns false on failure, which must not be
        // handed to foreach.
        $patterns = parse_ini_file($file);
        if (false === $patterns) {
            return $this;
        }

        foreach ($patterns as $key => $val) {
            // The keys carry the '@:' prefix that parseFile() writes in
            // front of every pattern-text.
            $this->dictionary[str_replace('@:', '', $key)] = $val;
        }

        return $this;
    }

    /**
     * Parse a dictionary-file to create an ini-file from it.
     *
     * Reads 'hyph_<locale>.dic' from the file-location and writes
     * '<locale>.ini' into the same folder. The first line of the
     * dictionary-file is taken as the name of its character-encoding, every
     * following line that is not skipped is converted to UTF-8 and turned
     * into one ini-entry. The locale is used exactly as given.
     *
     * @param string $locale Parse the file for the given locale
     *
     * @throws \Org\Heigl\Hyphenator\Exception\PathNotFoundException when the
     *         dictionary-file does not exist
     * @throws \RuntimeException when the dictionary-file can not be read or
     *         the ini-file can not be opened for writing
     * @return string The path of the written ini-file
     */
    public static function parseFile($locale)
    {
        $path = self::$fileLocation . DIRECTORY_SEPARATOR;
        $file = $path . 'hyph_' . $locale . '.dic';
        if (! file_exists($file)) {
            throw new \Org\Heigl\Hyphenator\Exception\PathNotFoundException('The given Path does not exist');
        }

        $items = file($file);
        if (false === $items) {
            throw new \RuntimeException('The dictionary file could not be read');
        }

        // The first line names the encoding of the file. An empty file has
        // no such line, in which case there is nothing to convert either.
        $source = trim((string) array_shift($items));
        if (0 === strpos($source, 'ISO8859')) {
            $source = str_replace('ISO8859', 'ISO-8859', $source);
        }

        $target = $path . $locale . '.ini';
        $fh     = fopen($target, 'w+');
        if (false === $fh) {
            throw new \RuntimeException('The ini-file could not be opened for writing');
        }

        try {
            foreach ($items as $item) {
                // Remove comment-lines starting with '#' or '%'.
                if (in_array(mb_substr($item, 0, 1), ['#', '%'], true)) {
                    continue;
                }
                // Ignore empty lines.
                if ('' === trim($item)) {
                    continue;
                }
                // Remove all Upper-case items as they are OOo-specific
                // NOTE(review): lines without any cased letters also equal
                // their upper-case form and are skipped here - confirm that
                // this is intended.
                if ($item === mb_strtoupper($item)) {
                    continue;
                }
                // Ignore lines containing an '=' sign as these are specific
                // instructions for non-standard-hyphenations. These will be
                // implemented later.
                if (false !== mb_strpos($item, '=')) {
                    continue;
                }
                $item   = mb_convert_encoding($item, 'UTF-8', $source);
                $result = Pattern::factory($item);
                $string = '@:' . $result->getText() . ' = "' . $result->getPattern() . '"' . "\n";
                fwrite($fh, $string);
            }
        } finally {
            // Release the handle even when a pattern can not be created.
            fclose($fh);
        }

        return $target;
    }

    /**
     * Get all patterns for a given word.
     *
     * The word is enclosed in dots and every substring of at least two
     * characters is looked up in the dictionary.
     *
     * @param string $word The word to get the patterns for.
     *
     * @return array The matching patterns, indexed by the matched text
     */
    public function getPatternsForWord($word)
    {
        $return = [];
        $word   = '.' . $word . '.';
        $strlen = mb_strlen($word);
        // A substring needs at least two characters, so the last offset
        // worth looking at is two characters before the end.
        for ($i = 0; $i <= $strlen - 2; $i++) {
            for ($j = 2; $j <= $strlen - $i; $j++) {
                $substr = mb_substr($word, $i, $j);
                if (! isset($this->dictionary[$substr])) {
                    continue;
                }
                $return[$substr] = $this->dictionary[$substr];
            }
        }

        return $return;
    }

    /**
     * Manually add or overwrite a pattern.
     *
     * PHP resolves method names case-insensitively, so a call to
     * addPattern() reaches this method as well.
     *
     * @param string $string  String to be matched
     * @param string $pattern Numerical hyphenation-pattern
     *
     * @return \Org\Heigl\Hyphenator\Dictionary\Dictionary
     */
    public function addPAttern($string, $pattern)
    {
        $this->dictionary[$string] = $pattern;

        return $this;
    }

    /**
     * Unify the given locale to a default format.
     *
     * A 2-letter locale will simply be lowercased.
     *
     * Otherwise the first occurrence of two letters, one or more
     * non-letters and two further letters is looked for anywhere in the
     * string (the expression is not anchored). When found, the first pair
     * is lowercased, the second pair is uppercased and both are
     * concatenated with an underscore.
     *
     * Everything else will be returned AS IS, cast to a string.
     *
     * @param string $locale The locale to unify
     *
     * @return string
     */
    private function unifyLocale($locale)
    {
        if (2 == strlen($locale)) {
            return strtolower($locale);
        }
        if (preg_match('/([a-zA-Z]{2})[^a-zA-Z]+([a-zA-Z]{2})/i', $locale, $result)) {
            return strtolower($result[1]) . '_' . strtoupper($result[2]);
        }

        return (string) $locale;
    }
}
235