Completed
Push — master ( e72970...28f2b4 )
by Markus
04:39
created
transliteration.php 2 patches
Indentation   +80 added lines, -80 removed lines patch added patch discarded remove patch
@@ -26,56 +26,56 @@  discard block
 block discarded – undo
26 26
  *   Transliterated text.
27 27
  */
28 28
 function _transliteration_process($string, $unknown = '?', $source_langcode = NULL) {
29
-  // ASCII is always valid NFC! If we're only ever given plain ASCII, we can
30
-  // avoid the overhead of initializing the decomposition tables by skipping
31
-  // out early.
32
-  if (!preg_match('/[\x80-\xff]/', $string)) {
29
+    // ASCII is always valid NFC! If we're only ever given plain ASCII, we can
30
+    // avoid the overhead of initializing the decomposition tables by skipping
31
+    // out early.
32
+    if (!preg_match('/[\x80-\xff]/', $string)) {
33 33
     return $string;
34
-  }
34
+    }
35 35
 
36
-  static $tail_bytes;
36
+    static $tail_bytes;
37 37
 
38
-  if (!isset($tail_bytes)) {
38
+    if (!isset($tail_bytes)) {
39 39
     // Each UTF-8 head byte is followed by a certain number of tail bytes.
40 40
     $tail_bytes = array();
41 41
     for ($n = 0; $n < 256; $n++) {
42
-      if ($n < 0xc0) {
42
+        if ($n < 0xc0) {
43 43
         $remaining = 0;
44
-      }
45
-      elseif ($n < 0xe0) {
44
+        }
45
+        elseif ($n < 0xe0) {
46 46
         $remaining = 1;
47
-      }
48
-      elseif ($n < 0xf0) {
47
+        }
48
+        elseif ($n < 0xf0) {
49 49
         $remaining = 2;
50
-      }
51
-      elseif ($n < 0xf8) {
50
+        }
51
+        elseif ($n < 0xf8) {
52 52
         $remaining = 3;
53
-      }
54
-      elseif ($n < 0xfc) {
53
+        }
54
+        elseif ($n < 0xfc) {
55 55
         $remaining = 4;
56
-      }
57
-      elseif ($n < 0xfe) {
56
+        }
57
+        elseif ($n < 0xfe) {
58 58
         $remaining = 5;
59
-      }
60
-      else {
59
+        }
60
+        else {
61 61
         $remaining = 0;
62
-      }
63
-      $tail_bytes[chr($n)] = $remaining;
62
+        }
63
+        $tail_bytes[chr($n)] = $remaining;
64
+    }
64 65
     }
65
-  }
66 66
 
67
-  // Chop the text into pure-ASCII and non-ASCII areas; large ASCII parts can
68
-  // be handled much more quickly. Don't chop up Unicode areas for punctuation,
69
-  // though, that wastes energy.
70
-  preg_match_all('/[\x00-\x7f]+|[\x80-\xff][\x00-\x40\x5b-\x5f\x7b-\xff]*/', $string, $matches);
67
+    // Chop the text into pure-ASCII and non-ASCII areas; large ASCII parts can
68
+    // be handled much more quickly. Don't chop up Unicode areas for punctuation,
69
+    // though, that wastes energy.
70
+    preg_match_all('/[\x00-\x7f]+|[\x80-\xff][\x00-\x40\x5b-\x5f\x7b-\xff]*/', $string, $matches);
71 71
 
72
-  $result = '';
73
-  foreach ($matches[0] as $str) {
72
+    $result = '';
73
+    foreach ($matches[0] as $str) {
74 74
     if ($str[0] < "\x80") {
75
-      // ASCII chunk: guaranteed to be valid UTF-8 and in normal form C, so
76
-      // skip over it.
77
-      $result .= $str;
78
-      continue;
75
+        // ASCII chunk: guaranteed to be valid UTF-8 and in normal form C, so
76
+        // skip over it.
77
+        $result .= $str;
78
+        continue;
79 79
     }
80 80
 
81 81
     // We'll have to examine the chunk byte by byte to ensure that it consists
@@ -91,72 +91,72 @@  discard block
 block discarded – undo
91 91
     $len = $chunk + 1;
92 92
 
93 93
     for ($i = -1; --$len; ) {
94
-      $c = $str[++$i];
95
-      if ($remaining = $tail_bytes[$c]) {
94
+        $c = $str[++$i];
95
+        if ($remaining = $tail_bytes[$c]) {
96 96
         // UTF-8 head byte!
97 97
         $sequence = $head = $c;
98 98
         do {
99
-          // Look for the defined number of tail bytes...
100
-          if (--$len && ($c = $str[++$i]) >= "\x80" && $c < "\xc0") {
99
+            // Look for the defined number of tail bytes...
100
+            if (--$len && ($c = $str[++$i]) >= "\x80" && $c < "\xc0") {
101 101
             // Legal tail bytes are nice.
102 102
             $sequence .= $c;
103
-          }
104
-          else {
103
+            }
104
+            else {
105 105
             if ($len == 0) {
106
-              // Premature end of string! Drop a replacement character into
107
-              // output to represent the invalid UTF-8 sequence.
108
-              $result .= $unknown;
109
-              break 2;
106
+                // Premature end of string! Drop a replacement character into
107
+                // output to represent the invalid UTF-8 sequence.
108
+                $result .= $unknown;
109
+                break 2;
110 110
             }
111 111
             else {
112
-              // Illegal tail byte; abandon the sequence.
113
-              $result .= $unknown;
114
-              // Back up and reprocess this byte; it may itself be a legal
115
-              // ASCII or UTF-8 sequence head.
116
-              --$i;
117
-              ++$len;
118
-              continue 2;
112
+                // Illegal tail byte; abandon the sequence.
113
+                $result .= $unknown;
114
+                // Back up and reprocess this byte; it may itself be a legal
115
+                // ASCII or UTF-8 sequence head.
116
+                --$i;
117
+                ++$len;
118
+                continue 2;
119
+            }
119 120
             }
120
-          }
121 121
         } while (--$remaining);
122 122
 
123 123
         $n = ord($head);
124 124
         if ($n <= 0xdf) {
125
-          $ord = ($n - 192) * 64 + (ord($sequence[1]) - 128);
125
+            $ord = ($n - 192) * 64 + (ord($sequence[1]) - 128);
126 126
         }
127 127
         elseif ($n <= 0xef) {
128
-          $ord = ($n - 224) * 4096 + (ord($sequence[1]) - 128) * 64 + (ord($sequence[2]) - 128);
128
+            $ord = ($n - 224) * 4096 + (ord($sequence[1]) - 128) * 64 + (ord($sequence[2]) - 128);
129 129
         }
130 130
         elseif ($n <= 0xf7) {
131
-          $ord = ($n - 240) * 262144 + (ord($sequence[1]) - 128) * 4096 + (ord($sequence[2]) - 128) * 64 + (ord($sequence[3]) - 128);
131
+            $ord = ($n - 240) * 262144 + (ord($sequence[1]) - 128) * 4096 + (ord($sequence[2]) - 128) * 64 + (ord($sequence[3]) - 128);
132 132
         }
133 133
         elseif ($n <= 0xfb) {
134
-          $ord = ($n - 248) * 16777216 + (ord($sequence[1]) - 128) * 262144 + (ord($sequence[2]) - 128) * 4096 + (ord($sequence[3]) - 128) * 64 + (ord($sequence[4]) - 128);
134
+            $ord = ($n - 248) * 16777216 + (ord($sequence[1]) - 128) * 262144 + (ord($sequence[2]) - 128) * 4096 + (ord($sequence[3]) - 128) * 64 + (ord($sequence[4]) - 128);
135 135
         }
136 136
         elseif ($n <= 0xfd) {
137
-          $ord = ($n - 252) * 1073741824 + (ord($sequence[1]) - 128) * 16777216 + (ord($sequence[2]) - 128) * 262144 + (ord($sequence[3]) - 128) * 4096 + (ord($sequence[4]) - 128) * 64 + (ord($sequence[5]) - 128);
137
+            $ord = ($n - 252) * 1073741824 + (ord($sequence[1]) - 128) * 16777216 + (ord($sequence[2]) - 128) * 262144 + (ord($sequence[3]) - 128) * 4096 + (ord($sequence[4]) - 128) * 64 + (ord($sequence[5]) - 128);
138 138
         } else {
139
-          $ord = $n;
139
+            $ord = $n;
140 140
         }
141 141
         $result .= _transliteration_replace($ord, $unknown, $source_langcode);
142 142
         $head = '';
143
-      } elseif ($c < "\x80") {
143
+        } elseif ($c < "\x80") {
144 144
         // ASCII byte.
145 145
         $result .= $c;
146 146
         $head = '';
147
-      } elseif ($c < "\xc0") {
147
+        } elseif ($c < "\xc0") {
148 148
         // Illegal tail bytes.
149 149
         if ($head == '') {
150
-          $result .= $unknown;
150
+            $result .= $unknown;
151 151
         }
152
-      } else {
152
+        } else {
153 153
         // Miscellaneous freaks.
154 154
         $result .= $unknown;
155 155
         $head = '';
156
-      }
156
+        }
157 157
     }
158
-  }
159
-  return $result;
158
+    }
159
+    return $result;
160 160
 }
161 161
 
162 162
 /**
@@ -175,38 +175,38 @@  discard block
 block discarded – undo
175 175
  *   ASCII replacement character.
176 176
  */
177 177
 function _transliteration_replace($ord, $unknown = '?', $langcode = NULL) {
178
-  static $map = array();
178
+    static $map = array();
179 179
 
180
-  //GL: set language later
181
-  /*
180
+    //GL: set language later
181
+    /*
182 182
   if (!isset($langcode)) {
183 183
     global $language;
184 184
     $langcode = $language->language;
185 185
   }
186 186
   */
187 187
 
188
-  $bank = $ord >> 8;
188
+    $bank = $ord >> 8;
189 189
 
190
-  if (!isset($map[$bank][$langcode])) {
190
+    if (!isset($map[$bank][$langcode])) {
191 191
     $file = './resources/transliteration-data/' . sprintf('x%02x', $bank) . '.php';  
192 192
     if (file_exists($file)) {
193
-      $base = array();
194
-      $variant = array();
195
-      include $file;
196
-      if ($langcode != 'en' && isset($variant[$langcode])) {
193
+        $base = array();
194
+        $variant = array();
195
+        include $file;
196
+        if ($langcode != 'en' && isset($variant[$langcode])) {
197 197
         // Merge in language specific mappings.
198 198
         $map[$bank][$langcode] = $variant[$langcode] + $base;
199
-      }
200
-      else {
199
+        }
200
+        else {
201 201
         $map[$bank][$langcode] = $base;
202
-      }
202
+        }
203 203
     }
204 204
     else {
205
-      $map[$bank][$langcode] = array();
205
+        $map[$bank][$langcode] = array();
206
+    }
206 207
     }
207
-  }
208 208
 
209
-  $ord = $ord & 255;
209
+    $ord = $ord & 255;
210 210
 
211
-  return isset($map[$bank][$langcode][$ord]) ? $map[$bank][$langcode][$ord] : $unknown;
211
+    return isset($map[$bank][$langcode][$ord]) ? $map[$bank][$langcode][$ord] : $unknown;
212 212
 }
Please log in to merge, or discard this patch.
Braces   +18 added lines, -30 removed lines patch added patch discarded remove patch
@@ -25,7 +25,8 @@  discard block
 block discarded – undo
25 25
  * @return
26 26
  *   Transliterated text.
27 27
  */
28
-function _transliteration_process($string, $unknown = '?', $source_langcode = NULL) {
28
+function _transliteration_process($string, $unknown = '?', $source_langcode = NULL)
29
+{
29 30
   // ASCII is always valid NFC! If we're only ever given plain ASCII, we can
30 31
   // avoid the overhead of initializing the decomposition tables by skipping
31 32
   // out early.
@@ -41,23 +42,17 @@  discard block
 block discarded – undo
41 42
     for ($n = 0; $n < 256; $n++) {
42 43
       if ($n < 0xc0) {
43 44
         $remaining = 0;
44
-      }
45
-      elseif ($n < 0xe0) {
45
+      } elseif ($n < 0xe0) {
46 46
         $remaining = 1;
47
-      }
48
-      elseif ($n < 0xf0) {
47
+      } elseif ($n < 0xf0) {
49 48
         $remaining = 2;
50
-      }
51
-      elseif ($n < 0xf8) {
49
+      } elseif ($n < 0xf8) {
52 50
         $remaining = 3;
53
-      }
54
-      elseif ($n < 0xfc) {
51
+      } elseif ($n < 0xfc) {
55 52
         $remaining = 4;
56
-      }
57
-      elseif ($n < 0xfe) {
53
+      } elseif ($n < 0xfe) {
58 54
         $remaining = 5;
59
-      }
60
-      else {
55
+      } else {
61 56
         $remaining = 0;
62 57
       }
63 58
       $tail_bytes[chr($n)] = $remaining;
@@ -100,15 +95,13 @@  discard block
 block discarded – undo
100 95
           if (--$len && ($c = $str[++$i]) >= "\x80" && $c < "\xc0") {
101 96
             // Legal tail bytes are nice.
102 97
             $sequence .= $c;
103
-          }
104
-          else {
98
+          } else {
105 99
             if ($len == 0) {
106 100
               // Premature end of string! Drop a replacement character into
107 101
               // output to represent the invalid UTF-8 sequence.
108 102
               $result .= $unknown;
109 103
               break 2;
110
-            }
111
-            else {
104
+            } else {
112 105
               // Illegal tail byte; abandon the sequence.
113 106
               $result .= $unknown;
114 107
               // Back up and reprocess this byte; it may itself be a legal
@@ -123,17 +116,13 @@  discard block
 block discarded – undo
123 116
         $n = ord($head);
124 117
         if ($n <= 0xdf) {
125 118
           $ord = ($n - 192) * 64 + (ord($sequence[1]) - 128);
126
-        }
127
-        elseif ($n <= 0xef) {
119
+        } elseif ($n <= 0xef) {
128 120
           $ord = ($n - 224) * 4096 + (ord($sequence[1]) - 128) * 64 + (ord($sequence[2]) - 128);
129
-        }
130
-        elseif ($n <= 0xf7) {
121
+        } elseif ($n <= 0xf7) {
131 122
           $ord = ($n - 240) * 262144 + (ord($sequence[1]) - 128) * 4096 + (ord($sequence[2]) - 128) * 64 + (ord($sequence[3]) - 128);
132
-        }
133
-        elseif ($n <= 0xfb) {
123
+        } elseif ($n <= 0xfb) {
134 124
           $ord = ($n - 248) * 16777216 + (ord($sequence[1]) - 128) * 262144 + (ord($sequence[2]) - 128) * 4096 + (ord($sequence[3]) - 128) * 64 + (ord($sequence[4]) - 128);
135
-        }
136
-        elseif ($n <= 0xfd) {
125
+        } elseif ($n <= 0xfd) {
137 126
           $ord = ($n - 252) * 1073741824 + (ord($sequence[1]) - 128) * 16777216 + (ord($sequence[2]) - 128) * 262144 + (ord($sequence[3]) - 128) * 4096 + (ord($sequence[4]) - 128) * 64 + (ord($sequence[5]) - 128);
138 127
         } else {
139 128
           $ord = $n;
@@ -174,7 +163,8 @@  discard block
 block discarded – undo
174 163
  * @return
175 164
  *   ASCII replacement character.
176 165
  */
177
-function _transliteration_replace($ord, $unknown = '?', $langcode = NULL) {
166
+function _transliteration_replace($ord, $unknown = '?', $langcode = NULL)
167
+{
178 168
   static $map = array();
179 169
 
180 170
   //GL: set language later
@@ -196,12 +186,10 @@  discard block
 block discarded – undo
196 186
       if ($langcode != 'en' && isset($variant[$langcode])) {
197 187
         // Merge in language specific mappings.
198 188
         $map[$bank][$langcode] = $variant[$langcode] + $base;
199
-      }
200
-      else {
189
+      } else {
201 190
         $map[$bank][$langcode] = $base;
202 191
       }
203
-    }
204
-    else {
192
+    } else {
205 193
       $map[$bank][$langcode] = array();
206 194
     }
207 195
   }
Please log in to merge, or discard this patch.