1 | <?php |
||
16 | class SafeFN { |
||
17 | |||
18 | // 'safe' characters are a superset of $plain, $pre_indicator and $post_indicator |
||
19 | private static $plain = '-./[_0123456789abcdefghijklmnopqrstuvwxyz'; // these characters aren't converted |
||
20 | private static $pre_indicator = '%'; |
||
21 | private static $post_indicator = ']'; |
||
22 | |||
23 | /** |
||
24 | * Convert a UTF-8 string to a safe ASCII string |
||
25 | * |
||
26 | * conversion process |
||
27 | * - if codepoint is a plain or post_indicator character, |
||
28 | * - if previous character was "converted", append post_indicator to output, clear "converted" flag |
||
29 | * - append ascii byte for character to output |
||
30 | * (continue to next character) |
||
31 | * |
||
32 | * - if codepoint is a pre_indicator character, |
||
33 | * - append ascii byte for character to output, set "converted" flag |
||
34 | * (continue to next character) |
||
35 | * |
||
36 | * (all remaining characters) |
||
37 | * - reduce codepoint value for non-printable ASCII characters (0x00 - 0x1f). Space becomes our zero. |
||
38 | * - convert reduced value to base36 (0-9a-z) |
||
39 | * - append $pre_indicator character followed by base36 string to output, set converted flag |
||
40 | * (continue to next character) |
||
41 | * |
||
42 | * @param string $filename a utf8 string, should only include printable characters - not 0x00-0x1f |
||
43 | * @return string an encoded representation of $filename using only 'safe' ASCII characters |
||
44 | * |
||
45 | * @author Christopher Smith <[email protected]> |
||
46 | */ |
||
47 | public static function encode($filename) { |
||
50 | |||
51 | /** |
||
52 | * decoding process |
||
53 | * - split the string into substrings at any occurrence of pre or post indicator characters |
||
54 | * - check the first character of the substring |
||
55 | * - if it's not a pre_indicator character |
||
56 | * - if previous character was converted, skip over post_indicator character |
||
57 | * - copy codepoint values of remaining characters to the output array |
||
58 | * - clear any converted flag |
||
59 | * (continue to next substring) |
||
60 | * |
||
61 | * - else (it's a pre_indicator character) |
||
62 | * - if string length is 1, copy the pre_indicator character to the output array |
||
63 | * (continue to next substring) |
||
64 | * |
||
65 | * - else (string length > 1) |
||
66 | * - skip the pre-indicator character and convert remaining string from base36 to base10 |
||
67 | * - increase codepoint value for non-printable ASCII characters (add 0x20) |
||
68 | * - append codepoint to output array |
||
69 | * (continue to next substring) |
||
70 | * |
||
71 | * @param string $filename a 'safe' encoded ASCII string |
||
72 | * @return string decoded utf8 representation of $filename |
||
73 | * |
||
74 | * @author Christopher Smith <[email protected]> |
||
75 | */ |
||
76 | public static function decode($filename) { |
||
79 | |||
80 | public static function validatePrintableUtf8($printable_utf8) { |
||
83 | |||
84 | public static function validateSafe($safe) { |
||
87 | |||
88 | /** |
||
89 | * convert an array of unicode codepoints into 'safe_filename' format |
||
90 | * |
||
91 | * @param int[] $unicode an array of unicode codepoints |
||
92 | * @return string the unicode represented in 'safe_filename' format |
||
93 | * |
||
94 | * @author Christopher Smith <[email protected]> |
||
95 | */ |
||
96 | private static function unicodeToSafe($unicode) { |
||
120 | |||
121 | /** |
||
122 | * convert a 'safe_filename' string into an array of unicode codepoints |
||
123 | * |
||
124 | * @param string $safe a filename in 'safe_filename' format |
||
125 | * @return int[] an array of unicode codepoints |
||
126 | * |
||
127 | * @author Christopher Smith <[email protected]> |
||
128 | */ |
||
129 | private static function safeToUnicode($safe) { |
||
157 | |||
158 | } |
||
159 |