1 | <?php |
||
2 | |||
3 | /** |
||
4 | * This file is part of the sweetrdf/InMemoryStoreSqlite package and licensed under |
||
5 | * the terms of the GPL-2 license. |
||
6 | * |
||
7 | * (c) Konrad Abicht <[email protected]> |
||
8 | * (c) Benjamin Nowack |
||
9 | * |
||
10 | * For the full copyright and license information, please view the LICENSE |
||
11 | * file that was distributed with this source code. |
||
12 | */ |
||
13 | |||
14 | namespace sweetrdf\InMemoryStoreSqlite; |
||
15 | |||
/**
 * Resolves a URI reference against a base URI.
 *
 * Handled in this order:
 *  - a reference starting with a scheme-like prefix ("http:", "_:", ...) or with a
 *    "${...}" placeholder is returned unchanged,
 *  - a network-path reference ("//host/...") is prefixed with "http:",
 *  - anything else is resolved against $base after its fragment was removed
 *    (leading "/", empty reference, "#..."/"?..." suffixes, "&..." query
 *    continuation and relative paths including leading "../" segments).
 *
 * @param string      $path URI reference to resolve
 * @param string|null $base Base URI; NamespaceHelper::BASE_NAMESPACE is used when null or empty
 *
 * @return string Resolved URI
 */
function calcURI(string $path, ?string $base = null): string
{
    /* quick check: abs path or bnode */
    if (preg_match('/^[a-z0-9_]+:/i', $path)) {
        return $path;
    }
    /* placeholder, assume abs URI */
    if (preg_match('/^\$\{.*\}/', $path)) {
        return $path;
    }
    /* net path, assume http */
    if (str_starts_with($path, '//')) {
        return 'http:'.$path;
    }

    /* other URIs: resolve against the base (without its fragment) */
    $base = $base ?: NamespaceHelper::BASE_NAMESPACE;
    $base = preg_replace('/\#.*$/', '', $base);

    $path = preg_replace("/^\.\//", '', $path);

    /* root = scheme + authority w/o trailing slash; falls back to the whole base */
    $root = preg_match('/(^[a-z0-9]+\:[\/]{1,3}[^\/]+)(?:\/|$)/i', $base, $m) ? $m[1] : $base;
    if ($base === $root) {
        $base .= '/';
    }

    /* leading slash: replace the whole path of the base */
    if (str_starts_with($path, '/')) {
        return $root.$path;
    }

    /* empty reference; compared against '' so that "0" stays a valid relative path */
    if ('' === $path) {
        return $base;
    }

    /* fragment or query: replaces the same component of the base */
    if (preg_match('/^([\#\?])/', $path, $m)) {
        return preg_replace('/\\'.$m[1].'.*$/', '', $base).$path;
    }

    /* query continuation (not perfect yet) */
    if (preg_match('/^(\&)(.*)$/', $path, $m)) {
        return str_contains($base, '?') ? $base.$m[1].$m[2] : $base.'?'.$m[2];
    }

    /* abs path (can only show up here after a leading "./" was removed) */
    if (preg_match('/^[a-z0-9]+\:/i', $path)) {
        return $path;
    }

    /* rel path: remove stuff after last slash */
    $base = substr($base, 0, strrpos($base, '/') + 1);

    /* resolve ../ (never climbs above the root) */
    while (preg_match('/^(\.\.\/)(.*)$/', $path, $m)) {
        $path = $m[2];
        if ($base !== $root.'/') {
            $base = preg_replace('/^(.*\/)[^\/]+\/$/', '\\1', $base);
        }
    }

    return $base.$path;
}
||
63 | |||
/**
 * Derives a base URI from a URI or a file system path.
 *
 * The fragment is removed and a leading "//" is turned into "http://". If the
 * result starts with a (lower-case) scheme, "../" segments are resolved via
 * calcURI() and the URI is returned. Otherwise the value is treated as a local
 * path and returned as "file://" + realpath(); realpath() yields false for a
 * path it cannot resolve, in which case only "file://" is returned.
 *
 * @param string $path URI or local path
 *
 * @return string Base URI
 */
function calcBase(string $path): string
{
    $r = preg_replace('/\#.*$/', '', $path); /* remove hash */
    $r = preg_replace('/^\/\//', 'http://', $r); /* net path (//), assume http */

    if (preg_match('/^[a-z0-9]+\:/', $r)) {/* scheme, abs path */
        /*
         * $m[1] is the part up to the first "/../", $m[2] the relative rest starting
         * with "../". The rest has to be resolved against the leading part, i.e.
         * it is the path argument of calcURI() and $m[1] is the base.
         */
        while (preg_match('/^(.+\/)(\.\.\/.*)$/U', $r, $m)) {
            $r = calcURI($m[2], $m[1]);
        }

        return $r;
    }

    return 'file://'.realpath($r); /* real path */
}
||
79 | |||
/**
 * Normalize value for ORDER BY operations.
 *
 * - Numeric strings become a sign followed by a zero-padded fixed-width decimal
 *   (18 integer digits, 15 fraction digits).
 * - Strings accepted by strtotime() become "Y-m-dTH:i:s" (dates written like
 *   "21 August 2007") or "Y-m-dTH:i:sZ" (everything else, e.g. xsd dates),
 *   formatted with date().
 * - Any other string gets its tags removed, runs of whitespace and quote
 *   characters replaced by "-", and is shortened to its first and last 17
 *   characters (joined by "-") when longer than 35 characters.
 *
 * @param string $val Raw value
 *
 * @return string Normalized value
 */
function getNormalizedValue(string $val): string
{
    /*
     * Numbers are checked before dates: strtotime() also accepts plain numbers
     * such as "2007" or "12.30" (as a time of day), which would turn them into
     * a date string.
     */
    if (is_numeric($val)) {
        $number = sprintf('%f', $val);
        if (preg_match('/([\-\+])([0-9]*)\.([0-9]*)/', $number, $m)) {
            return $m[1].sprintf('%018s', $m[2]).'.'.sprintf('%-015s', $m[3]);
        }
        if (preg_match('/([0-9]*)\.([0-9]*)/', $number, $m)) {
            return '+'.sprintf('%018s', $m[1]).'.'.sprintf('%-015s', $m[2]);
        }

        return $number;
    }

    /* compared against false, so that timestamp 0 is accepted as well */
    $uts = strtotime($val);
    if (false !== $uts) {
        /* date (e.g. 21 August 2007) */
        if (preg_match('/^[0-9]{1,2}\s+[a-z]+\s+[0-9]{4}/i', $val)) {
            return date('Y-m-d\TH:i:s', $uts);
        }

        /* xsd date (e.g. 2009-05-28T18:03:38+09:00 2009-05-28T18:03:38GMT) */
        return date('Y-m-d\TH:i:s\Z', $uts);
    }

    /* any other string: remove tags, linebreaks etc., but keep MB-chars */
    $plain = strip_tags($val);
    /* \x{00B4} = acute accent; the u modifier keeps multibyte characters intact */
    $normalized = preg_replace('/[\s\'"\x{00B4}`]+/u', '-', $plain);
    if (null === $normalized) {
        /* preg_replace() failed (e.g. invalid UTF-8): match the ASCII characters byte-wise */
        $normalized = (string) preg_replace('/[\s\'"`]+/', '-', $plain);
    }
    $normalized = trim($normalized);

    /* measure and cut with the same unit (characters if mbstring is available, bytes otherwise) */
    if (\function_exists('mb_substr') && \function_exists('mb_strlen')) {
        if (mb_strlen($normalized) > 35) {
            $normalized = mb_substr($normalized, 0, 17).'-'.mb_substr($normalized, -17);
        }
    } elseif (\strlen($normalized) > 35) {
        $normalized = substr($normalized, 0, 17).'-'.substr($normalized, -17);
    }

    return $normalized;
}
||
123 | |||
/**
 * Splits a URI into a namespace part and a local part.
 *
 * A few w3.org namespaces that are directly followed by the local name (no "/"
 * or "#" in between) are split at a fixed position. All other URIs are split
 * after the last "/" or "#", or, failing that, after the last ":".
 *
 * @param string $v URI to split
 *
 * @return array{0: string, 1: string} Namespace and local part; the local part
 *                                     is '' if no split position was found
 */
function splitURI($v): array
{
    /*
     * the following namespaces may lead to conflated URIs,
     * we have to set the split position manually
     *
     * @todo port to NamespaceHelper
     */
    $specials = [
        'http://www.w3.org/XML/1998/namespace',
        'http://www.w3.org/2005/Atom',
        'http://www.w3.org/1999/xhtml',
    ];
    foreach ($specials as $ns) {
        /* the namespace has to be a prefix, because the local part is cut off by its length */
        if (!str_starts_with($v, $ns)) {
            continue;
        }
        $local_part = substr($v, \strlen($ns));
        if (!preg_match('/^[\/\#]/', $local_part)) {
            return [$ns, $local_part];
        }
    }

    /* auto-splitting on / or # */
    if (preg_match('/^(.*[\/\#])([^\/\#]+)$/', $v, $m)) {
        return [$m[1], $m[2]];
    }

    /* auto-splitting on last special char, e.g. urn:foo:bar */
    if (preg_match('/^(.*[\:\/])([^\:\/]+)$/', $v, $m)) {
        return [$m[1], $m[2]];
    }

    return [$v, ''];
}
||
163 |