sweetrdf /
in-memory-store-sqlite
| 1 | <?php |
||
| 2 | |||
| 3 | /** |
||
| 4 | * This file is part of the sweetrdf/InMemoryStoreSqlite package and licensed under |
||
| 5 | * the terms of the GPL-2 license. |
||
| 6 | * |
||
| 7 | * (c) Konrad Abicht <[email protected]> |
||
| 8 | * (c) Benjamin Nowack |
||
| 9 | * |
||
| 10 | * For the full copyright and license information, please view the LICENSE |
||
| 11 | * file that was distributed with this source code. |
||
| 12 | */ |
||
| 13 | |||
| 14 | namespace sweetrdf\InMemoryStoreSqlite; |
||
| 15 | |||
/**
 * Resolves a (possibly relative) URI reference against a base URI.
 *
 * References that already carry a scheme (or a blank node prefix such as "_:"),
 * placeholders of the form ${...} and network paths ("//host/...") are returned
 * (almost) untouched. Everything else is merged with $base.
 *
 * @param string      $path URI reference to resolve
 * @param string|null $base Base URI; when null or empty, NamespaceHelper::BASE_NAMESPACE is used.
 *                          A fragment ("#...") on the base is ignored.
 *
 * @return string the resolved URI
 */
function calcURI(string $path, ?string $base = null): string
{
    /* quick check: "<scheme>:" or "_:" prefix => absolute URI or bnode, nothing to resolve */
    if (preg_match("/^[a-z0-9\_]+\:/i", $path)) {
        return $path;
    }
    /* placeholder, assume it stands for an absolute URI */
    if (preg_match('/^\$\{.*\}/', $path)) {
        return $path;
    }
    /* net path, assume http */
    if (str_starts_with($path, '//')) {
        return 'http:'.$path;
    }

    /* everything below needs a base; its fragment never takes part in resolution */
    $base = $base ?: NamespaceHelper::BASE_NAMESPACE;
    $base = preg_replace('/\#.*$/', '', $base);

    /* a leading "./" refers to the current directory and carries no information */
    $path = preg_replace("/^\.\//", '', $path);

    /* root = scheme + authority w/o trailing slash; falls back to the whole base */
    $root = preg_match('/(^[a-z0-9]+\:[\/]{1,3}[^\/]+)[\/|$]/i', $base, $m) ? $m[1] : $base;
    if ($base === $root) {
        $base .= '/';
    }

    /* leading slash: absolute path on the base's root */
    if (str_starts_with($path, '/')) {
        return $root.$path;
    }

    /*
     * Empty reference (e.g. <> in Turtle) points to the base itself.
     * Compare against '' explicitly: a loose check would also swallow the valid reference "0".
     */
    if ('' === $path) {
        return $base;
    }

    /* fragment or query only: replace the same component of the base */
    if (preg_match('/^([\#\?])/', $path, $m)) {
        return preg_replace('/\\'.$m[1].'.*$/', '', $base).$path;
    }

    /* additional query parameter (not perfect yet) */
    if (preg_match('/^(\&)(.*)$/', $path, $m)) {
        return preg_match('/\?/', $base) ? $base.$m[1].$m[2] : $base.'?'.$m[2];
    }

    /* rel path: remove stuff after last slash */
    $base = substr($base, 0, strrpos($base, '/') + 1);

    /* resolve ../ by dropping one directory of the base per step, never climbing above the root */
    while (preg_match('/^(\.\.\/)(.*)$/', $path, $m)) {
        $path = $m[2];
        if ($base !== $root.'/') {
            $base = preg_replace('/^(.*\/)[^\/]+\/$/', '\\1', $base);
        }
    }

    return $base.$path;
}
||
| 63 | |||
/**
 * Derives a base URI from a document path or URI.
 *
 * The fragment is removed, a network path ("//host/...") is assumed to be http,
 * and "../" segments inside an absolute URI are resolved. A value without a
 * scheme is treated as a local file path and turned into a file:// URI.
 *
 * @param string $path URI or file system path
 *
 * @return string the base URI
 */
function calcBase(string $path): string
{
    $r = preg_replace('/\#.*$/', '', $path); /* remove hash */
    $r = preg_replace('/^\/\//', 'http://', $r); /* net path (//), assume http */

    if (preg_match('/^[a-z0-9]+\:/', $r)) {/* scheme, abs path */
        /*
         * $m[1] is the shortest prefix ending in "/" that is followed by "../",
         * $m[2] is the remainder starting with "../". The remainder is the relative
         * reference and the prefix is its base, so they go to calcURI in that order
         * (passing the prefix as the reference would return it unchanged and drop the rest).
         */
        while (preg_match('/^(.+\/)(\.\.\/.*)$/U', $r, $m)) {
            $r = calcURI($m[2], $m[1]);
        }

        return $r;
    }

    /* real path; NOTE: realpath() yields false for a non-existing path, which leaves just "file://" */
    return 'file://'.realpath($r);
}
||
| 79 | |||
/**
 * Normalize value for ORDER BY operations.
 *
 * The result is a string whose lexical order is meant to follow the natural order
 * of the input:
 *  - textual dates (e.g. "21 August 2007") become "Y-m-dTH:i:s",
 *  - numeric strings become a sign followed by 18 zero-padded integer digits,
 *    a dot and 15 zero-padded fraction digits,
 *  - anything else strtotime() understands becomes "Y-m-dTH:i:sZ",
 *  - remaining strings get tags removed, runs of whitespace/quotes collapsed to "-"
 *    and are shortened to 35 characters (first 17 + "-" + last 17).
 *
 * NOTE: dates are formatted with date(), i.e. in PHP's default timezone; the
 * trailing "Z" is appended regardless of that timezone.
 *
 * @param string $val raw value
 *
 * @return string normalized value
 */
function getNormalizedValue(string $val): string
{
    /* try date (e.g. 21 August 2007) */
    if (preg_match('/^[0-9]{1,2}\s+[a-z]+\s+[0-9]{4}/i', $val)) {
        $uts = strtotime($val);
        /* compare against false: timestamp 0 is a valid result */
        if (false !== $uts) {
            return date("Y-m-d\TH:i:s", $uts);
        }
    }

    /*
     * Numbers have to be handled before the generic strtotime() check below:
     * strtotime() happily reads "2009" as 20:09 or "3.14" as 03:14 of the current day.
     */
    if (is_numeric($val)) {
        /* %F (unlike %f) is not locale aware, so the decimal separator is always "." */
        $number = sprintf('%F', (float) $val);
        if (preg_match('/^([\-\+]?)([0-9]*)\.([0-9]*)$/', $number, $m)) {
            $sign = '' === $m[1] ? '+' : $m[1];

            return $sign.str_pad($m[2], 18, '0', \STR_PAD_LEFT).'.'.str_pad($m[3], 15, '0', \STR_PAD_RIGHT);
        }

        /* e.g. INF, nothing to pad */
        return $number;
    }

    /* xsd date (e.g. 2009-05-28T18:03:38+09:00 2009-05-28T18:03:38GMT) */
    $uts = strtotime($val);
    if (false !== $uts) {
        return date('Y-m-d\TH:i:s\Z', $uts);
    }

    /*
     * any other string: remove tags, linebreaks etc., but keep MB-chars.
     * The "u" modifier is required for that: without it the acute accent (U+00B4)
     * would be matched byte-wise and tear other multi-byte characters apart.
     */
    $plain = strip_tags($val);
    $normalized = preg_replace('/[\s\'\"\x{00B4}\`]+/u', '-', $plain);
    if (null === $normalized) {
        /* input is not valid UTF-8: fall back to replacing the ASCII characters only */
        $normalized = (string) preg_replace('/[\s\'\"\`]+/', '-', $plain);
    }
    $val = trim($normalized);

    /* measure and cut with the same unit (characters if mbstring is available, bytes otherwise) */
    $hasMb = \function_exists('mb_substr') && \function_exists('mb_strlen');
    $length = $hasMb ? mb_strlen($val) : \strlen($val);
    if ($length > 35) {
        $val = $hasMb
            ? mb_substr($val, 0, 17).'-'.mb_substr($val, -17)
            : substr($val, 0, 17).'-'.substr($val, -17);
    }

    return $val;
}
||
| 123 | |||
/**
 * Splits a URI into a namespace part and a local part.
 *
 * @param string $v URI to split
 *
 * @return array{0: string, 1: string} namespace part first, local part second; the local
 *                                     part is '' when no split position could be found
 */
function splitURI($v): array
{
    /*
     * the following namespaces may lead to conflated URIs,
     * we have to set the split position manually
     *
     * @todo port to NamespaceHelper
     */
    $specials = [
        'http://www.w3.org/XML/1998/namespace',
        'http://www.w3.org/2005/Atom',
        'http://www.w3.org/1999/xhtml',
    ];
    foreach ($specials as $ns) {
        /* the namespace must be a prefix, otherwise cutting strlen($ns) bytes off the front is meaningless */
        if (!str_starts_with($v, $ns)) {
            continue;
        }
        $localPart = substr($v, \strlen($ns));
        /* a local part starting with / or # is handled by the generic splitting below */
        if (!preg_match('/^[\/\#]/', $localPart)) {
            return [$ns, $localPart];
        }
    }

    /* auto-splitting on / or # */
    if (preg_match('/^(.*[\/\#])([^\/\#]+)$/', $v, $m)) {
        return [$m[1], $m[2]];
    }

    /* auto-splitting on last special char, e.g. urn:foo:bar */
    if (preg_match('/^(.*[\:\/])([^\:\/]+)$/', $v, $m)) {
        return [$m[1], $m[2]];
    }

    return [$v, ''];
}
||
| 163 |