|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace Wikibase\QueryEngine; |
|
4
|
|
|
|
|
5
|
|
|
use InvalidArgumentException; |
|
6
|
|
|
|
|
7
|
|
|
/** |
|
8
|
|
|
* @licence GNU GPL v2+ |
|
9
|
|
|
* @author Jeroen De Dauw < [email protected] > |
|
10
|
|
|
* @author Thiemo Kreuz |
|
11
|
|
|
*/ |
|
12
|
|
|
class StringHasher {

	/**
	 * Maximum length, in bytes, of any string returned by hash().
	 *
	 * The budget is split into two parts:
	 *  - 27 characters for a digest: a binary SHA1 is 20 bytes, which BASE64 encodes as
	 *    ceil( 20 * 8 / 6 ) = 27 significant ASCII characters. BASE64 pads its output to a
	 *    multiple of 4 with a trailing "=", but that character would be identical in every
	 *    hash and so carries no information; it is cut off.
	 *  - 63 - 27 = 36 characters copied verbatim from the start of the input.
	 *
	 * The value 63 (the largest 6 bit number) was picked arbitrarily; 65 or 70 would have
	 * worked just as well.
	 */
	const LENGTH = 63;

	/**
	 * Number of leading input bytes kept as-is; derived in the constructor.
	 */
	private $rawLength;

	/**
	 * Number of characters taken from the BASE64 encoded SHA1 digest.
	 */
	private $hashedLength = 27;

	public function __construct() {
		// Whatever the digest does not need is available for the plain prefix.
		$this->rawLength = self::LENGTH - $this->hashedLength;
	}

	/**
	 * Shortens a string so that the result is never longer than self::LENGTH (63) bytes.
	 *
	 * Strings shorter than the limit are returned unchanged. A string of 63 bytes or more
	 * keeps its first 36 bytes, followed by a 27 character digest of everything after
	 * those 36 bytes, so the result is always exactly 63 bytes long in that case.
	 *
	 * Lengths are counted in bytes (strlen/substr), not in characters.
	 *
	 * @param string $string
	 *
	 * @return string
	 * @throws InvalidArgumentException if $string is not a string
	 */
	public function hash( $string ) {
		if ( !is_string( $string ) ) {
			throw new InvalidArgumentException( '$string should be a string' );
		}

		// Anything below the limit fits as it is.
		if ( strlen( $string ) < self::LENGTH ) {
			return $string;
		}

		$plainPart = substr( $string, 0, $this->rawLength );
		$hashedPart = substr( $string, $this->rawLength );

		// Raw (binary) SHA1 of the tail, BASE64 encoded; the cut below drops the "=" padding.
		$digest = base64_encode( sha1( $hashedPart, true ) );

		return $plainPart . substr( $digest, 0, $this->hashedLength );
	}

}
|
58
|
|
|
|