1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Wikibase\QueryEngine; |
4
|
|
|
|
5
|
|
|
use InvalidArgumentException; |
6
|
|
|
|
7
|
|
|
/** |
8
|
|
|
* @licence GNU GPL v2+ |
9
|
|
|
* @author Jeroen De Dauw < [email protected] > |
10
|
|
|
* @author Thiemo Kreuz |
11
|
|
|
*/ |
12
|
|
|
class StringHasher {

	/**
	 * Upper bound, in bytes, for every value returned by hash().
	 *
	 * A raw SHA1 digest is 20 bytes. BASE64 encodes 6 bits per character, so the digest
	 * needs ceil( 20 * 8 / 6 ) = 27 characters; the encoder then pads the output to a
	 * multiple of 4 with one trailing "=". That padding is identical for every digest and
	 * carries no information, so only the first 27 characters are kept.
	 *
	 * 63 - 27 = 36 bytes therefore remain for the unmodified start of the input.
	 *
	 * The value 63 itself is an arbitrary pick (largest 6 bit number); 65 or 70 would
	 * work just as well.
	 */
	const LENGTH = 63;

	/**
	 * Number of leading input bytes copied verbatim into a shortened value.
	 * Derived in the constructor as LENGTH minus the hash suffix length.
	 */
	private $rawLength;

	/**
	 * Number of BASE64 characters of the SHA1 digest appended to a shortened value.
	 */
	private $hashedLength = 27;

	public function __construct() {
		// Whatever is not reserved for the hash suffix is available for raw input bytes.
		$this->rawLength = self::LENGTH - $this->hashedLength;
	}

	/**
	 * Shortens a string so that the result never exceeds 63 bytes.
	 *
	 * Inputs shorter than 63 bytes are returned untouched. Inputs of 63 bytes or more
	 * (note: exactly 63 is included) keep their first 36 bytes and have everything after
	 * that replaced by a 27 character BASE64 SHA1 hash of the removed part, so that
	 * different long inputs sharing a prefix still map to different results.
	 *
	 * Lengths are measured with strlen()/substr(), i.e. in bytes, not in characters.
	 *
	 * @param string $string
	 *
	 * @return string
	 * @throws InvalidArgumentException if $string is not a string
	 */
	public function hash( $string ) {
		if ( !is_string( $string ) ) {
			throw new InvalidArgumentException( '$string should be a string' );
		}

		// Anything below the limit is already short enough and is passed through.
		if ( strlen( $string ) < self::LENGTH ) {
			return $string;
		}

		$prefix = substr( $string, 0, $this->rawLength );
		$remainder = substr( $string, $this->rawLength );

		// Binary digest (20 bytes) -> 28 BASE64 characters, the last being "=" padding.
		$encodedDigest = base64_encode( sha1( $remainder, true ) );

		// Cutting to 27 characters drops the constant padding character.
		return $prefix . substr( $encodedDigest, 0, $this->hashedLength );
	}

}
58
|
|
|
|