Passed
Push — master ( 982e46...a8ac91 )
by Timo
19:05
created

tokenizeByQuotesAndEscapeDependingOnContext()   B

Complexity

Conditions 6
Paths 5

Size

Total Lines 29
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 29
rs 8.439
c 0
b 0
f 0
cc 6
eloc 17
nc 5
nop 1
1
<?php
2
namespace ApacheSolrForTypo3\Solr\Domain\Search\Query\Helper;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2017 Timo Hund <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 2 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
/**
28
 * The EscapeService is responsible for escaping the query string as expected by Apache Solr.
29
 *
30
 * @author Timo Hund <[email protected]>
31
 */
32
class EscapeService
{
    /**
     * Quotes and escapes a search string so it can be passed to Apache Solr.
     *
     * Numeric input is returned untouched. Input without any double quote is
     * escaped as a whole. Input containing double quotes is split at the
     * quotes, and the quoted and unquoted parts are escaped differently.
     *
     * @param string|int|double $string String to escape
     * @return string|int|double The escaped/quoted string
     */
    public function escape($string)
    {
        // A purely numeric value is returned as is.
        if (is_numeric($string)) {
            return $string;
        }

        // Without any double quote there is no phrase context to respect, so
        // the whole string is escaped in one go. This also covers input made
        // of word characters only, for which the escaping is a no-op.
        if (strpos($string, '"') === false) {
            return $this->escapeSpecialCharacters($string);
        }

        return $this->tokenizeByQuotesAndEscapeDependingOnContext($string);
    }

    /**
     * Escapes the parts of the query string that are surrounded by quotes
     * differently than the parts outside of a quoted context.
     *
     * The string is split at every double quote. Segments with an odd index
     * lie between two quotes and are escaped as a phrase; all other segments
     * are escaped with escapeSpecialCharacters(). When the number of quotes
     * is odd, the last quote has no partner: it is emitted as an escaped
     * quote and the text after it is handled as unquoted text.
     *
     * @param string $string
     * @return string
     */
    protected function tokenizeByQuotesAndEscapeDependingOnContext($string)
    {
        $segments = explode('"', $string);

        // explode() yields one segment more than there are quotes, so the
        // index of the last segment equals the number of quotes.
        $lastIndex = count($segments) - 1;
        $hasUnpairedQuote = $lastIndex % 2 !== 0;

        $result = '';
        foreach ($segments as $index => $segment) {
            $isLastSegment = $index === $lastIndex;
            $isInQuote = $index % 2 !== 0;

            // The quote in front of the last segment has no closing partner.
            if ($isLastSegment && $hasUnpairedQuote) {
                $result .= '\"';
            }

            if ($isInQuote && !$isLastSegment) {
                $result .= $this->escapePhrase($segment);
            } else {
                $result .= $this->escapeSpecialCharacters($segment);
            }
        }

        return $result;
    }

    /**
     * Escapes a value meant to be contained in a phrase and wraps it in
     * double quotes. Only double quotes and backslashes are escaped.
     *
     * @param string $value Unescaped - "dirty" - string
     * @return string Escaped - "clean" - string, surrounded by double quotes
     */
    protected function escapePhrase($value)
    {
        $pattern = '/("|\\\)/';
        $replace = '\\\$1';

        return '"' . preg_replace($pattern, $replace, $value) . '"';
    }

    /**
     * Escapes characters with special meanings in Lucene query syntax by
     * prefixing each of them with a backslash.
     *
     * @param string $value Unescaped - "dirty" - string
     * @return string Escaped - "clean" - string
     */
    protected function escapeSpecialCharacters($value)
    {
        // list taken from http://lucene.apache.org/core/4_4_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#package_description
        // which mentions: + - && || ! ( ) { } [ ] ^ " ~ * ? : \ /
        // of which we escape: ( ) { } [ ] ^ " ~ : \ /
        // and explicitly don't escape: + - && || ! * ?
        $pattern = '/(\\(|\\)|\\{|\\}|\\[|\\]|\\^|"|~|\:|\\\\|\\/)/';
        $replace = '\\\$1';

        return preg_replace($pattern, $replace, $value);
    }
}