1
|
|
|
<?php /** @noinspection PhpInternalEntityUsedInspection */ |
2
|
|
|
|
3
|
|
|
/* |
4
|
|
|
* citeproc-php |
5
|
|
|
* |
6
|
|
|
* @link http://github.com/seboettg/citeproc-php for the source repository |
7
|
|
|
* @copyright Copyright (c) 2016 Sebastian Böttger. |
8
|
|
|
* @license https://opensource.org/licenses/MIT |
9
|
|
|
*/ |
10
|
|
|
|
11
|
|
|
namespace Seboettg\CiteProc\Util; |
12
|
|
|
|
13
|
|
|
use Seboettg\CiteProc\CiteProc; |
14
|
|
|
use Seboettg\Collection\ArrayList; |
15
|
|
|
|
16
|
|
|
/**
 * Class StringHelper
 *
 * Collection of static, stateless string utilities used while rendering
 * citations: title/word capitalisation, multibyte-aware helpers, name
 * initialisation, quote handling and simple script detection.
 *
 * @package Seboettg\CiteProc\Util
 *
 * @author Sebastian Böttger <[email protected]>
 */
class StringHelper
{
    const PREPOSITIONS = [
        'on', 'in', 'at', 'since', 'for', 'ago', 'before', 'to', 'past', 'till', 'until', 'by', 'under', 'below',
        'over', 'above', 'across', 'through', 'into', 'towards', 'onto', 'from', 'of', 'off', 'about', 'via'
    ];

    const ARTICLES = [
        'a', 'an', 'the'
    ];

    const ADVERBS = [
        'yet', 'so', 'just', 'only'
    ];

    const CONJUNCTIONS = [
        'nor', 'so', 'and', 'or'
    ];

    const ADJECTIVES = [
        'down', 'up'
    ];

    const ISO_ENCODINGS = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16'
    ];

    /**
     * opening quote sign
     */
    const OPENING_QUOTE = "“";

    /**
     * closing quote sign
     */
    const CLOSING_QUOTE = "”";

    /**
     * Upper-cases the first byte-wise character of every space separated word.
     *
     * Runs of spaces are collapsed to a single space because empty fragments
     * are dropped. Only empty fragments are dropped: a word such as "0" is kept.
     *
     * @param string $text
     * @return string
     */
    public static function capitalizeAll($text)
    {
        $words = array_filter(
            explode(" ", $text),
            static function ($word) {
                // A bare array_filter() would also discard the word "0".
                return $word !== '';
            }
        );

        return implode(" ", array_map('ucfirst', $words));
    }

    /**
     * Applies title casing to a title string.
     *
     * The lower-case ASCII letter following the last sentence punctuation mark
     * (. : / ; ? !) is upper-cased first. The title is then split on whitespace
     * and slashes, every word is cased individually (see keepLowerCase()) and
     * the pieces are re-joined with the delimiters that originally separated them.
     *
     * @param string $titleString
     * @return string
     */
    public static function capitalizeForTitle($titleString)
    {
        if ((string) $titleString === '') {
            return "";
        }
        if (preg_match('/(.+[^\<\>][\.:\/;\?\!]\s?)([a-z])(.+)/', $titleString, $match)) {
            $titleString = $match[1] . self::mb_ucfirst($match[2]) . $match[3];
        }

        // Capturing the delimiter yields an alternating list: word, delimiter, word, ...
        $tokens = preg_split('/(\s|\/)/', $titleString, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($tokens === false || count($tokens) === 1) {
            // No delimiter at all: the single word is always capitalised.
            return self::mb_ucfirst($titleString);
        }

        foreach ($tokens as $index => $token) {
            if ($index % 2 === 0) {
                // Even positions are words, odd positions are the delimiters to preserve.
                $tokens[$index] = self::capitalizeTitleWord($token);
            }
        }

        return implode('', $tokens);
    }

    /**
     * Cases a single title word; hyphenated words are cased part by part.
     *
     * @param string $word
     * @return string
     */
    private static function capitalizeTitleWord($word)
    {
        $wordParts = explode("-", $word);
        if (count($wordParts) > 1) {
            $casedWordParts = array_map(
                static function ($part) {
                    return self::keepLowerCase($part) ? $part : self::mb_ucfirst($part);
                },
                $wordParts
            );
            $word = implode("-", $casedWordParts);
        }

        return self::keepLowerCase($word) ? $word : self::mb_ucfirst($word);
    }

    /**
     * Decides whether a word must be left untouched by title casing.
     *
     * A word is left as is when it is listed in PREPOSITIONS, ARTICLES,
     * CONJUNCTIONS or ADJECTIVES, or when it contains a non-letter that is
     * followed by at least one more character.
     *
     * NOTE(review): the pattern is neither anchored nor run in UTF-8 mode, so
     * it also matches words with an inner apostrophe or hyphen and, presumably,
     * most words containing multibyte characters. Confirm this is intended
     * before tightening it.
     *
     * @param string $word
     * @return bool
     */
    public static function keepLowerCase($word)
    {
        return in_array($word, self::PREPOSITIONS, true)
            || in_array($word, self::ARTICLES, true)
            || in_array($word, self::CONJUNCTIONS, true)
            || in_array($word, self::ADJECTIVES, true)
            || (bool) preg_match('/[^\p{L}].+/', $word);
    }

    /**
     * Multibyte-aware variant of ucfirst().
     *
     * The first character (according to $encoding) is upper-cased only when
     * mb_detect_encoding() classifies it as one of ISO_ENCODINGS; otherwise the
     * string is returned unchanged.
     *
     * @param string $string
     * @param string $encoding encoding used to split off the first character
     * @return string
     */
    // phpcs:disable
    public static function mb_ucfirst($string, $encoding = 'UTF-8')
    {// phpcs:enable
        $firstChar = mb_substr($string, 0, 1, $encoding);
        $remainder = mb_substr($string, 1, null, $encoding);

        /** @noinspection PhpInternalEntityUsedInspection */
        $detected = mb_detect_encoding($firstChar, self::ISO_ENCODINGS, true);
        if (in_array($detected, self::ISO_ENCODINGS, true)) {
            return mb_strtoupper($firstChar, $detected) . $remainder;
        }

        return $firstChar . $remainder;
    }

    /**
     * Reverses a string character by character (not byte by byte).
     *
     * @param string $string
     * @return string
     */
    // phpcs:disable
    public static function mb_strrev($string)
    {// phpcs:enable
        $reversed = '';
        // Start at the last valid index; index mb_strlen() is past the end.
        for ($i = mb_strlen($string) - 1; $i >= 0; --$i) {
            $reversed .= mb_substr($string, $i, 1);
        }
        return $reversed;
    }

    /**
     * Joins strings with a delimiter without doubling the delimiter character.
     *
     * When a part already ends with the trimmed delimiter, that trailing
     * delimiter is removed before joining. Empty parts are skipped; a part
     * consisting of "0" is kept.
     *
     * @param string $delimiter
     * @param string[] $arrayOfStrings
     * @return string
     */
    public static function implodeAndPreventConsecutiveChars($delimiter, $arrayOfStrings)
    {
        $delim = trim($delimiter);
        if ($delim !== '') {
            $delimLength = mb_strlen($delim);
            foreach ($arrayOfStrings as $key => $textPart) {
                $textPart = (string) $textPart;
                if (mb_substr($textPart, -$delimLength) === $delim) {
                    $arrayOfStrings[$key] = mb_substr($textPart, 0, mb_strlen($textPart) - $delimLength);
                }
            }
        }

        $nonEmptyParts = array_filter(
            $arrayOfStrings,
            static function ($part) {
                // Drop null/false/'' but keep "0", which a bare array_filter() would lose.
                return (string) $part !== '';
            }
        );

        return implode($delimiter, $nonEmptyParts);
    }

    /**
     * Reduces the space/hyphen separated parts of a (given) name to initials.
     *
     * Parts whose first character is Latin/Common script are initialised only
     * when that character is an upper-case letter; other Latin parts are kept
     * in full, surrounded by spaces. Parts starting with a non-Latin character
     * are always initialised. Hyphens between parts are re-inserted only when
     * the global option "initialize-with-hyphen" is enabled.
     *
     * @param string $string
     * @param string $initializeSign text appended after every initial
     * @return string
     */
    public static function initializeBySpaceOrHyphen($string, $initializeSign)
    {
        $initializeWithHyphen = CiteProc::getContext()->getGlobalOptions()->isInitializeWithHyphen();
        $res = "";
        $hyphenParts = explode("-", $string);
        $lastIndex = count($hyphenParts) - 1;

        foreach ($hyphenParts as $index => $hyphenPart) {
            foreach (explode(" ", $hyphenPart) as $givenPart) {
                if ($givenPart === '') {
                    // Consecutive/trailing separators must not emit a dangling initialise sign.
                    continue;
                }
                $firstLetter = mb_substr($givenPart, 0, 1, "UTF-8");
                if (self::isLatinString($firstLetter)) {
                    // \p{Lu} recognises accented capitals; ctype_upper() only sees single bytes.
                    $isUpperCase = preg_match('/^\p{Lu}/u', $firstLetter) === 1;
                    $res .= $isUpperCase ? $firstLetter . $initializeSign : " " . $givenPart . " ";
                } else {
                    $res .= $firstLetter . $initializeSign;
                }
            }
            if ($index < $lastIndex && $initializeWithHyphen) {
                $res = rtrim($res) . "-";
            }
        }

        return $res;
    }

    /**
     * Converts a camelCase identifier to its lower-case hyphenated form,
     * e.g. "camelCase" becomes "camel-case".
     *
     * @param string $string
     * @return string
     */
    public static function camelCase2Hyphen($string)
    {
        $hyphenated = preg_replace("/([A-Z])/", "-$1", $string);
        // A leading capital produces a leading hyphen; remove exactly that one.
        if (substr($hyphenated, 0, 1) === "-") {
            $hyphenated = substr($hyphenated, 1);
        }
        return mb_strtolower($hyphenated);
    }

    /**
     * Tells whether lower-casing the string leaves it unchanged.
     *
     * @param string $string
     * @return bool
     */
    public static function checkLowerCaseString($string)
    {
        return $string === mb_strtolower($string);
    }

    /**
     * Tells whether upper-casing the string leaves it unchanged.
     *
     * @param string $string
     * @return bool
     */
    public static function checkUpperCaseString($string)
    {
        return $string === mb_strtoupper($string);
    }

    /**
     * Replaces every straight apostrophe (') by a typographic one (’).
     *
     * @param string $string
     * @return string
     */
    public static function clearApostrophes($string)
    {
        return str_replace("'", "’", $string);
    }

    /**
     * replaces outer quotes of $text by given inner quotes
     *
     * The match is greedy, so only the last opening/closing pair found on a
     * line is replaced. When no quoted span is found, $text is returned unchanged.
     *
     * @param string $text
     * @param string $outerOpenQuote
     * @param string $outerCloseQuote
     * @param string $innerOpenQuote
     * @param string $innerCloseQuote
     * @return string
     */
    public static function replaceOuterQuotes(
        $text,
        $outerOpenQuote,
        $outerCloseQuote,
        $innerOpenQuote,
        $innerCloseQuote
    ) {
        // Quote signs are data, not pattern syntax: escape them before building the regex.
        $open = preg_quote($outerOpenQuote, '/');
        $close = preg_quote($outerCloseQuote, '/');

        if (preg_match("/(.*)$open(.+)$close(.*)/u", $text, $match)) {
            return $match[1] . $innerOpenQuote . $match[2] . $innerCloseQuote . $match[3];
        }
        return $text;
    }

    /**
     * Tells whether the string is non-empty and consists only of characters
     * from the Latin and Common Unicode scripts.
     *
     * @param string $string
     * @return bool
     */
    public static function isLatinString($string)
    {
        return boolval(preg_match("/^[\p{Latin}\p{Common}]+$/u", $string));
    }

    /**
     * Tells whether the string is non-empty and consists only of characters
     * from the Cyrillic and Common Unicode scripts.
     *
     * @param string $string
     * @return bool
     */
    public static function isCyrillicString($string)
    {
        return boolval(preg_match("/^[\p{Cyrillic}\p{Common}]+$/u", $string));
    }

    /**
     * Tells whether the string consists only of Han characters, whitespace
     * and punctuation. An empty string also satisfies the pattern.
     *
     * @param string $string
     * @return bool
     */
    public static function isAsianString($string)
    {
        return boolval(preg_match("/^[\p{Han}\s\p{P}]*$/u", $string));
    }

    /**
     * removes all kind of brackets from a given string
     *
     * @param string|string[] $datePart
     * @return string|string[]
     */
    public static function removeBrackets($datePart)
    {
        return str_replace(["[", "]", "(", ")", "{", "}"], "", $datePart);
    }
}
312
|
|
|
|
This function has been deprecated. The supplier of the function has supplied an explanatory message.
The explanatory message should give you some clue as to whether and when the function will be removed and what other function to use instead.