Completed
Push — master ( feca12...39afc0 )
by Sebastian
05:35
created

StringHelper::keepLowerCase()   B

Complexity

Conditions 5
Paths 5

Size

Total Lines 10
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 5
eloc 7
c 1
b 0
f 0
nc 5
nop 1
dl 0
loc 10
rs 8.8571
1
<?php
2
/*
3
 * citeproc-php
4
 *
5
 * @link        http://github.com/seboettg/citeproc-php for the source repository
6
 * @copyright   Copyright (c) 2016 Sebastian Böttger.
7
 * @license     https://opensource.org/licenses/MIT
8
 */
9
10
namespace Seboettg\CiteProc\Util;
11
12
use Seboettg\CiteProc\CiteProc;
13
use Symfony\Polyfill\Mbstring\Mbstring;
14
15
/**
16
 * Class StringHelper
17
 * @package Seboettg\CiteProc\Util
18
 *
19
 * @author Sebastian Böttger <[email protected]>
20
 */
21
class StringHelper
22
{
23
24
    /**
     * English prepositions; one of the word lists consulted by keepLowerCase().
     */
    const PREPOSITIONS = [
        'on', 'in', 'at', 'since', 'for', 'ago', 'before', 'to', 'past', 'till', 'until', 'by',
        'under', 'below', 'over', 'above', 'across', 'through', 'into', 'towards', 'onto',
        'from', 'of', 'off', 'about', 'via',
    ];

    /**
     * English articles; one of the word lists consulted by keepLowerCase().
     */
    const ARTICLES = ['a', 'an', 'the'];

    /**
     * A short list of English adverbs.
     */
    const ADVERBS = ['yet', 'so', 'just', 'only'];

    /**
     * English conjunctions; one of the word lists consulted by keepLowerCase().
     */
    const CONJUNCTIONS = ['nor', 'so', 'and', 'or'];

    /**
     * Words listed as adjectives; one of the word lists consulted by keepLowerCase().
     */
    const ADJECTIVES = ['down', 'up'];

    /**
     * Names of the ISO-8859 character sets (parts 1-11 and 13-16).
     */
    const ISO_ENCODINGS = [
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-11',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
    ];

    /**
     * Typographic opening double quotation mark.
     */
    const OPENING_QUOTE = "“";

    /**
     * Typographic closing double quotation mark.
     */
    const CLOSING_QUOTE = "”";
72
73
    /**
     * Upper-cases the first character of every space-separated word in $text.
     *
     * The text is split on single space characters only, so consecutive spaces
     * and any other whitespace are preserved unchanged. The first character is
     * extracted and upper-cased with the multibyte functions (UTF-8), because
     * the byte-oriented ucfirst() leaves words that start with a non-ASCII
     * letter (e.g. "élan") untouched.
     *
     * @param string $text
     * @return string
     */
    public static function capitalizeAll($text)
    {
        $capitalized = array_map(function ($word) {
            // Input that is not valid UTF-8 keeps the previous byte-wise behaviour
            // instead of being reinterpreted by the multibyte functions.
            if (!mb_check_encoding($word, 'UTF-8')) {
                return ucfirst($word);
            }
            $first = mb_substr($word, 0, 1, 'UTF-8');
            $rest = mb_substr($word, 1, null, 'UTF-8');
            return mb_strtoupper($first, 'UTF-8') . $rest;
        }, explode(" ", $text));

        return implode(" ", $capitalized);
    }
87
88
    /**
     * Applies title-style capitalisation to $titleString.
     *
     * Two passes are made:
     *  1. If the string contains one of the characters . : / ; ? ! (preceded by
     *     a character other than "<" or ">"), optionally followed by one
     *     whitespace character and then an ASCII lower-case letter that is not
     *     the last character, that letter is passed through mb_ucfirst(). The
     *     leading group is greedy, so only one such position is handled.
     *  2. The string is split on single spaces. Every word for which
     *     keepLowerCase() returns false is passed through mb_ucfirst(). Words
     *     containing "-" are first processed part by part in the same way and
     *     re-joined, and the joined word is then checked once more as a whole.
     *
     * @param string $titleString
     * @return string
     */
    public static function capitalizeForTitle($titleString)
    {
        $afterPunctuation = '/(.+[^\<\>][\.:\/;\?\!]\s?)([a-z])(.+)/';
        if (preg_match($afterPunctuation, $titleString, $match)) {
            $titleString = $match[1] . self::mb_ucfirst($match[2]) . $match[3];
        }

        // Shared rule for whole words and for the parts of hyphenated words.
        $capitalize = function ($token) {
            return self::keepLowerCase($token) ? $token : self::mb_ucfirst($token);
        };

        $words = array_map(function ($word) use ($capitalize) {
            $parts = explode("-", $word);
            if (count($parts) > 1) {
                $word = implode("-", array_map($capitalize, $parts));
            }
            // The (possibly re-joined) word is always checked as a whole as well.
            return $capitalize($word);
        }, explode(" ", $titleString));

        return implode(" ", $words);
    }
114
115
    /**
     * Tells whether $word should be left in lower case when a title is capitalised.
     *
     * Returns true when $word is exactly one of the entries of PREPOSITIONS,
     * ARTICLES, CONJUNCTIONS or ADJECTIVES, or when it is at least two
     * characters long and its first character is not a Unicode letter.
     *
     * The pattern is anchored with "^" and uses the "u" modifier so that only
     * the first character is tested and the word is read as UTF-8. Without
     * them any non-letter anywhere in the word (e.g. the apostrophe in "it's")
     * or a single byte of a multibyte letter made the word count as
     * "keep lower case".
     *
     * @param string $word
     * @return bool
     */
    public static function keepLowerCase($word)
    {
        if (in_array($word, self::PREPOSITIONS, true)
            || in_array($word, self::ARTICLES, true)
            || in_array($word, self::CONJUNCTIONS, true)
            || in_array($word, self::ADJECTIVES, true)
        ) {
            return true;
        }

        // preg_match() yields false for input that is not valid UTF-8; that is reported as "false" too.
        return preg_match('/^[^\p{L}].+/u', $word) === 1;
    }
129
130
    /**
     * Multibyte-aware ucfirst(): upper-cases the first character of $string.
     *
     * The first character is extracted and upper-cased using the given
     * $encoding; the remainder of the string is appended unchanged. An empty
     * string is returned as an empty string.
     *
     * Previously $encoding was overwritten by a detection restricted to the
     * ISO-8859-* list and the first character was upper-cased in whatever
     * ISO encoding that detection returned, so the caller's encoding
     * (UTF-8 by default) was never used for the case conversion.
     *
     * @param string $string
     * @param string $encoding character encoding of $string
     * @return string
     */
    public static function mb_ucfirst($string, $encoding = 'UTF-8')
    {
        $firstChar = mb_substr($string, 0, 1, $encoding);
        $then = mb_substr($string, 1, null, $encoding);

        return mb_strtoupper($firstChar, $encoding) . $then;
    }
144
145
    /**
     * Reduces the parts of a name to initials.
     *
     * $string is split on "-" and every resulting segment on " ". For each
     * part the first UTF-8 character is inspected:
     *  - Latin (per isLatinString()) and upper case: the character followed by
     *    $initializeSign is appended;
     *  - Latin but not upper case: the complete part is appended, surrounded by
     *    single spaces;
     *  - anything else: the character followed by $initializeSign is appended.
     *
     * After every hyphen-separated segment except the last one, trailing
     * whitespace is trimmed and "-" is appended, provided the global option
     * "initialize with hyphen" is enabled in the current CiteProc context.
     *
     * Upper case is tested with the Unicode property \p{Lu} rather than
     * ctype_upper(), which works on single bytes and therefore reports
     * multibyte capitals such as "É" as not upper case.
     *
     * @param string $string
     * @param string $initializeSign text appended after each initial
     * @return string
     */
    public static function initializeBySpaceOrHyphen($string, $initializeSign)
    {
        $initializeWithHyphen = CiteProc::getContext()->getGlobalOptions()->isInitializeWithHyphen();
        $res = "";
        $exploded = explode("-", $string);
        $lastIndex = count($exploded) - 1;

        // explode() returns a list, so the key is the running segment index.
        foreach ($exploded as $i => $explode) {
            foreach (explode(" ", $explode) as $givenPart) {
                $firstLetter = mb_substr($givenPart, 0, 1, "UTF-8");
                if (self::isLatinString($firstLetter)) {
                    $isUpperCase = preg_match('/^\p{Lu}$/u', $firstLetter) === 1;
                    $res .= $isUpperCase ? $firstLetter . $initializeSign : " " . $givenPart . " ";
                } else {
                    $res .= $firstLetter . $initializeSign;
                }
            }
            if ($i < $lastIndex && $initializeWithHyphen) {
                $res = rtrim($res) . "-";
            }
        }

        return $res;
    }
173
174
    /**
     * Converts a camelCase identifier to its lower-case, hyphen-separated form.
     *
     * A "-" is inserted in front of every ASCII capital letter, a single
     * leading "-" (as produced by an initial capital) is removed, and the
     * result is lower-cased.
     *
     * @param string $string
     * @return mixed|string
     */
    public static function camelCase2Hyphen($string)
    {
        $withHyphens = preg_replace("/([A-Z])/", "-$1", $string);

        if (strncmp($withHyphens, "-", 1) === 0) {
            $withHyphens = substr($withHyphens, 1);
        }

        return mb_strtolower($withHyphens);
    }
184
185
    /**
     * Tells whether $string is unchanged by mb_strtolower(), i.e. contains no
     * character that has a different lower-case form.
     *
     * @param string $string
     * @return bool
     */
    public static function checkLowerCaseString($string)
    {
        $lowered = mb_strtolower($string);

        return $lowered === $string;
    }
193
194
    /**
     * Tells whether $string is unchanged by mb_strtoupper(), i.e. contains no
     * character that has a different upper-case form.
     *
     * @param string $string
     * @return bool
     */
    public static function checkUpperCaseString($string)
    {
        $uppered = mb_strtoupper($string);

        return $uppered === $string;
    }
202
203
    /**
     * Replaces every straight apostrophe (') in $string with the typographic
     * apostrophe (’).
     *
     * @param string $string
     * @return mixed
     */
    public static function clearApostrophes($string)
    {
        $straight = "'";
        $typographic = "’";

        return str_replace($straight, $typographic, $string);
    }
211
212
    /**
     * Replaces one pair of outer quotes in $text by the given inner quotes.
     *
     * The text is searched for $outerOpenQuote, at least one character, and
     * $outerCloseQuote. On a match that opening/closing pair is exchanged for
     * $innerOpenQuote/$innerCloseQuote and everything else is kept; only one
     * pair is replaced per call. If nothing matches, $text is returned
     * unchanged.
     *
     * The outer quote strings are escaped with preg_quote() before they are
     * placed into the pattern, so quotes that are regular-expression
     * metacharacters (e.g. "(" and ")") or the "/" delimiter are matched
     * literally instead of altering or breaking the pattern.
     *
     * @param string $text
     * @param string $outerOpenQuote
     * @param string $outerCloseQuote
     * @param string $innerOpenQuote
     * @param string $innerCloseQuote
     * @return string
     */
    public static function replaceOuterQuotes($text, $outerOpenQuote, $outerCloseQuote, $innerOpenQuote, $innerCloseQuote)
    {
        $open = preg_quote($outerOpenQuote, '/');
        $close = preg_quote($outerCloseQuote, '/');

        if (preg_match("/(.*)$open(.+)$close(.*)/u", $text, $match)) {
            return $match[1] . $innerOpenQuote . $match[2] . $innerCloseQuote . $match[3];
        }

        return $text;
    }
229
230
    /**
     * Tells whether $string consists solely of Latin-script characters,
     * whitespace and punctuation. The empty string qualifies as well.
     *
     * @param string $string
     * @return bool
     */
    public static function isLatinString($string)
    {
        $latinOnly = "/^[\p{Latin}\s\p{P}]*$/u";

        return preg_match($latinOnly, $string) === 1;
    }
239
240
    /**
     * Tells whether $string consists solely of Cyrillic-script characters,
     * whitespace and punctuation. The empty string qualifies as well.
     *
     * @param string $string
     * @return bool
     */
    public static function isCyrillicString($string)
    {
        $cyrillicOnly = "/^[\p{Cyrillic}\s\p{P}]*$/u";

        return preg_match($cyrillicOnly, $string) === 1;
    }
248
249
    /**
     * Strips square brackets, parentheses and curly braces from $datePart.
     *
     * @param string $datePart
     * @return mixed
     */
    public static function removeBrackets($datePart)
    {
        $brackets = ["[", "]", "(", ")", "{", "}"];

        return str_replace($brackets, "", $datePart);
    }
257
}