Test Failed
Pull Request — master (#611)
by
unknown
02:09
created

PDFDocEncoding::convertPDFDoc2UTF8()   B

Complexity

Conditions 1
Paths 1

Size

Total Lines 140
Code Lines 138

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 138
nc 1
nop 1
dl 0
loc 140
rs 8
c 1
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
/**
4
 * @file
5
 *          This file is part of the PdfParser library.
6
 *
7
 * @author  Sébastien MALOT <[email protected]>
8
 *
9
 * @date    2017-01-03
10
 *
11
 * @license LGPLv3
12
 *
13
 * @url     <https://github.com/smalot/pdfparser>
14
 *
15
 *  PdfParser is a pdf library written in PHP, extraction oriented.
16
 *  Copyright (C) 2017 - Sébastien MALOT <[email protected]>
17
 *
18
 *  This program is free software: you can redistribute it and/or modify
19
 *  it under the terms of the GNU Lesser General Public License as published by
20
 *  the Free Software Foundation, either version 3 of the License, or
21
 *  (at your option) any later version.
22
 *
23
 *  This program is distributed in the hope that it will be useful,
24
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
25
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26
 *  GNU Lesser General Public License for more details.
27
 *
28
 *  You should have received a copy of the GNU Lesser General Public License
29
 *  along with this program.
30
 *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
31
 */
32
33
// Source : https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/pdfreference1.2.pdf
34
// Source : https://ia801001.us.archive.org/1/items/pdf1.7/pdf_reference_1-7.pdf
35
36
namespace Smalot\PdfParser\Encoding;
37
38
/**
 * Class PDFDocEncoding
 *
 * Translates byte strings encoded with PDFDocEncoding (see the PDF reference
 * documents linked at the top of this file) into UTF-8.
 */
abstract class PDFDocEncoding extends AbstractEncoding
{
    /**
     * Converts a PDFDocEncoding byte string to UTF-8.
     *
     * Only the bytes listed in the table below are rewritten: 0x18-0x1f,
     * 0x7f and 0x80-0xff. Every other byte (0x00-0x17 and 0x20-0x7e) is
     * returned unchanged. Bytes 0x7f, 0x9f and 0xad are mapped to the empty
     * string, i.e. they are removed from the output.
     *
     * All keys are exactly one byte long and strtr() never re-scans text it
     * has already replaced, so each input byte is translated at most once.
     *
     * @param string $content raw bytes in PDFDocEncoding
     *
     * @return string the UTF-8 encoded equivalent of $content
     */
    public static function convertPDFDoc2UTF8(string $content): string
    {
        return strtr($content, [
            "\x18" => "\u{02d8}", // breve
            "\x19" => "\u{02c7}", // caron
            "\x1a" => "\u{02c6}", // circumflex
            "\x1b" => "\u{02d9}", // dotaccent
            "\x1c" => "\u{02dd}", // hungarumlaut
            "\x1d" => "\u{02db}", // ogonek
            // U+02DA is RING ABOVE; U+02DE (used previously) is the
            // unrelated MODIFIER LETTER RHOTIC HOOK.
            "\x1e" => "\u{02da}", // ring
            "\x1f" => "\u{02dc}", // tilde
            "\x7f" => '',
            "\x80" => "\u{2022}", // bullet
            "\x81" => "\u{2020}", // dagger
            "\x82" => "\u{2021}", // daggerdbl
            "\x83" => "\u{2026}", // ellipsis
            "\x84" => "\u{2014}", // emdash
            "\x85" => "\u{2013}", // endash
            "\x86" => "\u{0192}", // florin
            "\x87" => "\u{2044}", // fraction
            "\x88" => "\u{2039}", // guilsinglleft
            "\x89" => "\u{203a}", // guilsinglright
            "\x8a" => "\u{2212}", // minus
            "\x8b" => "\u{2030}", // perthousand
            "\x8c" => "\u{201e}", // quotedblbase
            "\x8d" => "\u{201c}", // quotedblleft
            "\x8e" => "\u{201d}", // quotedblright
            "\x8f" => "\u{2018}", // quoteleft
            "\x90" => "\u{2019}", // quoteright
            "\x91" => "\u{201a}", // quotesinglbase
            "\x92" => "\u{2122}", // trademark
            "\x93" => "\u{fb01}", // fi
            "\x94" => "\u{fb02}", // fl
            "\x95" => "\u{0141}", // Lslash
            "\x96" => "\u{0152}", // OE
            "\x97" => "\u{0160}", // Scaron
            "\x98" => "\u{0178}", // Ydieresis
            "\x99" => "\u{017d}", // Zcaron
            "\x9a" => "\u{0131}", // dotlessi
            "\x9b" => "\u{0142}", // lslash
            "\x9c" => "\u{0153}", // oe
            "\x9d" => "\u{0161}", // scaron
            "\x9e" => "\u{017e}", // zcaron
            "\x9f" => '',
            "\xa0" => "\u{20ac}", // Euro
            "\xa1" => "\u{00a1}", // exclamdown
            "\xa2" => "\u{00a2}", // cent
            "\xa3" => "\u{00a3}", // sterling
            "\xa4" => "\u{00a4}", // currency
            "\xa5" => "\u{00a5}", // yen
            "\xa6" => "\u{00a6}", // brokenbar
            "\xa7" => "\u{00a7}", // section
            "\xa8" => "\u{00a8}", // dieresis
            "\xa9" => "\u{00a9}", // copyright
            "\xaa" => "\u{00aa}", // ordfeminine
            "\xab" => "\u{00ab}", // guillemotleft
            "\xac" => "\u{00ac}", // logicalnot
            "\xad" => '',
            "\xae" => "\u{00ae}", // registered
            "\xaf" => "\u{00af}", // macron
            "\xb0" => "\u{00b0}", // degree
            "\xb1" => "\u{00b1}", // plusminus
            "\xb2" => "\u{00b2}", // twosuperior
            "\xb3" => "\u{00b3}", // threesuperior
            "\xb4" => "\u{00b4}", // acute
            "\xb5" => "\u{00b5}", // mu
            "\xb6" => "\u{00b6}", // paragraph
            "\xb7" => "\u{00b7}", // periodcentered
            "\xb8" => "\u{00b8}", // cedilla
            "\xb9" => "\u{00b9}", // onesuperior
            "\xba" => "\u{00ba}", // ordmasculine
            "\xbb" => "\u{00bb}", // guillemotright
            "\xbc" => "\u{00bc}", // onequarter
            "\xbd" => "\u{00bd}", // onehalf
            "\xbe" => "\u{00be}", // threequarters
            "\xbf" => "\u{00bf}", // questiondown
            "\xc0" => "\u{00c0}", // Agrave
            "\xc1" => "\u{00c1}", // Aacute
            "\xc2" => "\u{00c2}", // Acircumflex
            "\xc3" => "\u{00c3}", // Atilde
            "\xc4" => "\u{00c4}", // Adieresis
            "\xc5" => "\u{00c5}", // Aring
            "\xc6" => "\u{00c6}", // AE
            "\xc7" => "\u{00c7}", // Ccedilla
            "\xc8" => "\u{00c8}", // Egrave
            "\xc9" => "\u{00c9}", // Eacute
            "\xca" => "\u{00ca}", // Ecircumflex
            "\xcb" => "\u{00cb}", // Edieresis
            "\xcc" => "\u{00cc}", // Igrave
            "\xcd" => "\u{00cd}", // Iacute
            "\xce" => "\u{00ce}", // Icircumflex
            "\xcf" => "\u{00cf}", // Idieresis
            "\xd0" => "\u{00d0}", // Eth
            "\xd1" => "\u{00d1}", // Ntilde
            "\xd2" => "\u{00d2}", // Ograve
            "\xd3" => "\u{00d3}", // Oacute
            "\xd4" => "\u{00d4}", // Ocircumflex
            "\xd5" => "\u{00d5}", // Otilde
            "\xd6" => "\u{00d6}", // Odieresis
            "\xd7" => "\u{00d7}", // multiply
            "\xd8" => "\u{00d8}", // Oslash
            "\xd9" => "\u{00d9}", // Ugrave
            "\xda" => "\u{00da}", // Uacute
            "\xdb" => "\u{00db}", // Ucircumflex
            "\xdc" => "\u{00dc}", // Udieresis
            "\xdd" => "\u{00dd}", // Yacute
            "\xde" => "\u{00de}", // Thorn
            "\xdf" => "\u{00df}", // germandbls
            "\xe0" => "\u{00e0}", // agrave
            "\xe1" => "\u{00e1}", // aacute
            "\xe2" => "\u{00e2}", // acircumflex
            "\xe3" => "\u{00e3}", // atilde
            "\xe4" => "\u{00e4}", // adieresis
            "\xe5" => "\u{00e5}", // aring
            "\xe6" => "\u{00e6}", // ae
            "\xe7" => "\u{00e7}", // ccedilla
            "\xe8" => "\u{00e8}", // egrave
            "\xe9" => "\u{00e9}", // eacute
            "\xea" => "\u{00ea}", // ecircumflex
            "\xeb" => "\u{00eb}", // edieresis
            "\xec" => "\u{00ec}", // igrave
            "\xed" => "\u{00ed}", // iacute
            "\xee" => "\u{00ee}", // icircumflex
            "\xef" => "\u{00ef}", // idieresis
            "\xf0" => "\u{00f0}", // eth
            "\xf1" => "\u{00f1}", // ntilde
            "\xf2" => "\u{00f2}", // ograve
            "\xf3" => "\u{00f3}", // oacute
            "\xf4" => "\u{00f4}", // ocircumflex
            "\xf5" => "\u{00f5}", // otilde
            "\xf6" => "\u{00f6}", // odieresis
            "\xf7" => "\u{00f7}", // divide
            "\xf8" => "\u{00f8}", // oslash
            "\xf9" => "\u{00f9}", // ugrave
            "\xfa" => "\u{00fa}", // uacute
            "\xfb" => "\u{00fb}", // ucircumflex
            "\xfc" => "\u{00fc}", // udieresis
            "\xfd" => "\u{00fd}", // yacute
            "\xfe" => "\u{00fe}", // thorn
            "\xff" => "\u{00ff}", // ydieresis
        ]);
    }
}
186