|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace Matecat\SubFiltering\Filters; |
|
4
|
|
|
|
|
5
|
|
|
use Matecat\SubFiltering\Commons\AbstractHandler; |
|
6
|
|
|
use Matecat\SubFiltering\Utils\Utils; |
|
7
|
|
|
|
|
8
|
|
|
/**
 * Filter handler that rewrites a segment so that control whitespace and stray
 * ampersands are expressed as XML character/entity references.
 */
class EncodeToRawXML extends AbstractHandler {

    /**
     * Encode a segment to its raw XML form.
     *
     * Steps, in order:
     *  1. Line feeds, carriage returns and tabs -- whether literal characters or
     *     already written as `&#10;` / `&#x0A;`, `&#13;` / `&#x0D;`, `&#09;` -- are
     *     swapped for neutral `##_ent_XX_##` placeholders.
     *  2. Every sequence made of a lead byte in the range 0xF0-0xF7 followed by
     *     three more bytes is handed to Utils::htmlentitiesFromUnicode().
     *  3. The placeholders are turned back into the decimal references
     *     `&#13;`, `&#10;` and `&#09;`.
     *  4. Every `&` that does not start one of the five predefined XML entities
     *     or a numeric reference is escaped to `&amp;`.
     *
     * NOTE(review): the matching is case-sensitive, so references spelled with
     * lowercase hex digits (e.g. `&#x0a;`) are not recognised in steps 1 and 4.
     *
     * @param string $segment The segment text to encode.
     *
     * @return string The encoded segment.
     */
    public function transform( string $segment ): string {

        // Line feed: collapse "&#10;", "&#x0A;" and a literal "\n" into one
        // placeholder so it cannot be turned (back) into a raw "\n".
        $segment = preg_replace( '/&(#10;|#x0A;)|\n/', '##_ent_0A_##', $segment );

        // Carriage return: same treatment for "&#13;", "&#x0D;" and a literal "\r".
        $segment = preg_replace( '/&(#13;|#x0D;)|\r/', '##_ent_0D_##', $segment );

        // Tab: same treatment for "&#09;" and a literal "\t".
        $segment = preg_replace( '/&#09;|\t/', '##_ent_09_##', $segment );

        // Substitute 4(+)-byte UTF-8 characters with HTML entities. The pattern is
        // byte-oriented on purpose (no "u" modifier): a lead byte plus any three
        // bytes; the "s" modifier lets "." match every byte value.
        $segment = preg_replace_callback( '/([\xF0-\xF7]...)/s', [ Utils::class, 'htmlentitiesFromUnicode' ], $segment );

        // Restore the placeholders as numeric character references. str_replace()
        // returns the subject untouched when a needle is absent, so no prior
        // strpos() check is needed; pairs are applied in the listed order.
        $segment = str_replace(
                [ '##_ent_0D_##', '##_ent_0A_##', '##_ent_09_##' ],
                [ '&#13;', '&#10;', '&#09;' ],
                $segment
        );

        // Escape every ampersand that is not the start of a valid XML entity
        // (lt, gt, amp, quot, apos) or of a numeric character reference.
        return preg_replace( '/&(?!lt;|gt;|amp;|quot;|apos;|#[x]{0,1}[0-9A-F]{1,7};)/', '&amp;', $segment );
    }

}