1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Matecat\SubFiltering\Filters; |
4
|
|
|
|
5
|
|
|
use Matecat\SubFiltering\Commons\AbstractHandler; |
6
|
|
|
use Matecat\SubFiltering\Utils\Utils; |
7
|
|
|
|
8
|
|
|
/**
 * Pipeline handler that prepares a segment for storage as raw XML text.
 *
 * Line feed, carriage return and tab are emitted as numeric character
 * references, 4-byte UTF-8 sequences are delegated to
 * Utils::htmlentitiesFromUnicode(), and every ampersand that does not start
 * an accepted entity is escaped.
 */
class EncodeToRawXML extends AbstractHandler {

    /**
     * Encode a segment so that it can be embedded in raw XML.
     *
     * @param string $segment The text to encode.
     *
     * @return string The encoded text.
     */
    public function transform( string $segment ): string {

        // Handling &#10; (line feed), both as entity and as literal character.
        // Park it behind a placeholder to prevent it from being converted to \n.
        $segment = preg_replace( '/&(#10;|#x0A;)|\n/', '##_ent_0A_##', $segment );

        // Handling &#13; (carriage return), both as entity and as literal character.
        // Park it behind a placeholder to prevent it from being converted to \r.
        $segment = preg_replace( '/&(#13;|#x0D;)|\r/', '##_ent_0D_##', $segment );

        // Handling &#09; (tab), both as entity and as literal character.
        // Park it behind a placeholder to prevent it from being converted to \t.
        $segment = preg_replace( '/&#09;|\t/', '##_ent_09_##', $segment );

        // Substitute 4(+)-byte characters from a UTF-8 string to htmlentities:
        // the pattern matches a lead byte in F0-F7 followed by any three bytes.
        // NOTE(review): the exact output format is decided by
        // Utils::htmlentitiesFromUnicode, which is not visible here.
        $segment = preg_replace_callback( '/([\xF0-\xF7]...)/s', [ Utils::class, 'htmlentitiesFromUnicode' ], $segment );

        // Restore the placeholders as numeric character references, never as the
        // literal whitespace characters. str_replace() is already a no-op when a
        // placeholder is absent, so no strpos() guard is required.
        $segment = str_replace(
                [ '##_ent_0D_##', '##_ent_0A_##', '##_ent_09_##' ],
                [ '&#13;', '&#10;', '&#09;' ],
                $segment
        );

        // Encode every ampersand that does not start one of the five predefined
        // XML entities or a numeric character reference.
        // NOTE(review): the hex digits are matched in upper case only, so a
        // reference such as &#xe9; is escaped to &amp;#xe9; — confirm this is intended.
        return preg_replace( '/&(?!lt;|gt;|amp;|quot;|apos;|#[x]{0,1}[0-9A-F]{1,7};)/', '&amp;', $segment );
    }

}