|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/**
 * This file contains the abstract base class for all filter implementations.
 * It provides a foundational structure for transforming string data between different
 * "layers" of representation, such as raw database content, server-to-server communication
 * formats, and UI-ready strings.
 */
|
9
|
|
|
|
|
10
|
|
|
namespace Matecat\SubFiltering; |
|
11
|
|
|
|
|
12
|
|
|
use Exception; |
|
13
|
|
|
use Matecat\SubFiltering\Commons\Pipeline; |
|
14
|
|
|
use Matecat\SubFiltering\Contracts\FeatureSetInterface; |
|
15
|
|
|
use Matecat\SubFiltering\Filters\EncodeToRawXML; |
|
16
|
|
|
use Matecat\SubFiltering\Filters\EquivTextToBase64; |
|
17
|
|
|
use Matecat\SubFiltering\Filters\LtGtDecode; |
|
18
|
|
|
use Matecat\SubFiltering\Filters\LtGtEncode; |
|
19
|
|
|
use Matecat\SubFiltering\Filters\MateCatCustomPHToOriginalValue; |
|
20
|
|
|
use Matecat\SubFiltering\Filters\PlaceHoldXliffTags; |
|
21
|
|
|
use Matecat\SubFiltering\Filters\RestoreEquivText; |
|
22
|
|
|
use Matecat\SubFiltering\Filters\RestorePlaceHoldersToXLIFFLtGt; |
|
23
|
|
|
use Matecat\SubFiltering\Filters\RestoreXliffTagsContent; |
|
24
|
|
|
use Matecat\SubFiltering\Filters\SplitPlaceholder; |
|
25
|
|
|
use Matecat\SubFiltering\Filters\StandardPHToMateCatCustomPH; |
|
26
|
|
|
use Matecat\SubFiltering\Filters\StandardXEquivTextToMateCatCustomPH; |
|
27
|
|
|
|
|
28
|
|
|
/**
 * Provides a blueprint for creating specific filter implementations.
 *
 * This abstract class defines the core structure and functionality for transforming
 * string data between different logical layers. It manages a set of features,
 * source/target languages, and transformation pipelines. Subclasses must implement
 * the specific transformation logic required for their context.
 *
 * The class uses a factory method `getInstance` to create and configure filter instances,
 * which are composed of a `Pipeline` of `AbstractHandler`s.
 */
|
39
|
|
|
abstract class AbstractFilter {

    /**
     * Feature set that is given the chance to alter pipelines before they run.
     *
     * @var FeatureSetInterface
     */
    protected FeatureSetInterface $featureSet;

    /**
     * Source language of the segment, or null when none was supplied.
     *
     * @var string|null
     */
    protected ?string $source;

    /**
     * Target language of the segment, or null when none was supplied.
     *
     * @var string|null
     */
    protected ?string $target;

    /**
     * Map used for replacing data references within the segment.
     *
     * @var array
     */
    protected array $dataRefMap = [];

    /**
     * Handler class names, already ordered, that are appended to the
     * Layer 0 to Layer 1 pipeline between the fixed leading and trailing handlers.
     *
     * @var class-string[]
     */
    protected array $orderedHandlersForLayer0ToLayer1Transition = [];

    /**
     * Builds and configures a filter of the concrete class this method is called on.
     *
     * The last argument selects the handlers used for the Layer 0 to Layer 1 transition:
     *  - an empty array (the default) selects the keys of HandlersSorter::getDefaultInjectedHandlers();
     *  - `null` selects no handlers at all;
     *  - any other array is taken as the explicit list of handler class names.
     * Whatever list is selected is then passed through HandlersSorter to obtain its final order.
     *
     * @param FeatureSetInterface $featureSet                                   The feature set to apply.
     * @param string|null         $source                                       The source language code (e.g., 'en-US').
     * @param string|null         $target                                       The target language code (e.g., 'it-IT').
     * @param array|null          $dataRefMap                                   A map for data-ref transformations; null is stored as an empty map.
     * @param array|null          $handlerClassNamesForLayer0ToLayer1Transition Handler class names, an empty array for the defaults, or null for none.
     *
     * @return AbstractFilter The configured filter instance.
     */
    public static function getInstance(
            FeatureSetInterface $featureSet,
            ?string $source = null,
            ?string $target = null,
            ?array $dataRefMap = [],
            ?array $handlerClassNamesForLayer0ToLayer1Transition = []
    ): ?AbstractFilter {
        // Late static binding: instantiate the concrete subclass the call was made on.
        $filter = new static();

        $filter->featureSet = $featureSet;
        $filter->source     = $source;
        $filter->target     = $target;
        $filter->dataRefMap = $dataRefMap ?? [];

        // Resolve the requested handler list: null => none, [] => defaults, otherwise as given.
        if ( $handlerClassNamesForLayer0ToLayer1Transition === null ) {
            $requestedHandlers = [];
        } elseif ( $handlerClassNamesForLayer0ToLayer1Transition === [] ) {
            $requestedHandlers = array_keys( HandlersSorter::getDefaultInjectedHandlers() );
        } else {
            $requestedHandlers = $handlerClassNamesForLayer0ToLayer1Transition;
        }

        // Let the sorter decide the final execution order of the selected handlers.
        $filter->orderedHandlersForLayer0ToLayer1Transition = ( new HandlersSorter( $requestedHandlers ) )->getOrderedHandlersClassNames();

        return $filter;
    }

    /**
     * Transforms a segment from Layer 1 (server-to-server format) back to Layer 0 (database raw XML).
     *
     * A fresh pipeline is filled with a fixed sequence of handlers, handed to the
     * feature set under the 'fromLayer1ToLayer0' hook so it can be altered, and then
     * used to transform the segment.
     *
     * @param string $segment The segment in Layer 1 format.
     *
     * @return string The transformed segment in Layer 0 format.
     * @throws Exception If any handler in the pipeline fails.
     */
    public function fromLayer1ToLayer0( string $segment ): string {
        $pipeline = new Pipeline( $this->source, $this->target, $this->dataRefMap );

        // Handlers are appended in exactly this order.
        $reverseHandlers = [
                MateCatCustomPHToOriginalValue::class, // Restore original PH values
                PlaceHoldXliffTags::class,             // Isolate XLIFF tags
                EncodeToRawXML::class,                 // Encode for raw XML storage
                LtGtEncode::class,                     // Encode '<' and '>'
                RestoreXliffTagsContent::class,        // Restore original XLIFF content
                RestorePlaceHoldersToXLIFFLtGt::class, // Restore placeholders for '<' and '>'
                SplitPlaceholder::class,               // Handle split placeholders
                RestoreEquivText::class,               // Restore equiv-text content
        ];

        foreach ( $reverseHandlers as $handlerClass ) {
            $pipeline->addLast( $handlerClass );
        }

        // Give the feature set the opportunity to modify the pipeline before it runs.
        /** @var Pipeline $pipeline */
        $pipeline = $this->featureSet->filter( 'fromLayer1ToLayer0', $pipeline );

        return $pipeline->transform( $segment );
    }

    /**
     * Transforms a segment from Layer 0 to Layer 1.
     *
     * The concrete transformation is left to each subclass.
     *
     * @param string      $segment The input segment to be transformed from Layer 0 to Layer 1.
     * @param string|null $cid     An optional identifier for context or further processing specific to the segment.
     *
     * @return string The transformed segment after processing from Layer 0 to Layer 1.
     */
    abstract public function fromLayer0ToLayer1( string $segment, ?string $cid = null ): string;

    /**
     * Fills the given pipeline with the default Layer 0 to Layer 1 handler sequence.
     *
     * The sequence is: a fixed set of leading handlers, then the handlers stored in
     * $orderedHandlersForLayer0ToLayer1Transition (ordered by HandlersSorter in getInstance),
     * then a fixed set of trailing handlers. Subclasses such as MyMemoryFilter may
     * override this method to customize the pipeline.
     *
     * @param Pipeline    $channel The pipeline to populate; handlers are appended to it in place.
     * @param string|null $cid     Not used by this default implementation; available to overriding subclasses.
     */
    protected function configureFromLayer0ToLayer1Pipeline( Pipeline $channel, ?string $cid = null ): void {

        // Leading handlers: standard XLIFF and placeholder normalization.
        $leadingHandlers = [
                StandardPHToMateCatCustomPH::class,
                StandardXEquivTextToMateCatCustomPH::class,
                PlaceHoldXliffTags::class,
                LtGtDecode::class,
        ];

        // Trailing handlers: restore XLIFF content and encode for the target layer.
        $trailingHandlers = [
                RestoreXliffTagsContent::class,
                RestorePlaceHoldersToXLIFFLtGt::class,
                EquivTextToBase64::class,
        ];

        foreach ( $leadingHandlers as $handlerClass ) {
            $channel->addLast( $handlerClass );
        }

        // Dynamic, feature-based handlers selected when the filter was created.
        foreach ( $this->orderedHandlersForLayer0ToLayer1Transition as $handlerClass ) {
            $channel->addLast( $handlerClass );
        }

        foreach ( $trailingHandlers as $handlerClass ) {
            $channel->addLast( $handlerClass );
        }

    }

}