|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace NlpTools\Stemmers; |
|
4
|
|
|
|
|
5
|
|
|
/**
 * This stemmer is an implementation of the stemmer described by G. Ntais
 * in his Master Thesis.
 * http://people.dsv.su.se/~hercules/papers/Ntais_greek_stemmer_thesis_final.pdf
 *
 * It was first ported to php by P. Kyriakakis.
 * This stemmer expects lower case characters and not upper case.
 *
 * NOTE(review): every suffix table below spells a word-final sigma as "σ"
 * (e.g. "κρεασ"), so input presumably has to be normalised the same way
 * before it is passed in; confirm against the callers.
 */
class GreekStemmer extends Stemmer
{
    /**
     * Step 1 lookup: irregular ending => the replacement appended to the
     * part of the word that precedes that ending.
     */
    protected static $step1list = array(
        "φαγια"=>"φα",
        "φαγιου"=>"φα",
        "φαγιων"=>"φα",
        "σκαγια"=>"σκα",
        "σκαγιου"=>"σκα",
        "σκαγιων"=>"σκα",
        "ολογιου"=>"ολο",
        "ολογια"=>"ολο",
        "ολογιων"=>"ολο",
        "σογιου"=>"σο",
        "σογια"=>"σο",
        "σογιων"=>"σο",
        "τατογια"=>"τατο",
        "τατογιου"=>"τατο",
        "τατογιων"=>"τατο",
        "κρεασ"=>"κρε",
        "κρεατοσ"=>"κρε",
        "κρεατα"=>"κρε",
        "κρεατων"=>"κρε",
        "περασ"=>"περ",
        "περατοσ"=>"περ",
        "περατα"=>"περ",
        "περατων"=>"περ",
        "τερασ"=>"τερ",
        "τερατοσ"=>"τερ",
        "τερατα"=>"τερ",
        "τερατων"=>"τερ",
        "φωσ"=>"φω",
        "φωτοσ"=>"φω",
        "φωτα"=>"φω",
        "φωτων"=>"φω",
        "καθεστωσ"=>"καθεστ",
        "καθεστωτοσ"=>"καθεστ",
        "καθεστωτα"=>"καθεστ",
        "καθεστωτων"=>"καθεστ",
        "γεγονοσ"=>"γεγον",
        "γεγονοτοσ"=>"γεγον",
        "γεγονοτα"=>"γεγον",
        "γεγονοτων"=>"γεγον"
    );

    /** Matches any key of $step1list at the end of the word (group 2). */
    protected static $step1regexp="/(.*)(φαγια|φαγιου|φαγιων|σκαγια|σκαγιου|σκαγιων|ολογιου|ολογια|ολογιων|σογιου|σογια|σογιων|τατογια|τατογιου|τατογιων|κρεασ|κρεατοσ|κρεατα|κρεατων|περασ|περατοσ|περατα|περατων|τερασ|τερατοσ|τερατα|τερατων|φωσ|φωτοσ|φωτα|φωτων|καθεστωσ|καθεστωτοσ|καθεστωτα|καθεστωτων|γεγονοσ|γεγονοτοσ|γεγονοτα|γεγονοτων)$/u";

    /** Character class of the vowels, including "υ". */
    protected static $v = "[αεηιουω]";

    /** Character class of the vowels without "υ". */
    protected static $v2 = "[αεηιοω]";

    /**
     * Reduce a Greek word to its stem.
     *
     * The word goes through a fixed sequence of suffix-stripping steps
     * (numbered after the comments in the original port). Each step removes
     * a suffix and, when the remaining stem matches an exception list,
     * appends a shortened form of that suffix again.
     *
     * @param  string $w The word to stem
     * @return string The stem; words shorter than 4 characters are returned unchanged
     */
    public function stem($w)
    {
        if (mb_strlen($w, "utf-8") < 4) {
            return $w;
        }

        // Stays true only while none of steps 1, 2d, 3, 4 and 5 has fired;
        // the generic suffix removal of step 6 runs only in that case.
        $test1 = true;

        // "Stem ends in a vowel" tests shared by several steps.
        $endsInVowel = "/".self::$v."$/u";
        $endsInVowel2 = "/".self::$v2."$/u";

        //step1
        if (preg_match(self::$step1regexp,$w,$fp)) {
            $w = $fp[1].self::$step1list[$fp[2]];
            $test1 = false;
        }

        $re1 = "/^(.+?)(αδεσ|αδων)$/u";
        $re2 = "/^(.+?)(εδεσ|εδων)$/u";
        $re3 = "/^(.+?)(ουδεσ|ουδων)$/u";
        $re4 = "/^(.+?)(εωσ|εων)$/u";
        if (preg_match($re1,$w,$fp)) { // step 2a
            $w = $fp[1];
            // Here the listed stems are the ones that do NOT get "αδ" back.
            $exept1 = "/(οκ|μαμ|μαν|μπαμπ|πατερ|γιαγι|νταντ|κυρ|θει|πεθερ)$/u";
            if (!preg_match($exept1,$w)) {
                $w .= "αδ";
            }
        } elseif (preg_match($re2,$w,$fp)) { //step 2b
            $w = $fp[1];
            $exept2 = "/(οπ|ιπ|εμπ|υπ|γηπ|δαπ|κρασπ|μιλ)$/u";
            if (preg_match($exept2,$w)) {
                $w .= "εδ";
            }
        } elseif (preg_match($re3,$w,$fp)) { //step 2c
            $w = $fp[1];
            $exept3 = "/(αρκ|καλιακ|πεταλ|λιχ|πλεξ|σκ|σ|φλ|φρ|βελ|λουλ|χν|σπ|τραγ|φε)$/u";
            if (preg_match($exept3,$w)) {
                $w .= "ουδ";
            }
        } elseif (preg_match($re4,$w,$fp)) { //step 2d
            $w = $fp[1];
            $test1 = false;
            $exept4 = "/^(θ|δ|ελ|γαλ|ν|π|ιδ|παρ)$/u";
            if (preg_match($exept4,$w)) {
                $w .= "ε";
            }
        }

        //step 3
        if (preg_match("/^(.+?)(ια|ιου|ιων)$/u",$w,$fp)) {
            $w = $fp[1];
            $test1 = false;
            if (preg_match($endsInVowel,$w)) {
                $w .= "ι";
            }
        }

        //step 4
        if (preg_match("/^(.+?)(ικα|ικο|ικου|ικων)$/u",$w,$fp)) {
            $w = $fp[1];
            $test1 = false;
            $exept5 = "/^(αλ|αδ|ενδ|αμαν|αμμοχαλ|ηθ|ανηθ|αντιδ|φυσ|βρωμ|γερ|εξωδ|καλπ|καλλιν|καταδ|μουλ|μπαν|μπαγιατ|μπολ|μποσ|νιτ|ξικ|συνομηλ|πετσ|πιτσ|πικαντ|πλιατσ|ποστελν|πρωτοδ|σερτ|συναδ|τσαμ|υποδ|φιλον|φυλοδ|χασ)$/u";
            if (preg_match($endsInVowel,$w) || preg_match($exept5,$w)) {
                $w .= "ικ";
            }
        }

        //step 5a
        $re = "/^(.+?)(αμε)$/u";
        $re2 = "/^(.+?)(αγαμε|ησαμε|ουσαμε|ηκαμε|ηθηκαμε)$/u";
        $re3 = "/^(.+?)(ανε)$/u";
        $re4 = "/^(.+?)(αγανε|ησανε|ουσανε|ιοντανε|ιοτανε|ιουντανε|οντανε|οτανε|ουντανε|ηκανε|ηθηκανε)$/u";
        $re5 = "/^(.+?)(ετε)$/u";
        $re6 = "/^(.+?)(ησετε)$/u";
        $re7 = "/^(.+?)(οντασ|ωντασ)$/u";
        $re8 = "/^(.+?)(ομαστε|ιομαστε)$/u";
        $re9 = "/^(.+?)(εστε)$/u";
        $re10 = "/^(.+?)(ιεστε)$/u";
        $re11 = "/^(.+?)(ηκα|ηκεσ|ηκε)$/u";
        $re12 = "/^(.+?)(ηθηκα|ηθηκεσ|ηθηκε)$/u";
        $re13 = "/^(.+?)(ουσα|ουσεσ|ουσε)$/u";
        $re14 = "/^(.+?)(αγα|αγεσ|αγε)$/u";
        $re15 = "/^(.+?)(ησε|ησου|ησα)$/u";
        $re16 = "/^(.+?)(ηστε)$/u";
        $re17 = "/^(.+?)(ουνε|ησουνε|ηθουνε)$/u";
        $re18 = "/^(.+?)(ουμε|ησουμε|ηθουμε)$/u";

        if ($w === "αγαμε") {
            return "αγαμ";
        }

        // Exactly one branch of this chain fires. Within each pair the
        // pattern with the longer suffixes is tested before the shorter one.
        if (preg_match($re2,$w,$fp)) {
            $w = $fp[1];
            $test1 = false;
        } elseif (preg_match($re,$w,$fp)) {
            $w = $fp[1];
            $test1 = false;
            $exept6 = "/^(αναπ|αποθ|αποκ|αποστ|βουβ|ξεθ|ουλ|πεθ|πικρ|ποτ|σιχ|χ)$/u";
            if (preg_match($exept6,$w)) {
                $w .= "αμ";
            }
        } elseif (preg_match($re4,$w,$fp)) { //step 5b
            $w = $fp[1];
            $test1 = false;
            if (preg_match("/^(τρ|τσ)$/u",$w)) {
                $w .= "αγαν";
            }
        } elseif (preg_match($re3,$w,$fp)) {
            $w = $fp[1];
            $test1 = false;
            $exept7 = "/^(βετερ|βουλκ|βραχμ|γ|δραδουμ|θ|καλπουζ|καστελ|κορμορ|λαοπλ|μωαμεθ|μ|μουσουλμ|ν|ουλ|π|πελεκ|πλ|πολισ|πορτολ|σαρακατσ|σουλτ|τσαρλατ|ορφ|τσιγγ|τσοπ|φωτοστεφ|χ|ψυχοπλ|αγ|ορφ|γαλ|γερ|δεκ|διπλ|αμερικαν|ουρ|πιθ|πουριτ|σ|ζωντ|ικ|καστ|κοπ|λιχ|λουθηρ|μαιντ|μελ|σιγ|σπ|στεγ|τραγ|τσαγ|φ|ερ|αδαπ|αθιγγ|αμηχ|ανικ|ανοργ|απηγ|απιθ|ατσιγγ|βασ|βασκ|βαθυγαλ|βιομηχ|βραχυκ|διατ|διαφ|ενοργ|θυσ|καπνοβιομηχ|καταγαλ|κλιβ|κοιλαρφ|λιβ|μεγλοβιομηχ|μικροβιομηχ|νταβ|ξηροκλιβ|ολιγοδαμ|ολογαλ|πενταρφ|περηφ|περιτρ|πλατ|πολυδαπ|πολυμηχ|στεφ|ταβ|τετ|υπερηφ|υποκοπ|χαμηλοδαπ|ψηλοταβ)$/u";
            if (preg_match($endsInVowel2,$w) || preg_match($exept7,$w)) {
                $w .= "αν";
            }
        } elseif (preg_match($re6,$w,$fp)) { //step 5c
            $w = $fp[1];
            $test1 = false;
        } elseif (preg_match($re5,$w,$fp)) {
            $w = $fp[1];
            $test1 = false;

            // The vowel test used to be the bare string "[αεηιοω]". PCRE
            // treats "[" and "]" as bracket-style delimiters, so that
            // pattern only matched the literal text "αεηιοω" and the
            // "stem ends in a vowel" rule never applied. The properly
            // delimited, end-anchored pattern is used instead.
            $exept8 = "/(οδ|αιρ|φορ|ταθ|διαθ|σχ|ενδ|ευρ|τιθ|υπερθ|ραθ|ενθ|ροθ|σθ|πυρ|αιν|συνδ|συν|συνθ|χωρ|πον|βρ|καθ|ευθ|εκθ|νετ|ρον|αρκ|βαρ|βολ|ωφελ)$/u";
            $exept9 = "/^(αβαρ|βεν|εναρ|αβρ|αδ|αθ|αν|απλ|βαρον|ντρ|σκ|κοπ|μπορ|νιφ|παγ|παρακαλ|σερπ|σκελ|συρφ|τοκ|υ|δ|εμ|θαρρ|θ)$/u";

            if (preg_match($endsInVowel2,$w)
                || preg_match($exept8,$w)
                || preg_match($exept9,$w)) {
                $w .= "ετ";
            }
        } elseif (preg_match($re7,$w,$fp)) { //step 5d
            $w = $fp[1];
            $test1 = false;

            // Two independent checks, applied in this order.
            if (preg_match("/^(αρχ)$/u",$w)) {
                $w .= "οντ";
            }
            if (preg_match("/(κρε)$/u",$w)) {
                $w .= "ωντ";
            }
        } elseif (preg_match($re8,$w,$fp)) { //step 5e
            $w = $fp[1];
            $test1 = false;

            if (preg_match("/^(ον)$/u",$w)) {
                $w .= "ομαστ";
            }
        } elseif (preg_match($re10,$w,$fp)) { //step 5f
            $w = $fp[1];
            $test1 = false;

            if (preg_match("/^(π|απ|συμπ|ασυμπ|ακαταπ|αμεταμφ)$/u",$w)) {
                $w .= "ιεστ";
            }
        } elseif (preg_match($re9,$w,$fp)) {
            $w = $fp[1];
            $test1 = false;

            $exept12 = "/^(αλ|αρ|εκτελ|ζ|μ|ξ|παρακαλ|αρ|προ|νισ)$/u";
            if (preg_match($exept12,$w)) {
                $w .= "εστ";
            }
        } elseif (preg_match($re12,$w,$fp)) { //step 5g
            $w = $fp[1];
            $test1 = false;
        } elseif (preg_match($re11,$w,$fp)) {
            $w = $fp[1];
            $test1 = false;

            $exept13 = "/(σκωλ|σκουλ|ναρθ|σφ|οθ|πιθ)$/u";
            // The trailing empty alternative can only match an empty stem,
            // which the non-empty (.+?) capture above rules out.
            $exept14 = "/^(διαθ|θ|παρακαταθ|προσθ|συνθ|)$/u";
            if (preg_match($exept13,$w) || preg_match($exept14,$w)) {
                $w .= "ηκ";
            }
        } elseif (preg_match($re13,$w,$fp)) { //step 5h
            $w = $fp[1];
            $test1 = false;

            $exept15 = "/^(φαρμακ|χαδ|αγκ|αναρρ|βρομ|εκλιπ|λαμπιδ|λεχ|μ|πατ|ρ|λ|μεδ|μεσαζ|υποτειν|αμ|αιθ|ανηκ|δεσποζ|ενδιαφερ|δε|δευτερευ|καθαρευ|πλε|τσα)$/u";
            $exept16 = "/(ποδαρ|βλεπ|πανταχ|φρυδ|μαντιλ|μαλλ|κυματ|λαχ|ληγ|φαγ|ομ|πρωτ)$/u";
            if (preg_match($exept15,$w) || preg_match($exept16,$w)) {
                $w .= "ουσ";
            }
        } elseif (preg_match($re14,$w,$fp)) { //step 5i
            $w = $fp[1];
            $test1 = false;

            $exept17 = "/^(ψοφ|ναυλοχ)$/u";
            $exept20 = "/(κολλ)$/u";
            $exept18 = "/^(αβαστ|πολυφ|αδηφ|παμφ|ρ|ασπ|αφ|αμαλ|αμαλλι|ανυστ|απερ|ασπαρ|αχαρ|δερβεν|δροσοπ|ξεφ|νεοπ|νομοτ|ολοπ|ομοτ|προστ|προσωποπ|συμπ|συντ|τ|υποτ|χαρ|αειπ|αιμοστ|ανυπ|αποτ|αρτιπ|διατ|εν|επιτ|κροκαλοπ|σιδηροπ|λ|ναυ|ουλαμ|ουρ|π|τρ|μ)$/u";
            $exept19 = "/(οφ|πελ|χορτ|λλ|σφ|ρπ|φρ|πρ|λοχ|σμην)$/u";

            // "αγ" is restored for stems on lists 18/19 unless lists 17/20 veto it.
            if((preg_match($exept18,$w) || preg_match($exept19,$w))
                && !(preg_match($exept17,$w) || preg_match($exept20,$w))) {
                $w .= "αγ";
            }
        } elseif (preg_match($re15,$w,$fp)) { //step 5j
            $w = $fp[1];
            $test1 = false;

            $exept21 = "/^(ν|χερσον|δωδεκαν|ερημον|μεγαλον|επταν)$/u";
            if (preg_match($exept21,$w)) {
                $w .= "ησ";
            }
        } elseif (preg_match($re16,$w,$fp)) { //step 5k
            $w = $fp[1];
            $test1 = false;

            $exept22 = "/^(ασβ|σβ|αχρ|χρ|απλ|αειμν|δυσχρ|ευχρ|κοινοχρ|παλιμψ)$/u";
            if (preg_match($exept22,$w)) {
                $w .= "ηστ";
            }
        } elseif (preg_match($re17,$w,$fp)) { //step 5l
            $w = $fp[1];
            $test1 = false;

            $exept23 = "/^(ν|ρ|σπι|στραβομουτσ|κακομουτσ|εξων)$/u";
            if (preg_match($exept23,$w)) {
                $w .= "ουν";
            }
        } elseif (preg_match($re18,$w,$fp)) { //step 5l
            $w = $fp[1];
            $test1 = false;

            $exept24 = "/^(παρασουσ|φ|χ|ωριοπλ|αζ|αλλοσουσ|ασουσ)$/u";
            if (preg_match($exept24,$w)) {
                $w .= "ουμ";
            }
        }

        // step 6
        $re = "/^(.+?)(ματα|ματων|ματοσ)$/u";
        $re2 = "/^(.+?)(α|αγατε|αγαν|αει|αμαι|αν|ασ|ασαι|αται|αω|ε|ει|εισ|ειτε|εσαι|εσ|εται|ι|ιεμαι|ιεμαστε|ιεται|ιεσαι|ιεσαστε|ιομασταν|ιομουν|ιομουνα|ιονταν|ιοντουσαν|ιοσασταν|ιοσαστε|ιοσουν|ιοσουνα|ιοταν|ιουμα|ιουμαστε|ιουνται|ιουνταν|η|ηδεσ|ηδων|ηθει|ηθεισ|ηθειτε|ηθηκατε|ηθηκαν|ηθουν|ηθω|ηκατε|ηκαν|ησ|ησαν|ησατε|ησει|ησεσ|ησουν|ησω|ο|οι|ομαι|ομασταν|ομουν|ομουνα|ονται|ονταν|οντουσαν|οσ|οσασταν|οσαστε|οσουν|οσουνα|οταν|ου|ουμαι|ουμαστε|ουν|ουνται|ουνταν|ουσ|ουσαν|ουσατε|υ|υσ|ω|ων)$/u";
        if (preg_match($re,$w,$fp)) {
            $w = $fp[1] . "μα";
        }
        // Generic ending removal, skipped once an earlier step cleared $test1.
        if ($test1 && preg_match($re2,$w,$fp)) {
            $w = $fp[1];
        }

        // step 7
        $re = "/^(.+?)(εστερ|εστατ|οτερ|οτατ|υτερ|υτατ|ωτερ|ωτατ)$/u";
        if (preg_match($re,$w,$fp)) {
            $w = $fp[1];
        }

        return $w;
    }

}
|
356
|
|
|
|
This check looks for variable assignments that are either overwritten by other assignments or where the variable is not used subsequently.
Both the `$myVar` assignment in line 1 and the `$higher` assignment in line 2 are dead. The first because `$myVar` is never used and the second because `$higher` is always overwritten for every possible time line.