1
|
|
|
package br.ufrj.ppgi.greco.kettle; |
2
|
|
|
|
3
|
|
|
import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Locale;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import bsh.EvalError;
import bsh.Interpreter;

import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.ResIterator;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
41
|
|
|
|
42
|
|
|
/** |
43
|
|
|
* Step GraphSemanticLevelMarker. |
44
|
|
|
* <p /> |
45
|
|
|
* |
46
|
|
|
* @author Kelli de Faria Cordeiro |
47
|
|
|
* |
48
|
|
|
*/ |
49
|
|
|
public class GraphSemanticLevelMarkerStep extends BaseStep implements StepInterface { |
50
|
|
|
|
51
|
|
|
static Integer assessedValueLevel; |
52
|
|
|
|
53
|
|
|
public GraphSemanticLevelMarkerStep(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, |
54
|
|
|
TransMeta transMeta, Trans trans) { |
55
|
|
|
super(stepMeta, stepDataInterface, copyNr, transMeta, trans); |
56
|
|
|
} |
57
|
|
|
|
58
|
|
|
@Override |
59
|
|
|
public boolean init(StepMetaInterface smi, StepDataInterface sdi) { |
60
|
|
|
if (super.init(smi, sdi)) { |
61
|
|
|
return true; |
62
|
|
|
} else |
63
|
|
|
return false; |
64
|
|
|
} |
65
|
|
|
|
66
|
|
|
@Override |
67
|
|
|
public void dispose(StepMetaInterface smi, StepDataInterface sdi) { |
68
|
|
|
super.dispose(smi, sdi); |
69
|
|
|
} |
70
|
|
|
|
71
|
|
|
/** |
72
|
|
|
* Metodo chamado para cada linha que entra no step |
73
|
|
|
*/ |
74
|
|
View Code Duplication |
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException { |
|
|
|
|
75
|
|
|
GraphSemanticLevelMarkerStepMeta meta = (GraphSemanticLevelMarkerStepMeta) smi; |
76
|
|
|
GraphSemanticLevelMarkerStepData data = (GraphSemanticLevelMarkerStepData) sdi; |
77
|
|
|
|
78
|
|
|
String rulesFileName = meta.getRulesFilename(); |
79
|
|
|
String LOVFileName = meta.getBrowseFilename(); |
80
|
|
|
|
81
|
|
|
// Obtem linha do fluxo de entrada e termina caso nao haja mais entrada |
82
|
|
|
Object[] row = getRow(); |
83
|
|
|
if (row == null) { // Nao ha mais linhas de dados |
84
|
|
|
setOutputDone(); |
85
|
|
|
return false; |
86
|
|
|
} |
87
|
|
|
|
88
|
|
|
// Executa apenas uma vez. Variavel first definida na superclasse com |
89
|
|
|
// valor true |
90
|
|
|
if (first) { |
91
|
|
|
first = false; |
92
|
|
|
|
93
|
|
|
// Obtem todas as colunas ateh o step anterior. |
94
|
|
|
// Chamar apenas apos chamar getRow() |
95
|
|
|
data.outputRowMeta = new RowMeta(); |
96
|
|
|
|
97
|
|
|
// Adiciona os metadados do step atual |
98
|
|
|
meta.getFields(data.outputRowMeta, getStepname(), null, null, this); |
99
|
|
|
} |
100
|
|
|
|
101
|
|
|
// Logica do step |
102
|
|
|
// Leitura de campos Input |
103
|
|
|
RowMetaInterface rowMeta = getInputRowMeta(); |
104
|
|
|
int indexGraph = rowMeta.indexOfValue(meta.getInputGraph()); |
105
|
|
|
Object graph = (indexGraph >= 0) ? row[indexGraph] : null; |
106
|
|
|
|
107
|
|
|
// Set output row |
108
|
|
|
Method[] methods = graph.getClass().getMethods(); |
109
|
|
|
boolean hasListStatements = false; |
110
|
|
|
for (Method method : methods) { |
111
|
|
|
if (method.getName().equals("listStatements")) { |
112
|
|
|
hasListStatements = true; |
113
|
|
|
break; |
114
|
|
|
} |
115
|
|
|
} |
116
|
|
|
|
117
|
|
|
if (hasListStatements) { |
118
|
|
|
tripleWriter(graph, null, data, rulesFileName, LOVFileName); |
119
|
|
|
} |
120
|
|
|
|
121
|
|
|
return true; |
122
|
|
|
} |
123
|
|
|
|
124
|
|
|
private int tripleWriter(Object model, Object[] row, GraphSemanticLevelMarkerStepData data, String rulesFileName, |
125
|
|
|
String LOVFileName) throws KettleStepException { |
126
|
|
|
int numPutRows = 0; |
127
|
|
|
|
128
|
|
|
try { |
129
|
|
|
// Recreates the graph sent by the previous step |
130
|
|
|
Model inputRecreatedGraph = recreateGraph(model); |
131
|
|
|
|
132
|
|
|
// Identify inputGraph Semantic Level |
133
|
|
|
Statement stamp = markGraphSemanticLevel(inputRecreatedGraph, rulesFileName, LOVFileName); |
134
|
|
|
|
135
|
|
|
// Creates output with the semantic level stamp |
136
|
|
|
Object[] outputRow = row; |
137
|
|
|
int i = 0; |
138
|
|
|
outputRow = RowDataUtil.addValueData(outputRow, i++, stamp.getSubject().toString()); |
139
|
|
|
outputRow = RowDataUtil.addValueData(outputRow, i++, stamp.getPredicate().toString()); |
140
|
|
|
outputRow = RowDataUtil.addValueData(outputRow, i++, stamp.getObject().toString()); |
141
|
|
|
|
142
|
|
|
// Joga tripla no fluxo |
143
|
|
|
putRow(data.outputRowMeta, outputRow); |
144
|
|
|
|
145
|
|
|
numPutRows++; |
146
|
|
|
|
147
|
|
|
} |
148
|
|
|
|
149
|
|
|
catch (SecurityException e) { |
150
|
|
|
// TODO Auto-generated catch block |
151
|
|
|
e.printStackTrace(); |
152
|
|
|
} |
153
|
|
|
|
154
|
|
|
return numPutRows; |
155
|
|
|
} |
156
|
|
|
|
157
|
|
|
private Model recreateGraph(Object model) { |
158
|
|
|
|
159
|
|
|
// Recreates a Model from a Object |
160
|
|
|
Object it = null; |
161
|
|
|
Model inputModel = ModelFactory.createDefaultModel(); |
162
|
|
|
|
163
|
|
|
try { |
164
|
|
|
it = model.getClass().getMethod("listStatements").invoke(model); |
165
|
|
|
while ((Boolean) it.getClass().getMethod("hasNext").invoke(it)) { |
166
|
|
|
Object stmt = it.getClass().getMethod("next").invoke(it); |
167
|
|
|
|
168
|
|
|
String subject = stmt.getClass().getMethod("getSubject").invoke(stmt).toString(); |
169
|
|
|
String predicate = stmt.getClass().getMethod("getPredicate").invoke(stmt).toString(); |
170
|
|
|
String object = stmt.getClass().getMethod("getObject").invoke(stmt).toString(); |
171
|
|
|
|
172
|
|
|
Resource r = ResourceFactory.createResource(subject); |
173
|
|
|
Property p = ResourceFactory.createProperty(predicate); |
174
|
|
|
inputModel.add(r, p, object); |
175
|
|
|
|
176
|
|
|
} |
177
|
|
|
|
178
|
|
|
} catch (IllegalAccessException e) { |
179
|
|
|
// TODO Auto-generated catch block |
180
|
|
|
e.printStackTrace(); |
181
|
|
|
} catch (IllegalArgumentException e) { |
182
|
|
|
// TODO Auto-generated catch block |
183
|
|
|
e.printStackTrace(); |
184
|
|
|
} catch (InvocationTargetException e) { |
185
|
|
|
// TODO Auto-generated catch block |
186
|
|
|
e.printStackTrace(); |
187
|
|
|
} catch (NoSuchMethodException e) { |
188
|
|
|
// TODO Auto-generated catch block |
189
|
|
|
e.printStackTrace(); |
190
|
|
|
} catch (SecurityException e) { |
191
|
|
|
// TODO Auto-generated catch block |
192
|
|
|
e.printStackTrace(); |
193
|
|
|
} |
194
|
|
|
|
195
|
|
|
return inputModel; |
196
|
|
|
} |
197
|
|
|
|
198
|
|
|
private Statement markGraphSemanticLevel(Model inputModel, String rulesFileName, String LOVFileName) { |
199
|
|
|
// Variables initializations |
200
|
|
|
ResIterator resourceSet = inputModel.listSubjects(); |
201
|
|
|
Model innerModel = ModelFactory.createDefaultModel(); |
202
|
|
|
Resource r = resourceSet.nextResource(); |
203
|
|
|
Property p = ResourceFactory.createProperty("sstamp:hassemanticlevel"); |
204
|
|
|
|
205
|
|
|
// Tive que criar um model para trabalhar com um Resource |
206
|
|
|
Statement outputGraphSemanticLevel = innerModel.createStatement(r, p, "sstamp:notMarked"); |
207
|
|
|
|
208
|
|
|
// Identify the levels of each statement on the inputGraph |
209
|
|
|
StmtIterator statementSet = inputModel.listStatements(); |
210
|
|
|
Integer valueLevel = 0; |
211
|
|
|
|
212
|
|
|
while (statementSet.hasNext()) { |
213
|
|
|
Statement s = statementSet.nextStatement(); |
214
|
|
|
|
215
|
|
|
String semanticLevel = assessSemanticLevel(s, rulesFileName, LOVFileName); |
216
|
|
|
|
217
|
|
|
if (valueLevel < assessedValueLevel) { |
218
|
|
|
outputGraphSemanticLevel = innerModel.createStatement(r, p, semanticLevel); |
219
|
|
|
valueLevel = assessedValueLevel; |
220
|
|
|
|
221
|
|
|
} |
222
|
|
|
|
223
|
|
|
} |
224
|
|
|
return outputGraphSemanticLevel; |
225
|
|
|
} |
226
|
|
|
|
227
|
|
|
private static String assessSemanticLevel(Statement s, String rulesFileName, String LOVFileName) { |
228
|
|
|
Interpreter i = new Interpreter(); |
229
|
|
|
|
230
|
|
|
String assessedDescriptionLevel = "NotIdentified"; |
231
|
|
|
|
232
|
|
|
// Is Literal? |
233
|
|
|
// if (s.getLiteral() != null) |
234
|
|
|
// assessedLevel="laiid:low"; |
235
|
|
|
|
236
|
|
|
String inputSubject = s.getSubject().toString(); |
237
|
|
|
String inputPredicate = s.getPredicate().toString(); |
238
|
|
|
String inputObject = s.getObject().toString(); |
239
|
|
|
|
240
|
|
|
try { |
241
|
|
|
// Open LOV xml file |
242
|
|
|
DocumentBuilderFactory docBuilderFactory2 = DocumentBuilderFactory.newInstance(); |
243
|
|
|
DocumentBuilder docBuilder2 = docBuilderFactory2.newDocumentBuilder(); |
244
|
|
|
Document doc2 = docBuilder2.parse(new File(LOVFileName.toString())); |
245
|
|
|
|
246
|
|
|
// Open SemanticLevelFrameWork files |
247
|
|
|
DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); |
248
|
|
|
DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder(); |
249
|
|
|
Document doc = docBuilder.parse(new File(rulesFileName)); |
250
|
|
|
NodeList listOfFrames = doc.getElementsByTagName("Frame"); |
251
|
|
|
int totalFrames = listOfFrames.getLength(); |
252
|
|
|
|
253
|
|
|
// valida variaveis para o interpreter |
254
|
|
|
i.set("inputSubject", inputSubject); |
255
|
|
|
i.set("inputPredicate", inputPredicate); |
256
|
|
|
i.set("inputObject", inputObject); |
257
|
|
|
|
258
|
|
|
// Get Prefix |
259
|
|
|
String prefixo = ""; |
260
|
|
|
if (inputPredicate.contains(":")) { |
261
|
|
|
int index = inputPredicate.indexOf(":"); |
262
|
|
|
prefixo = inputPredicate.substring(0, index); |
263
|
|
|
} |
264
|
|
|
|
265
|
|
|
i.set("isVocabulary", isVocabulary(prefixo, doc2)); |
266
|
|
|
i.set("isOntology", isOntology(prefixo, doc2)); |
267
|
|
|
i.set("s", s); |
268
|
|
|
|
269
|
|
|
// busca as regras |
270
|
|
|
for (int k = 0; k < totalFrames; k++) { |
271
|
|
|
Node ruleFrameNode = listOfFrames.item(k); |
272
|
|
|
if (ruleFrameNode.getNodeType() == Node.ELEMENT_NODE) { |
273
|
|
|
Element ruleFrameElement = (Element) ruleFrameNode; |
274
|
|
|
NodeList ruleList = ruleFrameElement.getElementsByTagName("Rule"); |
275
|
|
|
Element ruleElement = (Element) ruleList.item(0); |
276
|
|
|
NodeList textRList = ruleElement.getChildNodes(); |
277
|
|
|
NodeList levelValueList = ruleFrameElement.getElementsByTagName("LevelValue"); |
278
|
|
|
Element levelValueElement = (Element) levelValueList.item(0); |
279
|
|
|
NodeList levelDescriptionList = ruleFrameElement.getElementsByTagName("LevelDescription"); |
280
|
|
|
Element levelDescriptionElement = (Element) levelDescriptionList.item(0); |
281
|
|
|
|
282
|
|
|
NodeList textLValueList = levelValueElement.getChildNodes(); |
283
|
|
|
NodeList textLDescriptionList = levelDescriptionElement.getChildNodes(); |
284
|
|
|
|
285
|
|
|
// Rule Evaluation |
286
|
|
|
if ((Boolean) i.eval(textRList.item(0).getNodeValue().trim())) { |
287
|
|
|
assessedDescriptionLevel = textLDescriptionList.item(0).getNodeValue().trim(); |
288
|
|
|
assessedValueLevel = Integer.valueOf(textLValueList.item(0).getNodeValue()); |
289
|
|
|
|
290
|
|
|
// TODO avaliar sair do for |
291
|
|
|
} |
292
|
|
|
} |
293
|
|
|
} |
294
|
|
|
|
295
|
|
|
} catch (ParserConfigurationException e) { |
296
|
|
|
// TODO Auto-generated catch block |
297
|
|
|
e.printStackTrace(); |
298
|
|
|
} catch (SAXException e) { |
299
|
|
|
// TODO Auto-generated catch block |
300
|
|
|
e.printStackTrace(); |
301
|
|
|
} catch (IOException e) { |
302
|
|
|
// TODO Auto-generated catch block |
303
|
|
|
e.printStackTrace(); |
304
|
|
|
} catch (EvalError e) { |
305
|
|
|
// TODO Auto-generated catch block |
306
|
|
|
e.printStackTrace(); |
307
|
|
|
} |
308
|
|
|
|
309
|
|
|
return assessedDescriptionLevel; |
310
|
|
|
} |
311
|
|
|
|
312
|
|
View Code Duplication |
private static boolean isOntology(String prefix, Document doc) { |
|
|
|
|
313
|
|
|
NodeList listOfResults = doc.getElementsByTagName("result"); |
314
|
|
|
int totalResults = listOfResults.getLength(); |
315
|
|
|
for (int k = 0; k < totalResults; k++) { |
316
|
|
|
Node resultNode = listOfResults.item(k); |
317
|
|
|
Element resultElement = (Element) resultNode; |
318
|
|
|
NodeList bindingList = resultElement.getElementsByTagName("binding"); |
319
|
|
|
Element bindingPrefixElement = (Element) bindingList.item(0); |
320
|
|
|
NodeList literalList = bindingPrefixElement.getElementsByTagName("literal"); |
321
|
|
|
Element literalElement = (Element) literalList.item(0); |
322
|
|
|
NodeList textLiList = literalElement.getChildNodes(); |
323
|
|
|
if (prefix.equals(textLiList.item(0).getNodeValue().trim())) { |
324
|
|
|
Element bindingDescElement = (Element) bindingList.item(3); |
325
|
|
|
NodeList literalDescList = bindingDescElement.getElementsByTagName("literal"); |
326
|
|
|
Element literalDescElement = (Element) literalDescList.item(0); |
327
|
|
|
NodeList textDescList = literalDescElement.getChildNodes(); |
328
|
|
|
Element bindingTitleElement = (Element) bindingList.item(2); |
329
|
|
|
NodeList literalTitleList = bindingTitleElement.getElementsByTagName("literal"); |
330
|
|
|
Element literalTitleElement = (Element) literalTitleList.item(0); |
331
|
|
|
NodeList textTitleList = literalTitleElement.getChildNodes(); |
332
|
|
|
if (textDescList.item(0).getNodeValue().trim().toLowerCase().contains("ontology")) { |
333
|
|
|
return true; |
334
|
|
|
} |
335
|
|
|
if (textTitleList.item(0).getNodeValue().trim().toLowerCase().contains("ontology")) { |
336
|
|
|
return true; |
337
|
|
|
} |
338
|
|
|
} |
339
|
|
|
} |
340
|
|
|
return false; |
341
|
|
|
} |
342
|
|
|
|
343
|
|
View Code Duplication |
private static boolean isVocabulary(String prefix, Document doc) { |
|
|
|
|
344
|
|
|
NodeList listOfResults = doc.getElementsByTagName("result"); |
345
|
|
|
int totalResults = listOfResults.getLength(); |
346
|
|
|
for (int k = 0; k < totalResults; k++) { |
347
|
|
|
Node resultNode = listOfResults.item(k); |
348
|
|
|
Element resultElement = (Element) resultNode; |
349
|
|
|
NodeList bindingList = resultElement.getElementsByTagName("binding"); |
350
|
|
|
Element bindingPrefixElement = (Element) bindingList.item(0); |
351
|
|
|
NodeList literalList = bindingPrefixElement.getElementsByTagName("literal"); |
352
|
|
|
Element literalElement = (Element) literalList.item(0); |
353
|
|
|
NodeList textLiList = literalElement.getChildNodes(); |
354
|
|
|
if (prefix.equals(textLiList.item(0).getNodeValue().trim())) { |
355
|
|
|
Element bindingDescElement = (Element) bindingList.item(3); |
356
|
|
|
NodeList literalDescList = bindingDescElement.getElementsByTagName("literal"); |
357
|
|
|
Element literalDescElement = (Element) literalDescList.item(0); |
358
|
|
|
NodeList textDescList = literalDescElement.getChildNodes(); |
359
|
|
|
Element bindingTitleElement = (Element) bindingList.item(2); |
360
|
|
|
NodeList literalTitleList = bindingTitleElement.getElementsByTagName("literal"); |
361
|
|
|
Element literalTitleElement = (Element) literalTitleList.item(0); |
362
|
|
|
NodeList textTitleList = literalTitleElement.getChildNodes(); |
363
|
|
|
if (textDescList.item(0).getNodeValue().trim().toLowerCase().contains("vocabulary")) { |
364
|
|
|
return true; |
365
|
|
|
} |
366
|
|
|
if (textTitleList.item(0).getNodeValue().trim().toLowerCase().contains("vocabulary")) { |
367
|
|
|
return true; |
368
|
|
|
} |
369
|
|
|
} |
370
|
|
|
} |
371
|
|
|
return false; |
372
|
|
|
} |
373
|
|
|
|
374
|
|
|
} |
375
|
|
|
|