1
|
|
|
package br.ufrj.ppgi.greco.kettle; |
2
|
|
|
|
3
|
|
|
import java.text.DecimalFormat; |
4
|
|
|
import java.text.DecimalFormatSymbols; |
5
|
|
|
import java.text.SimpleDateFormat; |
6
|
|
|
import java.util.Iterator; |
7
|
|
|
import java.util.List; |
8
|
|
|
import java.util.Locale; |
9
|
|
|
|
10
|
|
|
import org.pentaho.di.core.exception.KettleException; |
11
|
|
|
import org.pentaho.di.core.exception.KettleStepException; |
12
|
|
|
import org.pentaho.di.core.row.RowDataUtil; |
13
|
|
|
import org.pentaho.di.core.row.RowMetaInterface; |
14
|
|
|
import org.pentaho.di.trans.Trans; |
15
|
|
|
import org.pentaho.di.trans.TransMeta; |
16
|
|
|
import org.pentaho.di.trans.step.BaseStep; |
17
|
|
|
import org.pentaho.di.trans.step.StepDataInterface; |
18
|
|
|
import org.pentaho.di.trans.step.StepInterface; |
19
|
|
|
import org.pentaho.di.trans.step.StepMeta; |
20
|
|
|
import org.pentaho.di.trans.step.StepMetaInterface; |
21
|
|
|
|
22
|
|
|
import br.ufrj.ppgi.greco.kettle.plugin.tools.datatable.DataTable; |
23
|
|
|
|
24
|
|
|
public class DataPropertyMappingStep extends BaseStep implements StepInterface { |
25
|
|
|
|
26
|
|
|
public DataPropertyMappingStep(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, |
27
|
|
|
TransMeta transMeta, Trans trans) { |
28
|
|
|
super(stepMeta, stepDataInterface, copyNr, transMeta, trans); |
29
|
|
|
} |
30
|
|
|
|
31
|
|
|
@Override |
32
|
|
|
public boolean init(StepMetaInterface smi, StepDataInterface sdi) { |
33
|
|
|
if (super.init(smi, sdi)) { |
34
|
|
|
// TODO init something here if needed |
35
|
|
|
// ... |
36
|
|
|
return true; |
37
|
|
|
} else |
38
|
|
|
return false; |
39
|
|
|
} |
40
|
|
|
|
41
|
|
|
@Override |
42
|
|
|
public void dispose(StepMetaInterface smi, StepDataInterface sdi) { |
43
|
|
|
super.dispose(smi, sdi); |
44
|
|
|
|
45
|
|
|
// TODO finalize something here if needed |
46
|
|
|
// ... |
47
|
|
|
} |
48
|
|
|
|
49
|
|
|
/** |
50
|
|
|
* Metodo chamado para cada linha que entra no step |
51
|
|
|
*/ |
52
|
|
|
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException { |
53
|
|
|
|
54
|
|
|
DataPropertyMappingStepMeta meta = (DataPropertyMappingStepMeta) smi; |
55
|
|
|
DataPropertyMappingStepData data = (DataPropertyMappingStepData) sdi; |
56
|
|
|
|
57
|
|
|
// Obtem linha do fluxo de entrada e termina caso nao haja mais entrada |
58
|
|
|
Object[] row = getRow(); |
59
|
|
|
if (row == null) { // Nao ha mais linhas de dados |
60
|
|
|
setOutputDone(); |
61
|
|
|
return false; |
62
|
|
|
} |
63
|
|
|
|
64
|
|
|
if (first) { // Executa apenas uma vez. Variavel first definida na |
65
|
|
|
// superclasse |
66
|
|
|
first = false; |
67
|
|
|
|
68
|
|
|
// Obtem todas as colunas ate o step anterior. |
69
|
|
|
// Chamar apenas apos chamar getRow() |
70
|
|
|
RowMetaInterface rowMeta = getInputRowMeta(); |
71
|
|
|
data.outputRowMeta = rowMeta.clone(); |
72
|
|
|
|
73
|
|
|
// Adiciona os metadados do step atual |
74
|
|
|
meta.getFields(data.outputRowMeta, getStepname(), null, null, this); |
75
|
|
|
|
76
|
|
|
// TODO Outras opera��es que devem ser executadas apenas uma vez |
77
|
|
|
} |
78
|
|
|
|
79
|
|
|
/* |
80
|
|
|
* Logica do step: leitura de campos de entrada e internos e geracao do |
81
|
|
|
* campo de saida |
82
|
|
|
*/ |
83
|
|
|
|
84
|
|
|
// Add rdf:type |
85
|
|
|
String subject = getInputRowMeta().getString(row, meta.getSubjectUriFieldName(), ""); |
86
|
|
|
|
87
|
|
|
List<String> typesUri = meta.getRdfTypeUris(); |
88
|
|
|
Iterator<String> it = typesUri.iterator(); |
89
|
|
|
while (it.hasNext()) { |
90
|
|
|
String type = (String) it.next(); |
91
|
|
|
putOutRow(row, meta, data, subject, "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", type); |
92
|
|
|
} |
93
|
|
|
|
94
|
|
|
// Add data properties |
95
|
|
|
DataTable<String> table = meta.getMapTable(); |
96
|
|
|
for (int i = 0; i < table.size(); i++) { |
97
|
|
|
|
98
|
|
|
String predicateField = table.getValue(i, DataPropertyMappingStepMeta.Field.MAP_TABLE_PREDICATE_URI.name()); |
99
|
|
|
String predicate = getInputRowMeta().getString(row, predicateField, predicateField); |
100
|
|
|
|
101
|
|
|
String objectField = table.getValue(i, |
102
|
|
|
DataPropertyMappingStepMeta.Field.MAP_TABLE_OBJECT_FIELD_NAME.name()); |
103
|
|
|
int index = getInputRowMeta().indexOfValue(objectField); |
104
|
|
|
|
105
|
|
|
String datatype = table.getValue(i, DataPropertyMappingStepMeta.Field.MAP_TABLE_TYPED_LITERAL.name()); |
106
|
|
|
|
107
|
|
|
String langtagValue = table.getValue(i, DataPropertyMappingStepMeta.Field.MAP_TABLE_LANGUAGE_TAG.name()); |
108
|
|
|
String langtagField = table.getValue(i, |
109
|
|
|
DataPropertyMappingStepMeta.Field.MAP_TABLE_LANGTAG_FIELD_NAME.name()); |
110
|
|
|
|
111
|
|
|
String langtag = null; |
112
|
|
|
if (langtagField != null && !langtagField.isEmpty()) { |
113
|
|
|
langtag = getInputRowMeta().getString(row, getInputRowMeta().indexOfValue(langtagField)); |
114
|
|
|
} |
115
|
|
|
if (langtag == null || langtag.isEmpty()) |
116
|
|
|
langtag = langtagValue; |
117
|
|
|
|
118
|
|
|
String object = null; |
119
|
|
|
|
120
|
|
|
try { |
121
|
|
|
if ("xsd:float".equals(datatype) || "xsd:double".equals(datatype) || "xsd:decimal".equals(datatype)) { |
122
|
|
|
object = new DecimalFormat("0.0#########", new DecimalFormatSymbols(Locale.US)) |
123
|
|
|
.format(getInputRowMeta().getNumber(row, index)); |
124
|
|
|
} else if ("xsd:dateTime".equals(datatype)) { |
125
|
|
|
object = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'") |
126
|
|
|
.format(getInputRowMeta().getDate(row, index)); |
127
|
|
|
} else if ("xsd:date".equals(datatype)) { |
128
|
|
|
object = new SimpleDateFormat("yyyy-MM-dd").format(getInputRowMeta().getDate(row, index)); |
129
|
|
|
} else if ("xsd:integer".equals(datatype)) { |
130
|
|
|
object = getInputRowMeta().getInteger(row, index).toString(); |
131
|
|
|
} else { |
132
|
|
|
object = getInputRowMeta().getString(row, index); |
133
|
|
|
} |
134
|
|
|
} catch (Exception e) { |
135
|
|
|
object = ""; |
136
|
|
|
} |
137
|
|
|
|
138
|
|
|
// Rogers (Jul./2012): Quando o repositorio e' database, o valor do |
139
|
|
|
// datatype quando vazio e' null. |
140
|
|
|
if (datatype != null) { |
141
|
|
|
datatype = datatype.replace("xsd:", "http://www.w3.org/2001/XMLSchema#"); |
142
|
|
|
} |
143
|
|
|
|
144
|
|
|
if (subject != null && predicate != null && object != null && !"".equals(subject) && !"".equals(predicate) |
145
|
|
|
&& !"".equals(object)) { |
146
|
|
|
putOutRow(row, meta, data, subject, predicate, object, datatype, langtag); |
147
|
|
|
} |
148
|
|
|
} |
149
|
|
|
|
150
|
|
|
return true; |
151
|
|
|
} |
152
|
|
|
|
153
|
|
|
private void putOutRow(Object[] inputRow, DataPropertyMappingStepMeta meta, DataPropertyMappingStepData data, |
154
|
|
|
String subject, String predicate, String object) throws KettleStepException { |
155
|
|
|
putOutRow(inputRow, meta, data, subject, predicate, object, "", ""); |
156
|
|
|
} |
157
|
|
|
|
158
|
|
|
/* |
159
|
|
|
* Casos tratados: keep & !addS ==> S1 : outRow = inRow + p + o keep & addS |
160
|
|
|
* ==> S2 : outRow = inRow + s + p + o !keep & addS ==> S3 : outRow = s + p |
161
|
|
|
* + o !keep & !addS ==> S4 : outRow = uri + p + o (uri = campo de entrada) |
162
|
|
|
* |
163
|
|
|
* addS = (uri != s) && defined(s) |
164
|
|
|
*/ |
165
|
|
|
private void putOutRow(Object[] inputRow, DataPropertyMappingStepMeta meta, DataPropertyMappingStepData data, |
166
|
|
|
String subject, String predicate, String object, String datatype, String langtag) |
167
|
|
|
throws KettleStepException { |
168
|
|
|
|
169
|
|
|
int outputRowPos = 0; |
170
|
|
|
Object[] outputRow = null; |
171
|
|
|
|
172
|
|
|
// Determina se deve repassar campos de entrada |
173
|
|
|
if (meta.isKeepInputFields()) { |
174
|
|
|
outputRow = inputRow; |
175
|
|
|
outputRowPos = getInputRowMeta().size(); // S1, S2 |
176
|
|
|
} else { |
177
|
|
|
outputRow = new Object[5]; // S3, S4 |
178
|
|
|
} |
179
|
|
|
|
180
|
|
|
if (meta.isThereAdditionalSubjectOutputField()) { // addS |
181
|
|
|
outputRow = RowDataUtil.addValueData(outputRow, outputRowPos++, subject); // S2, |
182
|
|
|
// S3 |
183
|
|
|
} else { |
184
|
|
|
if (!meta.isKeepInputFields()) { |
185
|
|
|
outputRow = RowDataUtil.addValueData(outputRow, outputRowPos++, subject); // S4 |
186
|
|
|
} |
187
|
|
|
} |
188
|
|
|
|
189
|
|
|
// S1, S2, S3, S4 |
190
|
|
|
outputRow = RowDataUtil.addValueData(outputRow, outputRowPos++, predicate); |
191
|
|
|
outputRow = RowDataUtil.addValueData(outputRow, outputRowPos++, object); |
192
|
|
|
outputRow = RowDataUtil.addValueData(outputRow, outputRowPos++, datatype); |
193
|
|
|
|
194
|
|
|
if (datatype == null || datatype.isEmpty() || "http://www.w3.org/2001/XMLSchema#string".equals(datatype)) { |
195
|
|
|
outputRow = RowDataUtil.addValueData(outputRow, outputRowPos++, langtag); |
196
|
|
|
} else { |
197
|
|
|
outputRow = RowDataUtil.addValueData(outputRow, outputRowPos++, ""); |
198
|
|
|
} |
199
|
|
|
|
200
|
|
|
// Coloca linha no fluxo |
201
|
|
|
putRow(data.outputRowMeta, outputRow); |
202
|
|
|
} |
203
|
|
|
} |
204
|
|
|
|