1
|
|
|
package br.ufrj.ppgi.greco.kettle.silk; |
2
|
|
|
|
3
|
|
|
import java.util.ArrayList; |
4
|
|
|
import java.util.List; |
5
|
|
|
|
6
|
|
|
import javax.xml.bind.JAXBContext; |
7
|
|
|
import javax.xml.bind.Marshaller; |
8
|
|
|
import javax.xml.bind.annotation.XmlElement; |
9
|
|
|
import javax.xml.bind.annotation.XmlRootElement; |
10
|
|
|
|
11
|
|
|
import org.pentaho.di.core.Const; |
12
|
|
|
|
13
|
|
|
import br.ufrj.ppgi.greco.kettle.LinkDiscoveryToolStepMeta; |
14
|
|
|
|
15
|
|
|
@XmlRootElement(name = "Silk") |
16
|
|
|
public class Silk { |
17
|
|
|
|
18
|
|
|
@XmlElement(name = "Prefixes") |
19
|
|
|
protected Prefixes prefixes = new Prefixes(); |
20
|
|
|
|
21
|
|
|
@XmlElement(name = "DataSources") |
22
|
|
|
protected Datasets dataSources = new Datasets(); |
23
|
|
|
|
24
|
|
|
@XmlElement(name = "Interlinks") |
25
|
|
|
protected Interlinks interlinks = new Interlinks(); |
26
|
|
|
|
27
|
|
|
@XmlElement(name = "Outputs") |
28
|
|
|
protected Datasets outputs = new Datasets(); |
29
|
|
|
|
30
|
|
|
public Silk() { |
31
|
|
|
} |
32
|
|
|
|
33
|
|
|
public Silk(LinkDiscoveryToolStepMeta input) { |
34
|
|
|
this.setPrefixes(input); |
35
|
|
|
this.setDataSources(input); |
36
|
|
|
this.setInterlinks(input); |
37
|
|
|
this.setOutputs(input); |
38
|
|
|
} |
39
|
|
|
|
40
|
|
|
/** |
41
|
|
|
* Set the <Prefixes> tag in the Silk SLS file |
42
|
|
|
* |
43
|
|
|
* @param input |
44
|
|
|
* information given by the user on Kettle |
45
|
|
|
*/ |
46
|
|
|
public void setPrefixes(LinkDiscoveryToolStepMeta input) { |
47
|
|
|
final int PREFIX = 0; |
48
|
|
|
final int NAMESPACE = 1; |
49
|
|
|
for (int i = 0; i < input.getPrefixes().size(); i++) { |
50
|
|
|
List<String> prefix = input.getPrefixes().getRow(i); |
51
|
|
|
Prefix p = new Prefix(prefix.get(PREFIX), prefix.get(NAMESPACE)); |
52
|
|
|
this.prefixes.add(p); |
53
|
|
|
} |
54
|
|
|
} |
55
|
|
|
|
56
|
|
|
/** |
57
|
|
|
* Set the <DataSources> tag in the Silk SLS file |
58
|
|
|
* |
59
|
|
|
* @param input |
60
|
|
|
* information given by the user on Kettle |
61
|
|
|
*/ |
62
|
|
|
public void setDataSources(LinkDiscoveryToolStepMeta input) { |
63
|
|
|
if (input.isSparqlEndpoint(input.getSourceEndpoint())) { |
64
|
|
|
setDataSource(input, Dataset.SPARQL, "source", input.getSourceEndpoint(), input.getSourceGraph()); |
65
|
|
|
} else { |
66
|
|
|
setDataSource(input, input.getFileType(input.getSourceEndpoint()), "source", input.getSourceEndpoint(), |
67
|
|
|
input.getSourceGraph()); |
68
|
|
|
} |
69
|
|
|
|
70
|
|
|
if (input.isSparqlEndpoint(input.getTargetEndpoint())) { |
71
|
|
|
setDataSource(input, Dataset.SPARQL, "target", input.getTargetEndpoint(), input.getTargetGraph()); |
72
|
|
|
} else { |
73
|
|
|
setDataSource(input, input.getFileType(input.getTargetEndpoint()), "target", input.getTargetEndpoint(), |
74
|
|
|
input.getTargetGraph()); |
75
|
|
|
} |
76
|
|
|
} |
77
|
|
|
|
78
|
|
|
private String getFileFormat(String fileType) { |
79
|
|
|
switch (fileType) { |
80
|
|
|
case "rdf": |
81
|
|
|
return "RDF/XML"; |
82
|
|
|
case "ttl": |
83
|
|
|
return "Turtle"; |
84
|
|
|
case "nt": |
85
|
|
|
return "N-Triples"; |
86
|
|
|
default: |
87
|
|
|
return "RDF/XML"; |
88
|
|
|
} |
89
|
|
|
} |
90
|
|
|
|
91
|
|
|
public void setDataSource(LinkDiscoveryToolStepMeta input, String type, String id, String endpoint, String graph) { |
92
|
|
|
Dataset source = null; |
93
|
|
|
switch (type) { |
94
|
|
|
case Dataset.SPARQL: |
95
|
|
|
source = new Dataset(id, Dataset.SPARQL); |
96
|
|
|
source.add(new Param("endpointURI", Const.NVL(endpoint, ""))); |
97
|
|
|
source.add(new Param("graph", Const.NVL(graph, ""))); |
98
|
|
|
break; |
99
|
|
|
case "rdf": |
100
|
|
|
case "ttl": |
101
|
|
|
case "nt": |
102
|
|
|
source = new Dataset(id, Dataset.RDF); |
103
|
|
|
source.add(new Param("file", Const.NVL(endpoint, ""))); |
104
|
|
|
source.add(new Param("format", Const.NVL(getFileFormat(type), ""))); |
105
|
|
|
source.add(new Param("graph", Const.NVL(graph, ""))); |
106
|
|
|
break; |
107
|
|
|
case "csv": |
108
|
|
|
case "xml": |
109
|
|
|
source = new Dataset(id, type); |
110
|
|
|
source.add(new Param("file", Const.NVL(endpoint, ""))); |
111
|
|
|
break; |
112
|
|
|
} |
113
|
|
|
this.dataSources.add(source); |
114
|
|
|
} |
115
|
|
|
|
116
|
|
|
/** |
117
|
|
|
* Set the <Interlinks> tag in the Silk SLS file |
118
|
|
|
* |
119
|
|
|
* @param input |
120
|
|
|
* information given by the user on Kettle |
121
|
|
|
*/ |
122
|
|
|
public void setInterlinks(LinkDiscoveryToolStepMeta input) { |
123
|
|
|
DataSource src = new DataSource("source", "a", input.getSourceRestriction()); |
124
|
|
|
DataSource tgt = new DataSource("target", "b", input.getTargetRestriction()); |
125
|
|
|
LinkageRule link = getLinkageRule(input); |
126
|
|
|
Outputs outputs = new Outputs(); |
127
|
|
|
outputs.addOutput(new Output("output")); |
128
|
|
|
Interlink interlink = new Interlink("link", src, tgt, link, outputs); |
129
|
|
|
this.interlinks.add(interlink); |
130
|
|
|
} |
131
|
|
|
|
132
|
|
|
/** |
133
|
|
|
* Set the <Outputs> tag in the Silk SLS file |
134
|
|
|
* |
135
|
|
|
* @param input |
136
|
|
|
* information given by the user on Kettle |
137
|
|
|
*/ |
138
|
|
|
public void setOutputs(LinkDiscoveryToolStepMeta input) { |
139
|
|
|
Dataset output = null; |
140
|
|
|
if (input.isSparqlOutput()){ |
141
|
|
|
output = new Dataset("output", Dataset.SPARQL); |
142
|
|
|
output.add(new Param("endpointURI", Const.NVL(input.getOutputEndpoint(), ""))); |
143
|
|
|
output.add(new Param("graph", Const.NVL(input.getOutputGraph(), ""))); |
144
|
|
|
}else{ |
145
|
|
|
output = new Dataset("output", "file"); |
146
|
|
|
output.add(new Param("file", input.getFilePath())); |
147
|
|
|
output.add(new Param("format", "N-Triples")); |
148
|
|
|
} |
149
|
|
|
this.outputs.add(output); |
150
|
|
|
} |
151
|
|
|
|
152
|
|
|
public List<Metric> getMetrics(LinkDiscoveryToolStepMeta input){ |
153
|
|
|
final int SOURCE_PATH = 0, TARGET_PATH = 1, METRIC = 2; |
154
|
|
|
List<Metric> metricList = new ArrayList<>(); |
155
|
|
|
for (int i = 0; i < input.getMetrics().size(); i++) { |
156
|
|
|
List<String> metrics = input.getMetrics().getRow(i); |
157
|
|
|
Metric metric = new Metric(i+1, metrics.get(METRIC)); |
158
|
|
|
metric.addInput(new Input("sourcePath" + (i+1), metrics.get(SOURCE_PATH))); |
159
|
|
|
metric.addInput(new Input("targetPath" + (i+1), metrics.get(TARGET_PATH))); |
160
|
|
|
metricList.add(metric); |
161
|
|
|
} |
162
|
|
|
return metricList; |
163
|
|
|
} |
164
|
|
|
|
165
|
|
|
public Aggregate createAggregation(LinkDiscoveryToolStepMeta input) { |
166
|
|
|
Aggregate aggregation = null; |
167
|
|
|
boolean hasNoAggregation = input.getMetrics() == null || input.getMetrics().size() == 0 || input.getMetrics().getValue(0, 2).isEmpty(); |
168
|
|
|
if (!hasNoAggregation) { |
169
|
|
|
aggregation = new Aggregate(input.getAggregationType(), input.getAggregationType()); |
170
|
|
|
aggregation.setMetrics(getMetrics(input)); |
171
|
|
|
} |
172
|
|
|
return aggregation; |
173
|
|
|
} |
174
|
|
|
|
175
|
|
|
private LinkageRule getLinkageRule(LinkDiscoveryToolStepMeta input) { |
176
|
|
|
LinkageRule link = new LinkageRule(input.getLinkageType()); |
177
|
|
|
if (input.getAggregationType() == null || input.getAggregationType().equals("")){ |
178
|
|
|
link.setMetrics(getMetrics(input)); |
179
|
|
|
} else { |
180
|
|
|
link = new LinkageRule(input.getLinkageType(), createAggregation(input)); |
181
|
|
|
} |
182
|
|
|
return link; |
183
|
|
|
} |
184
|
|
|
|
185
|
|
|
public static void main(String[] args) { |
186
|
|
|
LinkDiscoveryToolStepMeta input = new LinkDiscoveryToolStepMeta(); |
187
|
|
|
Silk s = new Silk(input); |
188
|
|
|
try { |
189
|
|
|
JAXBContext context = JAXBContext.newInstance(Silk.class); |
190
|
|
|
Marshaller m = context.createMarshaller(); |
191
|
|
|
m.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE); |
192
|
|
|
m.marshal(s, System.out); |
193
|
|
|
} catch (Exception e) { |
194
|
|
|
e.printStackTrace(); |
195
|
|
|
} |
196
|
|
|
} |
197
|
|
|
|
198
|
|
|
} |