1 | <?php |
||
20 | class FactsFactory implements FactsFactoryInterface { |
||
21 | |||
22 | protected $profile; |
||
23 | protected $modelClass; |
||
24 | protected $counter = array( |
||
25 | 'triple' => 0, // rdf triples in facts |
||
26 | 'error' => 0, // facts contains error |
||
27 | 'insane' => 0, // raw data unaccepted |
||
28 | 'entity' => 0, // raw data processed |
||
29 | ); |
||
30 | |||
31 | |||
32 | 1 | public function __construct( array $profile =array() ) |
|
68 | |||
69 | |||
/**
 * Default dataCleaner callback: prunes empty values from a two-level array.
 *
 * Each nested array is passed through array_filter() first, then the top
 * level is filtered the same way, so arrays left empty by the first pass are
 * dropped too. Keys are preserved at both levels.
 *
 * Note: array_filter() without a callback removes every value that converts
 * to boolean false (null, '', '0', 0, false, empty array).
 *
 * @param array $data data to clean
 * @return array the cleaned data
 */
public static function REMOVE_EMPTY( array $data)
{
    // Only nested arrays are filtered on the first pass; scalars pass through.
    $pruneNested = function ($item) {
        if (is_array($item)) {
            return array_filter($item);
        }
        return $item;
    };

    // array_map() keeps the original keys when given a single array.
    return array_filter(array_map($pruneNested, $data));
}
||
81 | |||
/**
 * Default dataValidator callback: reports facts that contain no triples.
 *
 * @param \BOTK\ModelInterface $data facts to inspect
 * @return string|false 'No facts found.' when getTripleCount() is falsy,
 *                      false (meaning "no error") otherwise
 */
public static function NOT_EMPTY_FACTS( \BOTK\ModelInterface $data)
{
    if ($data->getTripleCount()) {
        return false;
    }

    return 'No facts found.';
}
||
89 | |||
90 | |||
/**
 * Create facts from raw data.
 *
 * Please note that empty facts do not always mean an error (i.e. no facts is
 * a fact). If you do not want empty facts, use a dataValidator.
 *
 * Processing steps:
 *  1. sanitize the raw data with the profile's 'rawdataSanitizer' callback;
 *  2. map and clean it with the 'datamapper' and 'dataCleaner' callbacks;
 *  3. build the facts through {modelClass}::fromArray();
 *  4. run the 'factsErrorDetector' callback on the facts;
 *  5. verify that the error and insane rates stay within the profile limits.
 *
 * The rate limits are verified on every call, including calls that end with
 * a per-record warning; otherwise a run made only of bad records would never
 * trip the limits.
 *
 * @param mixed $rawData raw record handed to the sanitizer
 * @return mixed the facts returned by {modelClass}::fromArray()
 * @throws InvalidRawDataWarning   when the sanitizer yields an empty result
 * @throws FactsValidatorWarning   when the error detector reports a problem
 * @throws TooManyErrorsException  when the error rate exceeds 'resilienceToErrors'
 * @throws TooManyInsanesException when the insane rate exceeds 'resilienceToInsanes'
 */
public function factualize($rawData)
{
    $rawdataSanitizer = $this->profile['rawdataSanitizer'];
    $validRawData = $rawdataSanitizer($rawData);
    $this->counter['entity']++;

    if (empty($validRawData)) {
        $this->counter['insane']++;
        // Check the limits before raising the per-record warning, so that a
        // stream of unacceptable records can still abort the processing.
        $this->assertWithinResilienceThresholds();
        throw new InvalidRawDataWarning("Invalid rawdata",$rawData);
    }

    $datamapper = $this->profile['datamapper'];
    $dataCleaner = $this->profile['dataCleaner'];
    $factsErrorDetector = $this->profile['factsErrorDetector'];

    $data = $dataCleaner($datamapper($validRawData));
    $facts = call_user_func($this->modelClass.'::fromArray',$data,$this->profile['modelOptions']);
    $this->counter['triple'] += $facts->getTripleCount();

    // The detector returns a falsy value when the facts are fine,
    // otherwise the error description.
    $error = $factsErrorDetector($facts);
    if ($error) {
        $this->counter['error']++;
        $this->assertWithinResilienceThresholds();
        throw new FactsValidatorWarning($error,$facts);
    }

    $this->assertWithinResilienceThresholds();

    return $facts;
}

/**
 * Throws when the error or insane rate exceeds the limits set in the profile.
 *
 * Nothing is checked until more than 'entityThreshold' entities have been
 * processed, nor when the entity counter is not positive (no rate can be
 * computed in that case).
 *
 * @throws TooManyErrorsException
 * @throws TooManyInsanesException
 */
private function assertWithinResilienceThresholds()
{
    $entities = $this->counter['entity'];
    if ($entities <= 0 || $entities <= $this->profile['entityThreshold']) {
        return;
    }

    // ensure that not too many errors
    $errorRate = $this->counter['error'] / $entities;
    if ($errorRate > $this->profile['resilienceToErrors']) {
        $x = $this->profile['resilienceToErrors']*100;
        throw new TooManyErrorsException("Error rate in data processing exceeded the $x% threshold");
    }

    // ensure that not too many insane raw data
    $insaneRate = $this->counter['insane'] / $entities;
    if ($insaneRate > $this->profile['resilienceToInsanes']) {
        $x = $this->profile['resilienceToInsanes']*100;
        throw new TooManyInsanesException("Unacceptable data rate exceeded the $x% threshold");
    }
}
||
135 | |||
136 | |||
/**
 * Build the Turtle header for the generated linked data.
 *
 * The result is the header returned by {modelClass}::getTurtleHeader(),
 * followed, when the profile's 'documentURL' is not null, by metadata
 * describing the document: its foaf:Document type, an optional dct:source,
 * and a void:Dataset carrying the current 'triple' and 'entity' counters.
 *
 * Requires the foaf:, dct: and void: prefixes to be defined in the model.
 *
 * @return string the Turtle header followed by the metadata (if any)
 */
public function generateLinkedDataHeader()
{
    $metadata = '';
    $documentURL = $this->profile['documentURL'];

    if ($documentURL !== null) {
        $metadata .= "\n<{$documentURL}> a foaf:Document .\n";

        if (!empty($this->profile['source'])) {
            $metadata .= "<{$documentURL}> dct:source <{$this->profile['source']}>. \n";
        }

        // The VoID property is void:dataDump; IRIs are case-sensitive, so the
        // all-lowercase spelling would refer to a property that does not exist.
        $metadata .= "[] a void:Dataset; void:dataDump <{$documentURL}>;void:triples {$this->counter['triple']} ;void:entities {$this->counter['entity']}.\n";
    }

    return call_user_func($this->modelClass.'::getTurtleHeader') . $metadata;
}
||
151 | |||
152 | |||
153 | public function generateLinkedDataFooter() |
||
157 | |||
158 | |||
/**
 * Add a value to one of the factory counters.
 *
 * @param string $counter name of an existing counter (a key of $this->counter)
 * @param mixed  $val     amount to add; converted to integer before adding
 * @throws \InvalidArgumentException when the counter name is not known
 */
public function addToCounter($counter,$val=1)
{
    if (array_key_exists($counter, $this->counter)) {
        $this->counter[$counter] = $this->counter[$counter] + (int) $val;
        return;
    }

    throw new \InvalidArgumentException("Invalid counter name");
}
||
166 | |||
167 | |||
168 | public function getCounters() |
||
172 | |||
173 | } |