Passed
Push — master ( b9ff22...31f6ed )
by Enrico
01:47
created

FactsFactory   A

Complexity

Total Complexity 27

Size/Duplication

Total Lines 162
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 5

Test Coverage

Coverage 58.97%

Importance

Changes 0
Metric Value
wmc 27
lcom 1
cbo 5
dl 0
loc 162
ccs 46
cts 78
cp 0.5897
rs 10
c 0
b 0
f 0

8 Methods

Rating   Name   Duplication   Size   Complexity  
A REMOVE_EMPTY() 0 8 3
A NOT_EMPTY_FACTS() 0 4 2
B factualize() 0 40 7
A generateLinkedDataHeader() 0 4 1
A generateLinkedDataFooter() 0 23 2
A addToCounter() 0 7 2
A getCounters() 0 4 1
B __construct() 0 35 9
1
<?php
2
namespace BOTK;
3
4
use BOTK\Exception\FactsValidatorWarning;
5
use BOTK\Exception\InvalidRawDataWarning;
6
use BOTK\Exception\TooManyErrorsException;
7
use BOTK\Exception\TooManyInsanesException;
8
9
/**
10
 * Create structured data from an array of raw data (i.e. just a sequence of scalars), managing errors.
11
 * The class provides an RDF triple counter and thresholds for data processing errors.
12
 * Options:
13
 * 	'datamapper' a callable that accepts a raw data array and create structured data as an array. Must be provided.
14
 *  'rawdataSanitizer' a callable that validates raw data before datamapper. It returns an array of raw data, or false if rawdata is invalid.
15
 *  'dataCleaner' a callable that filters the structured data returned by datamapper before instantiating the data model,
16
 * 					by default it removes all empty properties.
17
 *  'factsErrorDetector' a callable that validates computed facts. It accepts a ModelInterface and returns an error description or false.
18
 * 					By default, accepted raw data that produces empty facts is considered an error.
19
 */
20
class FactsFactory implements FactsFactoryInterface {
21
	
22
	/** @var array effective options: the constructor defaults merged with the caller's profile */
	protected $profile;

	/** @var string name of the model class whose static fromArray() builds the facts */
	protected $modelClass;

	/** @var array running totals, keyed by counter name */
	protected $counter = array(
		'triple' => 0, // RDF triples in the facts produced so far
		'error'  => 0, // facts for which the error detector reported a problem
		'insane' => 0, // raw data items rejected by the sanitizer
		'entity' => 0, // raw data items processed
	);
30
	
31
	
32 1
	/**
	 * Build the factory from a profile of options.
	 *
	 * Options missing from $profile fall back to the defaults declared below.
	 * The 'model' option is used as given when it names an existing class,
	 * otherwise it is looked up inside the \BOTK\Model namespace.
	 *
	 * @param array $profile option overrides, merged over the defaults
	 *
	 * @throws \InvalidArgumentException if the model class does not exist or
	 *         does not implement \BOTK\ModelInterface, or if one of the
	 *         'datamapper', 'dataCleaner', 'rawdataSanitizer' or
	 *         'factsErrorDetector' options is not callable
	 */
	public function __construct( array $profile =array() )
	{
		$defaults = array(
			'model'               => 'SampleSchemaThing',
			'modelOptions'        => array(),
			'entityThreshold'     => 100,  // entity count that must be exceeded before the rates below are checked
			'resilienceToErrors'  => 0.3,  // an error rate above 30% throws a TooManyErrorsException
			'resilienceToInsanes' => 0.9,  // an unacceptable-data rate above 90% throws a TooManyInsanesException
			'source'              => null,
			'datamapper'          => function($rawdata){return $rawdata;},
			// __CLASS__ instead of get_class(): calling get_class() without
			// an argument is deprecated since PHP 8.3; the value is the same.
			'dataCleaner'         => __CLASS__.'::REMOVE_EMPTY',
			'factsErrorDetector'  => __CLASS__.'::NOT_EMPTY_FACTS',
			'rawdataSanitizer'    => function($rawdata){return is_array($rawdata)?$rawdata:false;},
		);
		$this->profile = array_merge($defaults,$profile);

		$this->modelClass = class_exists($this->profile['model'])
			? $this->profile['model']
			: ('\BOTK\Model\\'.$this->profile['model']);

		// class_implements() is used rather than is_subclass_of(), which gives
		// inconsistent answers for interfaces on some PHP versions (bug #53727).
		// class_exists() is tested first so class_implements() never sees an
		// unknown class.
		if( !class_exists($this->modelClass)
				|| !in_array('BOTK\ModelInterface', class_implements($this->modelClass), true)){
			throw new \InvalidArgumentException("The provided model ({$this->profile['model']}) is unknown");
		}

		// Every processing hook must be callable; they are checked in a fixed
		// order so the first invalid one is the one reported.
		foreach( array('datamapper','dataCleaner','rawdataSanitizer','factsErrorDetector') as $hook){
			if( !is_callable($this->profile[$hook])) {
				throw new \InvalidArgumentException("Invalid $hook callback");
			}
		}
	}
67
	
68
	
69
	/**
	 * Two-level empty-value filter; the default 'dataCleaner' callback.
	 *
	 * Values that are arrays are first stripped of their own falsy elements,
	 * then every falsy entry (including arrays left empty by the first pass)
	 * is dropped from the top level. Keys are preserved at both levels.
	 *
	 * @param array $data structured data to clean
	 * @return array the data without empty properties
	 */
	public static function REMOVE_EMPTY( array $data)
	{
		// array_map keeps the original keys when it is given a single array.
		$innerCleaned = array_map(
			function($value){
				if (is_array($value)) {
					return array_filter($value);
				}
				return $value;
			},
			$data
		);

		return array_filter($innerCleaned);
	}
80
	
81
	/**
	 * Default 'factsErrorDetector' callback: facts without triples are an error.
	 *
	 * @param \BOTK\ModelInterface $data the facts to inspect
	 * @return string|false an error description when getTripleCount() is
	 *         falsy, false otherwise
	 */
	public static function NOT_EMPTY_FACTS( \BOTK\ModelInterface $data)
	{
		if ($data->getTripleCount()) {
			return false;
		}

		return 'No facts found.';
	}
88
	
89
	
90
	/**
	 * Create facts from raw data.
	 *
	 * The raw data goes through the 'rawdataSanitizer', 'datamapper' and
	 * 'dataCleaner' callbacks and is then handed to the model's static
	 * fromArray() together with 'modelOptions'. Whether facts without triples
	 * count as an error is decided by the 'factsErrorDetector' callback.
	 *
	 * Every call increments the 'entity' counter; the 'triple', 'error' and
	 * 'insane' counters are updated as the item moves through the pipeline.
	 *
	 * NOTE(review): both rate checks sit after the two warning throws, so they
	 * are only evaluated for items that were processed without a warning.
	 * Confirm that this is the intended behaviour.
	 *
	 * @param mixed $rawData raw data passed to the sanitizer
	 * @return mixed the facts returned by the model's fromArray()
	 *
	 * @throws InvalidRawDataWarning   if the sanitizer result is empty
	 * @throws FactsValidatorWarning   if the error detector reports an error
	 * @throws TooManyErrorsException  if more than 'entityThreshold' entities
	 *         were seen and the error rate exceeds 'resilienceToErrors'
	 * @throws TooManyInsanesException if more than 'entityThreshold' entities
	 *         were seen and the insane rate exceeds 'resilienceToInsanes'
	 */
	public function factualize($rawData)
	{
		$sanitize = $this->profile['rawdataSanitizer'];
		$validRawData = $sanitize($rawData);
		$this->counter['entity']++;

		// Rejected raw data: count it and stop here.
		if (empty($validRawData)) {
			$this->counter['insane']++;
			throw new InvalidRawDataWarning("Invalid rawdata",$rawData);
		}

		$map = $this->profile['datamapper'];
		$clean = $this->profile['dataCleaner'];
		$detectError = $this->profile['factsErrorDetector'];

		$structuredData = $clean($map($validRawData));
		$facts = call_user_func(
			$this->modelClass.'::fromArray',
			$structuredData,
			$this->profile['modelOptions']
		);

		// Triples are counted before validation, so facts that are later
		// reported as erroneous still contribute to the 'triple' counter.
		$this->counter['triple'] += $facts->getTripleCount();

		$error = $detectError($facts);
		if ($error) {
			$this->counter['error']++;
			throw new FactsValidatorWarning($error,$facts);
		}

		// The rates are only enforced once enough entities have been seen.
		$entityCount = $this->counter['entity'];
		$thresholdPassed = $entityCount > $this->profile['entityThreshold'];

		// ensure that there are not too many errors
		$errorRate = $this->counter['error'] / $entityCount;
		if ($thresholdPassed && $errorRate > $this->profile['resilienceToErrors']) {
			$percent = $this->profile['resilienceToErrors']*100;
			throw new TooManyErrorsException("Error rate in data processing exceeded the {$percent}% threshold");
		}

		// ensure that there is not too much unacceptable raw data
		$insaneRate = $this->counter['insane'] / $entityCount;
		if ($thresholdPassed && $insaneRate > $this->profile['resilienceToInsanes']) {
			$percent = $this->profile['resilienceToInsanes']*100;
			throw new TooManyInsanesException("Unacceptable data rate exceeded the {$percent}% threshold");
		}

		return $facts;
	}
134
	
135
	
136
	/**
	 * Return the linked data header produced by the model class.
	 *
	 * @return mixed whatever the model's static getTurtleHeader() returns
	 */
	public function generateLinkedDataHeader()
	{
		$headerProvider = $this->modelClass.'::getTurtleHeader';

		return call_user_func($headerProvider);
	}
140
	
141
	
142
	/**
	 * Build the trailing block of '#'-prefixed lines that summarises the run.
	 *
	 * The block carries the generation time, the 'source' option when it is
	 * not empty, and the current values of the counters.
	 *
	 * @return string the footer text
	 */
	public function generateLinkedDataFooter()
	{
		$generatedAt = date('c');
		$tripleCount = $this->counter['triple'];
		$entityCount = $this->counter['entity'];

		$footer = "
#@prefix dct: <http://purl.org/dc/terms/> .
#@prefix void: <http://rdfs.org/ns/void#> .
#@prefix prov: <http://www.w3.org/ns/prov#> .
";

		// provenance info; the statement stays open until the primaryTopic part
		$footer .= '#<> prov:generatedAtTime "' . $generatedAt . '"^^xsd:dateTime;';
		if (!empty($this->profile['source'])) {
			$footer .= 'dct:source <' . $this->profile['source'] . '>;';
		}
		$footer .= "foaf:primaryTopic <#dataset>.\n";

		// dataset info, then a human readable summary as the last line
		$footer .= '#<#dataset> a void:Dataset; void:datadump <>;void:triples ' . $tripleCount
			. ' ;void:entities ' . $entityCount . ".\n";
		$footer .= '#Generated ' . $tripleCount . ' good triples from ' . $entityCount
			. ' entities (' . $this->counter['insane'] . ' ignored), '
			. $this->counter['error'] . " errors\n";

		return $footer;
	}
165
166
167
	/**
	 * Add a value to one of the counters.
	 *
	 * @param string $counter name of an existing counter
	 * @param mixed  $val     amount to add, converted to an integer (default 1)
	 *
	 * @throws \InvalidArgumentException if $counter is not an existing counter
	 */
	public function addToCounter($counter,$val=1)
	{
		$isKnownCounter = array_key_exists($counter,$this->counter);
		if ($isKnownCounter === false) {
			throw new \InvalidArgumentException("Invalid counter name");
		}

		$increment = intval($val);
		$this->counter[$counter] = $this->counter[$counter] + $increment;
	}
174
	
175
	
176
	/**
	 * Return the current value of every counter.
	 *
	 * @return array counter values keyed by counter name
	 */
	public function getCounters()
	{
		$snapshot = $this->counter;

		return $snapshot;
	}
180
181
}