Completed
Push — master ( 081071...48c049 )
by Enrico
01:58
created

FactsFactory::addToTripleCounter()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 5
ccs 0
cts 3
cp 0
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 3
nc 1
nop 1
crap 2
1
<?php
2
namespace BOTK;
3
4
use BOTK\Exception\FactsValidatorWarning;
5
use BOTK\Exception\InvalidRawDataWarning;
6
use BOTK\Exception\TooManyErrorsException;
7
use BOTK\Exception\TooManyInsanesException;
8
9
/**
 * Create structured data from an array of raw data (i.e. just a sequence of scalars), managing errors.
 * The class provides an RDF triple counter and thresholds for data processing errors.
 *
 * Profile options (all optional, see the defaults in the constructor):
 *  'model'               name of the model class; either an existing class name or a class
 *                        name relative to the \BOTK\Model namespace. Must implement \BOTK\ModelInterface.
 *  'modelOptions'        array passed as second argument to the model's fromArray().
 *  'entityThreshold'     number of processed entities that must be exceeded before the
 *                        resilience ratios are evaluated.
 *  'resilienceToErrors'  maximum tolerated error/entity ratio.
 *  'resilienceToInsanes' maximum tolerated insane/entity ratio.
 *  'source'              optional URI written as dct:source in the linked data footer.
 *  'datamapper'          a callable that accepts a raw data array and creates structured data as an array.
 *                        Defaults to the identity function.
 *  'rawdataSanitizer'    a callable that validates raw data before datamapper. It returns an array
 *                        of raw data, or false if the raw data is invalid.
 *  'dataCleaner'         a callable that filters the structured data returned by datamapper before
 *                        instantiating the data model; by default it removes all empty properties.
 *  'factsErrorDetector'  a callable that validates computed facts. It accepts a ModelInterface and
 *                        returns an error description or false. By default accepted raw data that
 *                        produces empty facts is considered an error.
 */
class FactsFactory implements FactsFactoryInterface {

	/** @var array effective profile: the defaults overridden by the constructor argument */
	protected $profile;

	/** @var string name of the model class used to build facts */
	protected $modelClass;

	/** @var array processing statistics, see getCounters() */
	protected $counter = array(
		'triple'		=> 0,			// rdf triples in facts
		'error'			=> 0,			// facts contains error
		'insane'		=> 0,			// raw data unaccepted
		'entity'		=> 0,			// raw data processed
	);


	/**
	 * Merge the given profile over the defaults and validate it.
	 *
	 * @param array $profile option overrides, see the class description
	 *
	 * @throws \InvalidArgumentException if the model class does not exist, is not a subclass of
	 *                                   \BOTK\ModelInterface, or one of the callbacks is not callable
	 */
	public function __construct( array $profile =array() )
	{
		$defaults = array(
			'model'					  => 'LocalBusiness',
			'modelOptions'			  => array(),
			'entityThreshold'		  => 100, // min number of entities that triggers error resilience computation.
			'resilienceToErrors' 	  => 0.3, // if more than 30% of errors throws a TooManyErrorsException
			'resilienceToInsanes'	  => 0.9, // if more than 90% of unacceptable data throws a TooManyInsanesException
			'source' 			  	  => null,
			'datamapper'			  => function($rawdata){return $rawdata;},
			// __CLASS__ is equivalent to the argument-less get_class() call, which is deprecated since PHP 8.3
			'dataCleaner' 		  	  => __CLASS__.'::REMOVE_EMPTY',
			'factsErrorDetector' 	  => __CLASS__.'::NOT_EMPTY_FACTS',
			'rawdataSanitizer' 		  => function($rawdata){return is_array($rawdata)?$rawdata:false;},
		);
		$this->profile = array_merge($defaults,$profile);

		// accept a resolvable class name as is, otherwise look the model up in \BOTK\Model
		$this->modelClass = class_exists($this->profile['model'])
			?$this->profile['model']
			:('\BOTK\Model\\'.$this->profile['model']);

		if( !class_exists($this->modelClass) || !is_subclass_of($this->modelClass, '\BOTK\ModelInterface')){
			throw new \InvalidArgumentException("The provided model ({$this->profile['model']}) is unknown");
		}

		// every processing hook must be callable; checked in the same order as the messages below
		foreach( array('datamapper','dataCleaner','rawdataSanitizer','factsErrorDetector') as $callbackName) {
			if( !is_callable($this->profile[$callbackName])) {
				throw new \InvalidArgumentException("Invalid $callbackName callback");
			}
		}
	}


	/**
	 * Two level array filter, the default for the dataCleaner callback.
	 *
	 * Values that are arrays are filtered with array_filter(), then the resulting top level
	 * array is filtered the same way. array_filter() without a callback drops every entry
	 * that is loosely equal to false.
	 *
	 * @param array $data structured data
	 *
	 * @return array the filtered data, keys preserved
	 */
	public static function REMOVE_EMPTY( array $data)
	{
		$a = array();
		foreach ($data as $key => $value) {
			$a[$key] = is_array($value)?array_filter($value):$value;
		}
		return array_filter($a);
	}


	/**
	 * The default for the factsErrorDetector callback.
	 *
	 * @param \BOTK\ModelInterface $data computed facts
	 *
	 * @return string|false false if the facts contain at least one triple, an error description otherwise
	 */
	public static function NOT_EMPTY_FACTS( \BOTK\ModelInterface $data)
	{
		return $data->getTripleCount()?false:'No facts found.';
	}


	/**
	 * Create facts from raw data.
	 *
	 * The raw data is sanitized, mapped, cleaned and passed to the model's fromArray();
	 * the counters are updated along the way. Please note that empty facts do not always
	 * mean an error (i.e. no facts is a fact): whether they are rejected depends on the
	 * factsErrorDetector callback.
	 *
	 * NOTE(review): both warnings are thrown before the threshold checks, so the thresholds
	 * are evaluated only after a successfully processed record -- confirm this is intended.
	 *
	 * @param mixed $rawData raw data as accepted by the rawdataSanitizer callback
	 *
	 * @return mixed the value returned by the model's fromArray()
	 *
	 * @throws InvalidRawDataWarning   if the sanitizer returns an empty value
	 * @throws FactsValidatorWarning   if the factsErrorDetector returns an error description
	 * @throws TooManyErrorsException  if the error ratio exceeds 'resilienceToErrors'
	 * @throws TooManyInsanesException if the insane ratio exceeds 'resilienceToInsanes'
	 */
	public function factualize($rawData)
	{
		$rawdataSanitizer = $this->profile['rawdataSanitizer'];
		$validRawData = $rawdataSanitizer($rawData);

		if (empty($validRawData)){
			$this->counter['insane']++;
			throw new InvalidRawDataWarning("Invalid rawdata",$rawData);
		}

		$this->counter['entity']++;
		$datamapper = $this->profile['datamapper'];
		$dataCleaner = $this->profile['dataCleaner'];
		$factsErrorDetector = $this->profile['factsErrorDetector'];

		$data = $dataCleaner($datamapper($validRawData));
		$facts = call_user_func($this->modelClass.'::fromArray',$data,$this->profile['modelOptions']);
		$this->counter['triple'] += $facts->getTripleCount();

		$error = $factsErrorDetector($facts);
		if($error){
			$this->counter['error']++;
			throw new FactsValidatorWarning($error,$facts);
		}

		// the ratios are meaningful only once enough entities have been processed
		$enoughEntities = $this->counter['entity'] > $this->profile['entityThreshold'];

		// ensure that not too many errors
		if( $enoughEntities
				&& ($this->counter['error']/$this->counter['entity']) > $this->profile['resilienceToErrors']){
			$x = $this->profile['resilienceToErrors']*100;
			throw new TooManyErrorsException("Error rate in data processing exceeded the $x% threshold");
		}

		// ensure that not too many insane raw data
		if( $enoughEntities
				&& ($this->counter['insane']/$this->counter['entity']) > $this->profile['resilienceToInsanes']){
			$x = $this->profile['resilienceToInsanes']*100;
			throw new TooManyInsanesException("Unacceptable data rate exceeded the $x% threshold");
		}

		return $facts;
	}


	/**
	 * Return the linked data header provided by the model class.
	 *
	 * @return mixed the value returned by the model's getTurtleHeader()
	 */
	public function generateLinkedDataHeader()
	{
		return call_user_func($this->modelClass.'::getTurtleHeader');
	}


	/**
	 * Build the linked data footer: provenance, dataset description and a summary comment.
	 *
	 * Side effect: the triples written by the footer itself are added to the 'triple'
	 * counter (6, plus 1 when a 'source' is configured) on every call.
	 *
	 * @return string the footer text
	 */
	public function generateLinkedDataFooter()
	{
		$now = date('c');
		$rdf = "\n<> ";
		$this->counter['triple'] += 6;

		// add  provenance info
		$rdf .= "prov:generatedAtTime \"$now\"^^xsd:dateTime;";
		if(!empty($this->profile['source'])){
			$rdf.= "dct:source <{$this->profile['source']}>;";
			$this->counter['triple']++;
		}

		// add dataset info and a human readable comment as last line
		$rdf.= "foaf:primaryTopic <#dataset>.\n";
		$rdf.= "<#dataset> a void:Dataset; void:datadump <>;void:triples {$this->counter['triple']} ;void:entities {$this->counter['entity']}.\n";
		$rdf.= "# Generated {$this->counter['triple']} good triples from {$this->counter['entity']} entities ({$this->counter['insane']} ignored), {$this->counter['error']} errors\n";

		return $rdf;
	}


	/**
	 * Add a value to one of the counters.
	 *
	 * @param string $counter counter name: 'triple', 'error', 'insane' or 'entity'
	 * @param mixed  $val     amount to add, converted with intval()
	 *
	 * @throws \InvalidArgumentException if the counter name is unknown
	 */
	public function addToCounter($counter,$val=1)
	{
		if(!array_key_exists($counter,$this->counter)){
			throw new \InvalidArgumentException("Invalid counter name");
		}
		$this->counter[$counter]+= intval($val);
	}


	/**
	 * Return all counters.
	 *
	 * @return array counter values indexed by 'triple', 'error', 'insane' and 'entity'
	 */
	public function getCounters()
	{
		return $this->counter;
	}

}