Completed
Push — master ( 48c049...4c7002 )
by Enrico
01:56
created

FactsFactory::factualize()   C

Complexity

Conditions 7
Paths 5

Size

Total Lines 41
Code Lines 29

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 20
CRAP Score 7.8542

Importance

Changes 0
Metric Value
dl 0
loc 41
ccs 20
cts 27
cp 0.7407
rs 6.7272
c 0
b 0
f 0
cc 7
eloc 29
nc 5
nop 1
crap 7.8542
1
<?php
2
namespace BOTK;
3
4
use BOTK\Exception\FactsValidatorWarning;
5
use BOTK\Exception\InvalidRawDataWarning;
6
use BOTK\Exception\TooManyErrorsException;
7
use BOTK\Exception\TooManyInsanesException;
8
9
/**
 * Creates structured data (facts) from raw data records, i.e. plain sequences of scalars,
 * while managing processing errors.
 *
 * The class keeps counters for RDF triples, processed entities, rejected raw data and
 * invalid facts, and provides thresholds on the error rates.
 *
 * Profile options (every option has a default, see the constructor):
 *  'model'               name of the model class; either an existing class name or a short
 *                        name resolved inside the \BOTK\Model namespace.
 *  'modelOptions'        array passed as second argument to the model fromArray() factory.
 *  'entityThreshold'     minimum number of processed entities before the error rate checks apply.
 *  'resilienceToErrors'  maximum tolerated ratio between invalid facts and processed entities.
 *  'resilienceToInsanes' maximum tolerated ratio between rejected raw data and processed entities.
 *  'source'              optional IRI written as dct:source in the linked data footer.
 *  'datamapper'          a callable that accepts a raw data array and creates structured data as an array.
 *  'rawdataSanitizer'    a callable that validates raw data before the datamapper. It returns an array
 *                        of raw data, or false if the raw data is invalid.
 *  'dataCleaner'         a callable that filters the structured data returned by the datamapper before
 *                        the data model is instantiated; by default it removes all empty properties.
 *  'factsErrorDetector'  a callable that validates computed facts. It accepts a ModelInterface and
 *                        returns an error description or false. By default, accepted raw data that
 *                        produces empty facts is considered an error.
 */
class FactsFactory implements FactsFactoryInterface {

	/** @var array factory options: constructor defaults merged with the caller supplied profile */
	protected $profile;

	/** @var string name of the model class used to build facts */
	protected $modelClass;

	/** @var array processing statistics */
	protected $counter = array(
		'triple'		=> 0,			// rdf triples in facts
		'error'			=> 0,			// facts contains error
		'insane'		=> 0,			// raw data unaccepted
		'entity'		=> 0,			// raw data processed
	);


	/**
	 * Merges the given profile over the defaults and validates model and callbacks.
	 *
	 * @param array $profile options overriding the defaults listed in the class description
	 *
	 * @throws \InvalidArgumentException if the model class does not exist, does not implement
	 *                                   \BOTK\ModelInterface, or if any callback is not callable
	 */
	public function __construct( array $profile =array() )
	{
		$defaults = array(
			'model'					  => 'LocalBusiness',
			'modelOptions'			  => array(),
			'entityThreshold'		  => 100, // min numbers of entity that trigger error resilience computation.
			'resilienceToErrors' 	  => 0.3, // if more than 30% of error throws a TooManyErrorsException
			'resilienceToInsanes'	  => 0.9, // if more than 90% of unacceptable data throws a TooManyInsanesException
			'source' 			  	  => null,
			'datamapper'			  => function($rawdata){return $rawdata;},
			// __CLASS__ replaces the argument-less get_class() call, deprecated since PHP 8.3
			'dataCleaner' 		  	  => __CLASS__.'::REMOVE_EMPTY',
			'factsErrorDetector' 	  => __CLASS__.'::NOT_EMPTY_FACTS',
			'rawdataSanitizer' 		  => function($rawdata){return is_array($rawdata)?$rawdata:false;},
		);
		$this->profile = array_merge($defaults,$profile);

		// an existing class name is used as is, otherwise the name is resolved in \BOTK\Model
		$this->modelClass = class_exists($this->profile['model'])
			?$this->profile['model']
			:('\BOTK\Model\\'.$this->profile['model']);

		// NOTE(review): the code review tool cites PHP Bug #53727 (is_subclass_of inconsistent
		// for interfaces on some PHP versions); confirm the minimum supported PHP version.
		if( !class_exists($this->modelClass) || !is_subclass_of($this->modelClass, '\BOTK\ModelInterface')){
			throw new \InvalidArgumentException("The provided model ({$this->profile['model']}) is unknown");
		}

		// every processing step must be callable; checked in a fixed order
		foreach (array('datamapper','dataCleaner','rawdataSanitizer','factsErrorDetector') as $callbackName) {
			if( !is_callable($this->profile[$callbackName])) {
				throw new \InvalidArgumentException("Invalid $callbackName callback");
			}
		}
	}


	/**
	 * Two level array filter, the default for the dataCleaner callback.
	 *
	 * Nested arrays are filtered first, then the top level. array_filter() without a
	 * callback drops every value that is equal to false (e.g. '', null, 0, '0', empty array).
	 *
	 * @param array $data structured data
	 *
	 * @return array the data without empty values, keys preserved
	 */
	public static function REMOVE_EMPTY( array $data)
	{
		$a = array();
		foreach ($data as $key => $value) {
			$a[$key] = is_array($value)?array_filter($value):$value;
		}
		return array_filter($a);
	}


	/**
	 * Default for the factsErrorDetector callback: facts without triples are an error.
	 *
	 * @param \BOTK\ModelInterface $data computed facts
	 *
	 * @return string|false an error description, or false when the facts contain triples
	 */
	public static function NOT_EMPTY_FACTS( \BOTK\ModelInterface $data)
	{
		return $data->getTripleCount()?false:'No facts found.';
	}


	/**
	 * Creates facts from raw data and updates the counters.
	 *
	 * Please note that empty facts do not always mean an error (i.e. no facts is a fact);
	 * if you do not want empty facts use the factsErrorDetector option.
	 *
	 * NOTE(review): the threshold checks are reached only after a successfully processed
	 * record, because rejected raw data and invalid facts throw their warning first.
	 * Confirm that this is the intended behavior.
	 *
	 * @param mixed $rawData the raw data record, handed to the rawdataSanitizer callback
	 *
	 * @return mixed the value returned by the model fromArray() factory
	 *
	 * @throws InvalidRawDataWarning   if the sanitizer result is empty
	 * @throws FactsValidatorWarning   if the factsErrorDetector reports an error
	 * @throws TooManyErrorsException  if the error rate exceeds 'resilienceToErrors'
	 * @throws TooManyInsanesException if the rejected data rate exceeds 'resilienceToInsanes'
	 */
	public function factualize($rawData)
	{
		$rawdataSanitizer = $this->profile['rawdataSanitizer'];
		$validRawData = $rawdataSanitizer($rawData);

		// rejected raw data is counted apart and is not counted as an entity
		if (empty($validRawData)){
			$this->counter['insane']++;
			throw new InvalidRawDataWarning("Invalid rawdata",$rawData);
		}

		$this->counter['entity']++;
		$datamapper = $this->profile['datamapper'];
		$dataCleaner = $this->profile['dataCleaner'];
		$factsErrorDetector = $this->profile['factsErrorDetector'];

		$data = $dataCleaner($datamapper($validRawData));
		$facts = call_user_func($this->modelClass.'::fromArray',$data,$this->profile['modelOptions']);

		// triples are counted even when the facts are later reported as invalid
		$this->counter['triple'] += $facts->getTripleCount();

		$error = $factsErrorDetector($facts);
		if ($error) {
			$this->counter['error']++;
			throw new FactsValidatorWarning($error,$facts);
		}

		// ensure that not too many errors ('entity' is at least 1 here)
		$errorRate = $this->counter['error']/$this->counter['entity'];
		if(( $this->counter['entity'] > $this->profile['entityThreshold'])
				&& ( $errorRate > $this->profile['resilienceToErrors'])){
			$x = $this->profile['resilienceToErrors']*100;
			throw new TooManyErrorsException("Error rate in data processing exceeded the $x% threshold");
		}

		// ensure that not too many insane raw data
		$insaneRate = $this->counter['insane']/$this->counter['entity'];
		if(( $this->counter['entity'] > $this->profile['entityThreshold'])
				&& ($insaneRate > $this->profile['resilienceToInsanes'])){
			$x = $this->profile['resilienceToInsanes']*100;
			throw new TooManyInsanesException("Unacceptable data rate exceeded the $x% threshold");
		}

		return $facts;
	}


	/**
	 * Returns the header provided by the model class getTurtleHeader() static method.
	 *
	 * @return mixed whatever getTurtleHeader() returns
	 */
	public function generateLinkedDataHeader()
	{
		return call_user_func($this->modelClass.'::getTurtleHeader');
	}


	/**
	 * Builds the footer with provenance and dataset information, followed by a human
	 * readable comment with the counters.
	 *
	 * Each call adds the footer triples to the 'triple' counter: 6, plus 1 when the
	 * 'source' option is set.
	 *
	 * @return string the footer text
	 */
	public function generateLinkedDataFooter()
	{
		$now = date('c');
		$rdf = "\n<> ";
		$this->counter['triple'] += 6;

		// add  provenance info
		$rdf .= "prov:generatedAtTime \"$now\"^^xsd:dateTime;";
		if(!empty($this->profile['source'])){
			$rdf.= "dct:source <{$this->profile['source']}>;";
			$this->counter['triple']++;
		}

		// add dataset info and a human readable comment as last line
		$rdf.= "foaf:primaryTopic <#dataset>.\n";
		$rdf.= "<#dataset> a void:Dataset; void:datadump <>;void:triples {$this->counter['triple']} ;void:entities {$this->counter['entity']}.\n";
		$rdf.= "# Generated {$this->counter['triple']} good triples from {$this->counter['entity']} entities ({$this->counter['insane']} ignored), {$this->counter['error']} errors\n";

		return $rdf;
	}


	/**
	 * Adds a value to one of the counters.
	 *
	 * @param string $counter counter name: 'triple', 'error', 'insane' or 'entity'
	 * @param mixed  $val     increment, converted with intval(); defaults to 1
	 *
	 * @throws \InvalidArgumentException if the counter name is unknown
	 */
	public function addToCounter($counter,$val=1)
	{
		if(!array_key_exists($counter,$this->counter)){
			throw new \InvalidArgumentException("Invalid counter name");
		}
		$this->counter[$counter]+= intval($val);
	}


	/**
	 * Returns all the counters.
	 *
	 * @return array counters indexed by name
	 */
	public function getCounters()
	{
		return $this->counter;
	}

}