Passed
Pull Request — master (#17)
by Enrico
01:44
created

FactsFactory   A

Complexity

Total Complexity 28

Size/Duplication

Total Lines 154
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 5

Test Coverage

Coverage 60.52%

Importance

Changes 0
Metric Value
wmc 28
lcom 1
cbo 5
dl 0
loc 154
ccs 46
cts 76
cp 0.6052
rs 10
c 0
b 0
f 0

8 Methods

Rating   Name   Duplication   Size   Complexity  
B __construct() 0 36 9
A generateLinkedDataFooter() 0 4 1
A getCounters() 0 4 1
A REMOVE_EMPTY() 0 8 3
A NOT_EMPTY_FACTS() 0 4 2
B factualize() 0 40 7
A generateLinkedDataHeader() 0 14 3
A addToCounter() 0 7 2
1
<?php
2
namespace BOTK;
3
4
use BOTK\Exception\FactsValidatorWarning;
5
use BOTK\Exception\InvalidRawDataWarning;
6
use BOTK\Exception\TooManyErrorsException;
7
use BOTK\Exception\TooManyInsanesException;
8
9
/**
10
 * Create structured data from an array of raw data (i.e. just a sequence of scalars), managing errors.
11
 * The class provides an RDF triple counter and thresholds for data processing errors.
12
 * Options:
13
 * 	'datamapper' a callable that accepts a raw data array and create structured data as an array. Must be provided.
14
 *  'rawdataSanitizer' a callable that validates raw data before datamapper. It returns an array of raw data, or false if rawdata is invalid.
15
 *  'dataCleaner' a callable that filters the structured data returned by datamapper before instantiating the data model,
16
 * 					by default it removes all empty properties.
17
 *  'factsErrorDetector' a callable that validates computed facts. It accepts a ModelInterface and returns an error description or false.
18
 * 					By default, accepted raw data that produce empty facts are considered errors.
19
 */
20
class FactsFactory implements FactsFactoryInterface {

	/** @var array effective configuration: constructor options merged over the defaults */
	protected $profile;

	/** @var string name of the model class whose static fromArray() builds the facts */
	protected $modelClass;

	/** @var array running totals updated by factualize() and addToCounter() */
	protected $counter = array(
		'triple'		=> 0,			// rdf triples in facts
		'error'			=> 0,			// facts contains error
		'insane'		=> 0,			// raw data unaccepted
		'entity'		=> 0,			// raw data processed
	);


	/**
	 * Merges the given options over the defaults, resolves the model class and
	 * validates the configured callbacks.
	 *
	 * The 'model' option is used verbatim when it names an existing class,
	 * otherwise it is looked up inside the \BOTK\Model namespace.
	 *
	 * @param array $profile options overriding the defaults declared below
	 * @throws \InvalidArgumentException if the model class does not exist or is not a
	 *         \BOTK\ModelInterface subtype, or if one of the four callbacks is not callable
	 */
	public function __construct( array $profile =array() )
	{
		$defaults = array(
			'model'					  => 'SampleSchemaThing',
			'modelOptions'			  => array(),
			'entityThreshold'		  => 100, // min number of entities that triggers the error resilience computation.
			'resilienceToErrors' 	  => 0.3, // if more than 30% of errors, throws a TooManyErrorsException
			'resilienceToInsanes'	  => 0.9, // if more than 90% of unacceptable data, throws a TooManyInsanesException
			'source' 			  	  => null,
			'documentURL' 	          => null,	// used if printing metadata to specify source file URL use empty string for <>
			'datamapper'			  => function($rawdata){return $rawdata;},
			// __CLASS__ yields the same name as the argument-less get_class() call,
			// which is deprecated as of PHP 8.3.
			'dataCleaner' 		  	  => __CLASS__.'::REMOVE_EMPTY',
			'factsErrorDetector' 	  => __CLASS__.'::NOT_EMPTY_FACTS',
			'rawdataSanitizer' 		  => function($rawdata){return is_array($rawdata)?$rawdata:false;},
		);
		$this->profile = array_merge($defaults,$profile);
		$this->modelClass = class_exists($this->profile['model'])
			?$this->profile['model']
			:('\BOTK\Model\\'.$this->profile['model']);

		if( !class_exists($this->modelClass) || !is_subclass_of($this->modelClass, '\BOTK\ModelInterface')){
			throw new \InvalidArgumentException("The provided model ({$this->profile['model']}) is unknown");
		}

		// Every processing hook must be invokable before any data is handled.
		if( !is_callable($this->profile['datamapper'])) {
			throw new \InvalidArgumentException("Invalid datamapper callback");
		}
		if( !is_callable($this->profile['dataCleaner'])) {
			throw new \InvalidArgumentException("Invalid dataCleaner callback");
		}
		if( !is_callable($this->profile['rawdataSanitizer'])) {
			throw new \InvalidArgumentException("Invalid rawdataSanitizer callback");
		}
		if( !is_callable($this->profile['factsErrorDetector'])) {
			throw new \InvalidArgumentException("Invalid factsErrorDetector callback");
		}
	}


	/**
	 * Two level array filter, the default for the 'dataCleaner' callback.
	 *
	 * Nested arrays are filtered first, then the top level is filtered, so a
	 * nested array left without elements is dropped as well. array_filter() is
	 * called without a callback, hence every falsy value (null, '', '0', 0,
	 * false, empty array) is removed. Keys are preserved.
	 *
	 * @param array $data structured data as returned by the datamapper
	 * @return array the data without empty values
	 */
	public static function REMOVE_EMPTY( array $data)
	{
		$a = array();
		foreach ($data as $key => $value) {
			$a[$key] = is_array($value)?array_filter($value):$value;
		}
		return array_filter($a);
	}


	/**
	 * The default for the 'factsErrorDetector' callback.
	 *
	 * @param \BOTK\ModelInterface $data the computed facts
	 * @return string|false an error description when the facts hold no triples, false otherwise
	 */
	public static function NOT_EMPTY_FACTS( \BOTK\ModelInterface $data)
	{
		return $data->getTripleCount()?false:'No facts found.';
	}


	/**
	 * Creates facts from raw data, updating the counters.
	 *
	 * Pipeline: rawdataSanitizer -> datamapper -> dataCleaner -> model fromArray()
	 * -> factsErrorDetector. Empty facts are not an error by themselves; whether
	 * they are rejected depends on the configured 'factsErrorDetector'.
	 *
	 * NOTE(review): the warnings below are thrown before the threshold checks, so the
	 * TooMany* exceptions can only be raised by a call that produced valid facts.
	 * Confirm this is the intended contract.
	 *
	 * @param mixed $rawData the raw data to process
	 * @return \BOTK\ModelInterface the facts built by the model class (presumably; it is
	 *         whatever the model's static fromArray() returns)
	 * @throws InvalidRawDataWarning if the sanitizer returns an empty value
	 * @throws FactsValidatorWarning if the facts error detector reports an error
	 * @throws TooManyErrorsException if the error rate exceeds 'resilienceToErrors'
	 * @throws TooManyInsanesException if the rejected raw data rate exceeds 'resilienceToInsanes'
	 */
	public function factualize($rawData)
	{
		$rawdataSanitizer = $this->profile['rawdataSanitizer'];
		$validRawData = $rawdataSanitizer($rawData);
		$this->counter['entity']++;

		if (empty($validRawData)) {
			$this->counter['insane']++;
			throw new InvalidRawDataWarning("Invalid rawdata",$rawData);
		}

		$datamapper = $this->profile['datamapper'];
		$dataCleaner = $this->profile['dataCleaner'];
		$factsErrorDetector = $this->profile['factsErrorDetector'];
		$data = $dataCleaner($datamapper($validRawData));
		$facts = call_user_func($this->modelClass.'::fromArray',$data,$this->profile['modelOptions']);
		$this->counter['triple'] += $facts->getTripleCount();
		if ($error = $factsErrorDetector($facts)) {
			$this->counter['error']++;
			throw new FactsValidatorWarning($error,$facts);
		}

		// Rates are evaluated only once enough entities were processed. The explicit
		// positivity guard avoids a division by zero when the entity counter has been
		// lowered through addToCounter().
		$entities = $this->counter['entity'];
		if ($entities > 0 && $entities > $this->profile['entityThreshold']) {
			// ensure that not too many errors
			$errorRate = $this->counter['error'] / $entities;
			if ($errorRate > $this->profile['resilienceToErrors']) {
				$x = $this->profile['resilienceToErrors']*100;
				throw new TooManyErrorsException("Error rate in data processing exceeded the $x% threshold");
			}

			// ensure that not too many insane raw data
			$insaneRate = $this->counter['insane'] / $entities;
			if ($insaneRate > $this->profile['resilienceToInsanes']) {
				$x = $this->profile['resilienceToInsanes']*100;
				throw new TooManyInsanesException("Unacceptable data rate exceeded the $x% threshold");
			}
		}

		return $facts;
	}


	/**
	 * Returns the model's turtle header, followed by document metadata when the
	 * 'documentURL' option is not null.
	 *
	 * The metadata report the counters as they are at call time.
	 *
	 * @return string turtle text
	 */
	public function generateLinkedDataHeader()
	{
		$metadata='';
		if(!is_null($this->profile['documentURL'])) {
			// Requires foaf:,  dct: and void: prefixes to be defined in model
			$metadata.= "\n<{$this->profile['documentURL']}> a foaf:Document .\n";
			if(!empty($this->profile['source'])){
				$metadata .= "<{$this->profile['documentURL']}> dct:source <{$this->profile['source']}>. \n";
			}
			// The VoID property is void:dataDump; RDF terms are case-sensitive.
			$metadata .= "[] a void:Dataset; void:dataDump <{$this->profile['documentURL']}>;void:triples {$this->counter['triple']} ;void:entities {$this->counter['entity']}.\n";
		}
		return call_user_func($this->modelClass.'::getTurtleHeader') . $metadata;
	}


	/**
	 * Returns a one-line comment summarizing the current counters.
	 *
	 * @return string
	 */
	public function generateLinkedDataFooter()
	{
		return "#Generated {$this->counter['triple']} good triples from {$this->counter['entity']} entities ({$this->counter['insane']} ignored), {$this->counter['error']} errors\n";
	}


	/**
	 * Adds a value to one of the counters.
	 *
	 * @param string $counter one of 'triple', 'error', 'insane', 'entity'
	 * @param mixed $val amount to add, converted with intval(); may be negative
	 * @throws \InvalidArgumentException if the counter name is unknown
	 */
	public function addToCounter($counter,$val=1)
	{
		if(!array_key_exists($counter,$this->counter)){
			throw new \InvalidArgumentException("Invalid counter name");
		}
		$this->counter[$counter]+= intval($val);
	}


	/**
	 * Returns all counters.
	 *
	 * @return array counter name => current value
	 */
	public function getCounters()
	{
		return $this->counter;
	}

}