FactsFactory   A
last analyzed

Complexity

Total Complexity 28

Size/Duplication

Total Lines 153
Duplicated Lines 0 %

Test Coverage

Coverage 59.21%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 79
c 1
b 0
f 0
dl 0
loc 153
ccs 45
cts 76
cp 0.5921
rs 10
wmc 28

8 Methods

Rating   Name   Duplication   Size   Complexity  
A addToCounter() 0 6 2
A generateLinkedDataHeader() 0 13 3
A generateLinkedDataFooter() 0 3 1
A REMOVE_EMPTY() 0 7 3
A NOT_EMPTY_FACTS() 0 3 2
B __construct() 0 34 9
B factualize() 0 40 7
A getCounters() 0 3 1
1
<?php
2
namespace BOTK;
3
4
use BOTK\Exception\FactsValidatorWarning;
5
use BOTK\Exception\InvalidRawDataWarning;
6
use BOTK\Exception\TooManyErrorsException;
7
use BOTK\Exception\TooManyInsanesException;
8
9
/**
10
 * Create structured data from an array of raw data (i.e. just a sequence of scalars), managing errors.
11
 * The class provides an RDF triple counter and thresholds for data processing errors.
12
 * Options:
13
 * 	'datamapper' a callable that accepts a raw data array and create structured data as an array. Must be provided.
14
 *  'rawdataSanitizer' a callable that validates raw data before datamapper. It returns an array of raw data, or false if rawdata is invalid.
15
 *  'dataCleaner' a callable that filters the structured data returned by datamapper before instantiating the data model;
16
 * 					by default it removes all empty properties.
17
 *  'factsErrorDetector' a callable that validates computed facts. It accepts a ModelInterface and returns an error description or false.
18
 * 					By default, accepted raw data that produces empty facts is considered an error.
19
 */
20
class FactsFactory implements FactsFactoryInterface {
21
	
22
	/** @var array configuration options: built-in defaults merged with the caller's overrides */
	protected $profile;

	/** @var string name of the model class used to build facts */
	protected $modelClass;

	/** @var array running totals updated while raw data is processed */
	protected $counter = [
		'triple' => 0, // RDF triples contained in the produced facts
		'error'  => 0, // facts flagged by the facts error detector
		'insane' => 0, // raw data rejected before mapping
		'entity' => 0, // raw data items processed
	];
30
	
31
	
32 1
	/**
	 * Build the factory from a configuration profile.
	 *
	 * The supplied profile is merged over the built-in defaults. The 'model' entry is
	 * used as-is when a class with that name exists; otherwise it is looked up in the
	 * \BOTK\Model namespace.
	 *
	 * @param array $profile option overrides, keyed like the defaults below
	 *
	 * @throws \InvalidArgumentException when the model class does not exist or is not a
	 *                                   \BOTK\ModelInterface, or when one of the configured
	 *                                   callbacks is not callable
	 */
	public function __construct( array $profile =array() )
	{
		$defaults = array(
			'model'               => 'SampleSchemaThing',
			'modelOptions'        => array(),
			'entityThreshold'     => 100,  // min number of entities before the resilience thresholds are evaluated
			'resilienceToErrors'  => 0.3,  // if more than 30% of errors, a TooManyErrorsException is thrown
			'resilienceToInsanes' => 0.9,  // if more than 90% of unacceptable data, a TooManyInsanesException is thrown
			'documentURL'         => null, // used when printing metadata to specify the source file URL; use empty string for <>
			'source'              => null,
			'datamapper'          => static function ($rawdata) {
				return $rawdata;
			},
			// self::class replaces the no-argument get_class() call, which is deprecated since PHP 8.3
			'dataCleaner'         => self::class.'::REMOVE_EMPTY',
			'factsErrorDetector'  => self::class.'::NOT_EMPTY_FACTS',
			'rawdataSanitizer'    => static function ($rawdata) {
				return is_array($rawdata) ? $rawdata : false;
			},
		);
		$this->profile = array_merge($defaults, $profile);

		// Accept a fully qualified class name, or fall back to a short name inside \BOTK\Model
		$this->modelClass = class_exists($this->profile['model'])
			? $this->profile['model']
			: ('\BOTK\Model\\'.$this->profile['model']);

		if (!class_exists($this->modelClass) || !is_subclass_of($this->modelClass, '\BOTK\ModelInterface')) {
			throw new \InvalidArgumentException("The provided model ({$this->profile['model']}) is unknown");
		}

		// Every configurable hook must be callable; the first invalid one is reported
		foreach (array('datamapper', 'dataCleaner', 'rawdataSanitizer', 'factsErrorDetector') as $callbackName) {
			if (!is_callable($this->profile[$callbackName])) {
				throw new \InvalidArgumentException("Invalid $callbackName callback");
			}
		}
	}
68
	
69
	
70
	/**
	 * Default 'dataCleaner' callback: a two-level filter that drops empty entries.
	 *
	 * Array values are filtered first, then every top-level entry that is
	 * (or has become) empty is removed. Keys are preserved at both levels.
	 *
	 * @param array $data structured data to clean
	 *
	 * @return array the data without empty values
	 */
	public static function REMOVE_EMPTY( array $data)
	{
		$innerCleaned = array_map(
			static function ($item) {
				if (is_array($item)) {
					return array_filter($item);
				}

				return $item;
			},
			$data
		);

		return array_filter($innerCleaned);
	}
81
	
82
	
83
	/**
	 * Default 'factsErrorDetector' callback: reports facts that hold no triples.
	 *
	 * @param \BOTK\ModelInterface $data the computed facts to inspect
	 *
	 * @return string|false 'No facts found.' when the triple count is falsy, false otherwise
	 */
	public static function NOT_EMPTY_FACTS( \BOTK\ModelInterface $data)
	{
		if ($data->getTripleCount()) {
			return false;
		}

		return 'No facts found.';
	}
90
	
91
	
92
	/**
	 * Create facts from raw data.
	 *
	 * The raw data is passed through the 'rawdataSanitizer', then the 'datamapper' and the
	 * 'dataCleaner'; the result is handed to the model's fromArray() factory. Please note
	 * that empty facts are not necessarily an error (i.e. no facts is a fact): whether they
	 * are rejected depends on the 'factsErrorDetector' callback.
	 *
	 * @param mixed       $rawData       the raw record to process
	 * @param object|null $globalStorage optional object forwarded unchanged to the model factory
	 *
	 * @return mixed the facts built by the model class
	 *
	 * @throws InvalidRawDataWarning   when the sanitizer returns an empty value
	 * @throws FactsValidatorWarning   when the facts error detector reports a problem
	 * @throws TooManyErrorsException  when the error rate exceeds 'resilienceToErrors'
	 * @throws TooManyInsanesException when the rejected-data rate exceeds 'resilienceToInsanes'
	 */
	public function factualize($rawData, ?object $globalStorage = null)
	{
		$rawdataSanitizer = $this->profile['rawdataSanitizer'];
		$validRawData = $rawdataSanitizer($rawData);
		$this->counter['entity']++;

		// Reject unusable raw data before any mapping is attempted
		if (empty($validRawData)) {
			$this->counter['insane']++;
			throw new InvalidRawDataWarning("Invalid rawdata",$rawData);
		}

		$datamapper = $this->profile['datamapper'];
		$dataCleaner = $this->profile['dataCleaner'];
		$factsErrorDetector = $this->profile['factsErrorDetector'];

		$data = $dataCleaner($datamapper($validRawData));
		$facts = call_user_func($this->modelClass.'::fromArray', $data, $this->profile['modelOptions'], $globalStorage);
		$this->counter['triple'] += $facts->getTripleCount();

		if ($error = $factsErrorDetector($facts)) {
			$this->counter['error']++;
			throw new FactsValidatorWarning($error,$facts);
		}

		// NOTE(review): the two threshold checks below are only reached for records that
		// produced valid facts, because rejected records throw above - confirm this is intended.
		$thresholdReached = $this->counter['entity'] > $this->profile['entityThreshold'];

		// ensure that there are not too many errors
		$errorRate = ($this->counter['error']/$this->counter['entity']);
		if ($thresholdReached && ($errorRate > $this->profile['resilienceToErrors'])) {
			$x = $this->profile['resilienceToErrors']*100;
			throw new TooManyErrorsException("Error rate in data processing exceeded the $x% threshold");
		}

		// ensure that there is not too much unacceptable raw data
		$insaneRate = ($this->counter['insane']/$this->counter['entity']);
		if ($thresholdReached && ($insaneRate > $this->profile['resilienceToInsanes'])) {
			$x = $this->profile['resilienceToInsanes']*100;
			throw new TooManyInsanesException("Unacceptable data rate exceeded the $x% threshold");
		}

		return $facts;
	}
137
	
138
	
139
	/**
	 * Build the Turtle header for the generated linked data.
	 *
	 * The header is the value returned by the model's getTurtleHeader() (called with the
	 * 'base' => 'default' model option, or null when it is not set), followed by dataset
	 * metadata when a 'documentURL' is configured. The metadata reports the counters as
	 * they are at the time of the call.
	 *
	 * @return string the Turtle header, optionally followed by dataset metadata
	 */
	public function generateLinkedDataHeader()
	{
		$metadata = '';
		if (!is_null($this->profile['documentURL'])) {
			// Requires foaf:, dct: and void: prefixes to be defined in model
			$metadata .= "\n<{$this->profile['documentURL']}> a foaf:Document .\n";
			if (!empty($this->profile['source'])) {
				$metadata .= "<{$this->profile['documentURL']}> dct:source <{$this->profile['source']}>. \n";
			}
			// The VoID property is void:dataDump; Turtle names are case-sensitive
			$metadata .= "[] a void:Dataset; dct:format \"text/turtle\" ; void:dataDump <{$this->profile['documentURL']}>;void:triples {$this->counter['triple']} ;void:entities {$this->counter['entity']}.\n";
		}
		$base = $this->profile['modelOptions']['base']['default'] ?? null;

		return call_user_func($this->modelClass.'::getTurtleHeader', $base) . $metadata;
	}
153
	
154
	
155
	/**
	 * Build the closing comment line that summarises the current counters.
	 *
	 * @return string a '#'-prefixed line terminated by a newline
	 */
	public function generateLinkedDataFooter()
	{
		$triples  = $this->counter['triple'];
		$entities = $this->counter['entity'];
		$ignored  = $this->counter['insane'];
		$errors   = $this->counter['error'];

		return "#Generated {$triples} good triples from {$entities} entities ({$ignored} ignored), {$errors} errors\n";
	}
159
160
161
	/**
	 * Add a value to one of the named counters.
	 *
	 * @param string $counter name of an existing counter
	 * @param mixed  $val     amount to add; it is converted to an integer first
	 *
	 * @throws \InvalidArgumentException when no counter with that name exists
	 */
	public function addToCounter($counter,$val=1)
	{
		$isKnown = array_key_exists($counter, $this->counter);
		if ($isKnown === false) {
			throw new \InvalidArgumentException("Invalid counter name");
		}

		$increment = intval($val);
		$this->counter[$counter] = $this->counter[$counter] + $increment;
	}
168
	
169
	
170
	/**
	 * Return the current counters.
	 *
	 * @return array counter values keyed by counter name
	 */
	public function getCounters()
	{
		$snapshot = $this->counter;

		return $snapshot;
	}
174
175
}