Passed
Pull Request — master (#18)
by Enrico
01:42
created

FactsFactory   A

Complexity

Total Complexity 28

Size/Duplication

Total Lines 155
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 5

Test Coverage

Coverage 59.74%

Importance

Changes 0
Metric Value
wmc 28
lcom 1
cbo 5
dl 0
loc 155
ccs 46
cts 77
cp 0.5974
rs 10
c 0
b 0
f 0

8 Methods

Rating   Name   Duplication   Size   Complexity  
B __construct() 0 36 9
A REMOVE_EMPTY() 0 8 3
A NOT_EMPTY_FACTS() 0 4 2
B factualize() 0 40 7
A generateLinkedDataHeader() 0 15 3
A generateLinkedDataFooter() 0 4 1
A addToCounter() 0 7 2
A getCounters() 0 4 1
1
<?php
2
namespace BOTK;
3
4
use BOTK\Exception\FactsValidatorWarning;
5
use BOTK\Exception\InvalidRawDataWarning;
6
use BOTK\Exception\TooManyErrorsException;
7
use BOTK\Exception\TooManyInsanesException;
8
9
/**
 * Creates structured data (facts) from an array of raw data (i.e. just a sequence of scalars),
 * managing errors along the way.
 *
 * The class keeps counters (RDF triples, errors, rejected raw data, processed entities) and
 * enforces thresholds on the rate of data processing errors.
 *
 * Profile options (all optional, see the defaults in the constructor):
 *  'model'               name of the model class; if the name is not an existing class it is
 *                        looked up in the \BOTK\Model namespace.
 *  'modelOptions'        options passed as second argument to the model's fromArray().
 *  'entityThreshold'     minimum number of processed entities before the error/insane rates
 *                        are checked.
 *  'resilienceToErrors'  maximum tolerated ratio of facts errors (TooManyErrorsException above it).
 *  'resilienceToInsanes' maximum tolerated ratio of rejected raw data (TooManyInsanesException above it).
 *  'source'              optional source URI, printed as dct:source in the linked data header.
 *  'documentURL'         document URL used when printing metadata; use an empty string for <>.
 *  'datamapper'          a callable that accepts a raw data array and creates structured data
 *                        as an array. By default it returns the raw data unchanged.
 *  'rawdataSanitizer'    a callable that validates raw data before the datamapper. It returns an
 *                        array of raw data, or false if the raw data is invalid.
 *  'dataCleaner'         a callable that filters the structured data returned by the datamapper
 *                        before the data model is instantiated; by default it removes all empty
 *                        properties.
 *  'factsErrorDetector'  a callable that validates the computed facts. It accepts a ModelInterface
 *                        and returns an error description or false. By default, accepted raw data
 *                        that produces empty facts is considered an error.
 */
class FactsFactory implements FactsFactoryInterface
{
    /** Effective options: the constructor defaults overridden by the user supplied profile. */
    protected $profile;

    /** Name of the model class used to build facts. */
    protected $modelClass;

    protected $counter = [
        'triple' => 0, // rdf triples in facts
        'error'  => 0, // facts containing errors
        'insane' => 0, // raw data not accepted
        'entity' => 0, // raw data processed
    ];


    /**
     * Merges the given profile over the defaults and validates the model class and the callbacks.
     *
     * @param array $profile options overriding the defaults (see the class description)
     *
     * @throws \InvalidArgumentException if the model class is unknown or does not implement
     *                                   \BOTK\ModelInterface, or if one of the callbacks is not callable
     */
    public function __construct(array $profile = [])
    {
        $defaults = [
            'model'               => 'SampleSchemaThing',
            'modelOptions'        => [],
            'entityThreshold'     => 100,  // min number of entities that triggers the resilience computation
            'resilienceToErrors'  => 0.3,  // if more than 30% of errors, throws a TooManyErrorsException
            'resilienceToInsanes' => 0.9,  // if more than 90% of unacceptable data, throws a TooManyInsanesException
            'source'              => null,
            'documentURL'         => null, // used if printing metadata to specify source file URL; use empty string for <>
            'datamapper'          => function ($rawdata) {
                return $rawdata;
            },
            // self::class replaces the argument-less get_class() call, deprecated since PHP 8.3;
            // both yield the name of this class.
            'dataCleaner'         => self::class . '::REMOVE_EMPTY',
            'factsErrorDetector'  => self::class . '::NOT_EMPTY_FACTS',
            'rawdataSanitizer'    => function ($rawdata) {
                return is_array($rawdata) ? $rawdata : false;
            },
        ];
        $this->profile = array_merge($defaults, $profile);

        // a model name that is not an existing class is resolved inside the \BOTK\Model namespace
        $this->modelClass = class_exists($this->profile['model'])
            ? $this->profile['model']
            : ('\BOTK\Model\\' . $this->profile['model']);

        if (!class_exists($this->modelClass) || !is_subclass_of($this->modelClass, '\BOTK\ModelInterface')) {
            throw new \InvalidArgumentException("The provided model ({$this->profile['model']}) is unknown");
        }

        // every configurable callback must be callable; checked in a fixed order
        foreach (['datamapper', 'dataCleaner', 'rawdataSanitizer', 'factsErrorDetector'] as $callbackName) {
            if (!is_callable($this->profile[$callbackName])) {
                throw new \InvalidArgumentException("Invalid {$callbackName} callback");
            }
        }
    }


    /**
     * Two level array filter, the default for the dataCleaner callback.
     *
     * Falsy values are dropped from the top level and from arrays nested one level deep;
     * nested arrays left empty are dropped as well. Keys are preserved.
     *
     * @param array $data structured data to clean
     *
     * @return array the filtered data
     */
    public static function REMOVE_EMPTY(array $data)
    {
        $cleaned = array_map(
            static function ($value) {
                return is_array($value) ? array_filter($value) : $value;
            },
            $data
        );

        return array_filter($cleaned);
    }


    /**
     * The default for the factsErrorDetector callback.
     *
     * @param \BOTK\ModelInterface $data the computed facts
     *
     * @return string|false an error description when the facts contain no triples, false otherwise
     */
    public static function NOT_EMPTY_FACTS(\BOTK\ModelInterface $data)
    {
        return $data->getTripleCount() ? false : 'No facts found.';
    }


    /**
     * Creates facts from raw data.
     *
     * The raw data goes through rawdataSanitizer, datamapper and dataCleaner, then the model is
     * built with the model class' fromArray() and checked by factsErrorDetector. Counters are
     * updated along the way. Please note that empty facts are not always an error (i.e. no facts
     * is a fact); whether they are rejected depends on the configured factsErrorDetector.
     *
     * NOTE(review): the rate thresholds are evaluated only after a record has been processed
     * successfully, because rejected records leave this method through a warning exception first.
     *
     * @param mixed $rawData the raw data to process
     *
     * @return mixed the facts built by the model class
     *
     * @throws InvalidRawDataWarning   if the sanitizer returns an empty value
     * @throws FactsValidatorWarning   if the error detector reports an error on the computed facts
     * @throws TooManyErrorsException  if the error rate exceeds 'resilienceToErrors'
     * @throws TooManyInsanesException if the rejected raw data rate exceeds 'resilienceToInsanes'
     */
    public function factualize($rawData)
    {
        $rawdataSanitizer = $this->profile['rawdataSanitizer'];
        $validRawData = $rawdataSanitizer($rawData);
        $this->counter['entity']++;

        if (empty($validRawData)) {
            $this->counter['insane']++;
            throw new InvalidRawDataWarning("Invalid rawdata", $rawData);
        }

        $datamapper = $this->profile['datamapper'];
        $dataCleaner = $this->profile['dataCleaner'];
        $factsErrorDetector = $this->profile['factsErrorDetector'];

        $data = $dataCleaner($datamapper($validRawData));
        $facts = call_user_func($this->modelClass . '::fromArray', $data, $this->profile['modelOptions']);
        $this->counter['triple'] += $facts->getTripleCount();

        $error = $factsErrorDetector($facts);
        if ($error) {
            $this->counter['error']++;
            throw new FactsValidatorWarning($error, $facts);
        }

        // Rates are meaningful only once enough entities have been processed. The positive check
        // avoids a division by zero if the entity counter was altered through addToCounter().
        $entities = $this->counter['entity'];
        if ($entities > 0 && $entities > $this->profile['entityThreshold']) {
            // ensure that there are not too many errors
            $errorRate = $this->counter['error'] / $entities;
            if ($errorRate > $this->profile['resilienceToErrors']) {
                $x = $this->profile['resilienceToErrors'] * 100;
                throw new TooManyErrorsException("Error rate in data processing exceeded the {$x}% threshold");
            }

            // ensure that there is not too much unacceptable raw data
            $insaneRate = $this->counter['insane'] / $entities;
            if ($insaneRate > $this->profile['resilienceToInsanes']) {
                $x = $this->profile['resilienceToInsanes'] * 100;
                throw new TooManyInsanesException("Unacceptable data rate exceeded the {$x}% threshold");
            }
        }

        return $facts;
    }


    /**
     * Builds the linked data header: the model's turtle header followed, when 'documentURL' is
     * set, by metadata about the document (type, optional source, triple and entity counters).
     *
     * @return string the header text
     */
    public function generateLinkedDataHeader()
    {
        $metadata = '';
        if ($this->profile['documentURL'] !== null) {
            // Requires foaf:, dct: and void: prefixes to be defined in model
            $documentURL = $this->profile['documentURL'];
            $metadata .= "\n<{$documentURL}> a foaf:Document .\n";
            if (!empty($this->profile['source'])) {
                $metadata .= "<{$documentURL}> dct:source <{$this->profile['source']}>. \n";
            }
            // void:dataDump is the property name defined by the VoID vocabulary (IRIs are case sensitive)
            $metadata .= "[] a void:Dataset; void:dataDump <{$documentURL}>;void:triples {$this->counter['triple']} ;void:entities {$this->counter['entity']}.\n";
        }

        // The model options are stored in the profile: this class has no modelOptions property.
        $base = $this->profile['modelOptions']['base']['default'] ?? null;

        return call_user_func($this->modelClass . '::getTurtleHeader', $base) . $metadata;
    }


    /**
     * Builds a one line comment summarizing the counters.
     *
     * @return string the footer text
     */
    public function generateLinkedDataFooter()
    {
        return "#Generated {$this->counter['triple']} good triples from {$this->counter['entity']} entities ({$this->counter['insane']} ignored), {$this->counter['error']} errors\n";
    }


    /**
     * Adds a value to one of the counters.
     *
     * @param string $counter counter name: 'triple', 'error', 'insane' or 'entity'
     * @param mixed  $val     amount to add, converted with intval()
     *
     * @throws \InvalidArgumentException if the counter name is unknown
     */
    public function addToCounter($counter, $val = 1)
    {
        if (!array_key_exists($counter, $this->counter)) {
            throw new \InvalidArgumentException("Invalid counter name");
        }
        $this->counter[$counter] += intval($val);
    }


    /**
     * Returns the current value of all counters.
     *
     * @return array counters indexed by name
     */
    public function getCounters()
    {
        return $this->counter;
    }
}