Test Setup Failed
Push — master ( 51a98e...17386c )
by Enrico
02:02 queued 10s
created

Levenshtein   A

Complexity

Total Complexity 23

Size/Duplication

Total Lines 87
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 1

Importance

Changes 0
Metric Value
wmc 23
lcom 1
cbo 1
dl 0
loc 87
rs 10
c 0
b 0
f 0

3 Methods

Rating   Name   Duplication   Size   Complexity  
A setSensitivity() 0 7 2
B findClosestUriToToken() 0 29 11
B run() 0 39 10
1
<?php
2
/*
3
 * Copyright (c) 2019 LinkedData.Center. All rights reserved.
4
 */
5
namespace BOTK\Reasoner;
6
7
/**
 * Reasoner that links URIs through fuzzy (Levenshtein) matching of string tokens.
 *
 * Sensitivity: the acceptable rate of changes (0-1), relative to the length of the
 * token being searched.
 *
 * Input stream specification:
 *   The input stream is divided into two parts.
 *   The first part contains, one per row, a pair made of a target URI and a string.
 *   Then there is an empty line.
 *   The second part contains, one per row, a pair made of a URI and a token to be
 *   searched among the words of the strings read in the first part.
 *
 * Output stream:
 *   A SPARQL "INSERT DATA" block for the configured graph name, containing one
 *   statement "<target URI> <property> <URI>." for every second-part row whose token
 *   has an acceptable minimum Levenshtein distance from a word of a target string.
 *
 * NOTE(review): $this->inputStream, $this->outputStream, $this->graphName and
 * $this->property are not declared here; presumably they are provided by
 * AbstractReasoner - confirm against that class.
 */
class Levenshtein extends AbstractReasoner
{
    /** @var float acceptable rate of changes, relative to the searched token length */
    protected $sensitivity = 0.2;


    /**
     * Sets the acceptable rate of changes.
     *
     * The range is only checked through assert(), so it is enforced only when
     * assertions are enabled.
     *
     * @param float $sensitivity expected to be in the range 0-1
     *
     * @return $this
     */
    public function setSensitivity( float $sensitivity )
    {
        // The original check referenced a misspelled (undefined) variable and therefore
        // always passed; it must validate the actual argument.
        assert($sensitivity >= 0 && $sensitivity <= 1);

        $this->sensitivity = $sensitivity;

        return $this;
    }


    /**
     * Finds the target whose tokens contain the closest match for $token.
     *
     * Only target tokens whose length differs from the token length by at most
     * round(token length * sensitivity) bytes are compared. When several targets are
     * equally close, the last one encountered wins.
     *
     * @param string $token   the token to search for (the caller passes it lowercased)
     * @param array  $targets a hash mapping each target URI to its list of tokens
     *
     * @return string|int|false the key of the closest target (PHP turns numeric string
     *                          keys into integers), or false when no target is within
     *                          the allowed distance
     */
    protected function findClosestUriToToken( $token, array $targets)
    {
        $tokenLength = strlen($token);
        $absSensitivity = round($tokenLength * $this->sensitivity);
        $minLength = max(1, $tokenLength - $absSensitivity);
        $maxLength = $tokenLength + $absSensitivity;

        $closestTarget = null;
        $shortest = -1; // -1 means "no distance computed yet"

        foreach ($targets as $targetUri => $targetTokens) {
            foreach ($targetTokens as $targetToken) {
                $targetTokenLength = strlen($targetToken);
                if ($targetTokenLength < $minLength || $targetTokenLength > $maxLength) {
                    continue; // length gap alone already exceeds the allowed distance
                }

                $lev = levenshtein($token, $targetToken);
                if ($lev < 0) {
                    // Before PHP 8.0 levenshtein() returns -1 when an argument is longer
                    // than 255 characters; that is not a distance and must not be
                    // mistaken for a (better than perfect) match.
                    continue;
                }

                if ($shortest < 0 || $lev <= $shortest) {
                    $closestTarget = $targetUri;
                    $shortest = $lev;
                }

                if ($shortest === 0) {
                    break 2; // a perfect match found: stop scanning every target
                }
            }
        }

        if ($closestTarget && ($shortest <= $absSensitivity)) {
            return $closestTarget;
        }

        return false;
    }


    /**
     * Reads both parts of the input stream and writes the resulting statements.
     *
     * The token of each second-part row is lowercased and matched as a whole (it is
     * not split into words) against the words of every target string.
     */
    public function run ()
    {
        // analyze first input stream part: read targets
        $targets = $this->readTargets();

        // analyze second input stream part: tokens to be searched in targets
        fwrite($this->outputStream, "INSERT DATA { GRAPH <$this->graphName> {\n");

        while (($data = fgetcsv($this->inputStream)) !== false) {
            if ($data === [null]) {
                continue; // fgetcsv() reports a blank line as an array holding a single null
            }

            assert(!empty($data[0]) && !empty($data[1]));

            if (!isset($data[1])) {
                continue; // a row without a token can never match
            }

            list($uri, $token) = $data;
            if ($closestUri = $this->findClosestUriToToken(strtolower($token), $targets)) {
                fprintf($this->outputStream, "<%s> <%s> <%s>.\n", $closestUri, $this->property, $uri);
            }
        }

        fwrite($this->outputStream, "}}");
    }


    /**
     * Reads the first part of the input stream.
     *
     * Reading stops at end of stream or at the first row that has no second column
     * (the empty separator line is such a row). Rows whose string yields no usable
     * token are dropped; a URI that appears more than once keeps its last row.
     *
     * @return array a hash mapping each target URI to its list of lowercased tokens
     */
    private function readTargets()
    {
        $targets = [];

        while (($data = fgetcsv($this->inputStream)) && isset($data[1])) {
            assert($data[0] && $data[1]);

            $tokens = $this->extractTokens($data[1]);
            if (!empty($tokens)) {
                $targets[$data[0]] = $tokens;
            }
        }

        return $targets;
    }


    /**
     * Splits a string on spaces and returns its usable words, lowercased.
     *
     * A word is kept only when floor(length * sensitivity) is greater than zero, so
     * too short words are ignored (and with a sensitivity of 0 no word is kept at all).
     * The empty pieces produced by consecutive spaces have length 0 and are therefore
     * dropped by the same test.
     *
     * @param string $string the string to tokenize
     *
     * @return string[]
     */
    private function extractTokens($string)
    {
        $tokens = [];

        foreach (explode(' ', $string) as $word) {
            if (floor(strlen($word) * $this->sensitivity) > 0) {
                $tokens[] = strtolower($word);
            }
        }

        return $tokens;
    }
}