| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  |  * CSVelte: Slender, elegant CSV for PHP | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  * Inspired by Python's CSV module and Frictionless Data and the W3C's CSV | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  * standardization efforts, CSVelte was written in an effort to take all the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  * suck out of working with CSV. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  * @copyright Copyright (c) 2018 Luke Visinoni | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  * @author    Luke Visinoni <[email protected]> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  * @license   See LICENSE file (MIT license) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  | namespace CSVelte\Sniffer; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  | use function Noz\collect; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  | use Noz\Collection\Collection; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  | use function Stringy\create as s; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Sniffer that guesses a delimiter from how evenly it divides each line.
                 *
                 * For every candidate delimiter, each line is split on that delimiter and the
                 * standard deviation of the resulting field lengths is computed. The per-line
                 * deviations are averaged into one score per candidate, and the candidates are
                 * then sorted by that score.
                 */
                class SniffDelimiterByDistribution extends AbstractSniffer
                {
                    /**
                     * Guess delimiter in a string of data
                     *
                     * Guesses the delimiter in a data set by analyzing which of the provided possible delimiter characters is most
                     * evenly distributed (horizontally) across the dataset.
                     *
                     * Reads two options via getOption(): "lineTerminator" (falls back to "\n" when
                     * falsy) and "delimiters" (the candidate delimiter characters).
                     *
                     * @param string $data The data to analyze
                     *
                     * @return string The candidate delimiter ranked first after sorting the scores
                     */
                    public function sniff($data)
                    {
                        $lineTerminator = $this->getOption('lineTerminator') ?: "\n";
                        $delimiters = $this->getOption('delimiters');

                        // Quoted strings are stripped first so that delimiter characters
                        // inside quoted fields do not take part in the analysis.
                        $lines = collect(explode($lineTerminator, $this->removeQuotedStrings($data)));

                        // flip() turns the candidate characters into keys, so the callback
                        // receives the delimiter as its second (key) argument; the first
                        // argument (the original index) is not needed.
                        return collect($delimiters)->flip()->map(function ($x, $char) use ($lines) {
                                // Stringy::split() interprets its argument as a regular
                                // expression. Escape the delimiter so that characters such
                                // as "|", ".", "+" or "*" are matched literally.
                                $pattern = preg_quote($char, '/');

                                // Standard deviation of the field lengths, one value per line.
                                $deviations = $lines->map(function ($line) use ($pattern) {
                                    $lengths = collect(s($line)->split($pattern))
                                        ->map(function ($str) {
                                            return $str->length();
                                        });

                                    $avg = $lengths->average();

                                    // sqrt(sum((len - avg)^2) / n)
                                    return sqrt($lengths->fold(function ($squares, $len) use ($avg) {
                                            return $squares->add(pow($len - $avg, 2));
                                        }, new Collection)
                                        ->sum() / $lengths->count());
                                });

                                // Score for this candidate: mean deviation across all lines.
                                return $deviations->average();
                            })
                            ->sort()
                            // NOTE(review): position 1 is presumably the first (lowest-scoring)
                            // entry, i.e. positions are 1-based in this collection - confirm.
                            ->getKeyAt(1);
                    }
                }