| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  |  * CSVelte: Slender, elegant CSV for PHP | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  * Inspired by Python's CSV module and Frictionless Data and the W3C's CSV | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  * standardization efforts, CSVelte was written in an effort to take all the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  * suck out of working with CSV. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  * @copyright Copyright (c) 2018 Luke Visinoni | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  * @author    Luke Visinoni <[email protected]> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  * @license   See LICENSE file (MIT license) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  | namespace CSVelte\Sniffer; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  | use function Noz\collect; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  | use Noz\Collection\Collection; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  | use function Stringy\create as s; | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 18 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                class SniffDelimiterByConsistency extends AbstractSniffer
                {
                    /**
                     * Guess the delimiter in a string of data.
                     *
                     * Guesses the delimiter character by analyzing how consistently each
                     * candidate occurs across the lines of the sample: for every candidate
                     * the most common per-line count (its mode) is taken as the "expected"
                     * count, and the candidate whose per-line count matches its expected
                     * count on the most lines wins. It is possible for this sniffer to
                     * return multiple characters if there is a tie.
                     *
                     * Candidates are read from the "delimiters" option. Lines are split on
                     * the "lineTerminator" option, falling back to "\n" when that option
                     * is empty.
                     *
                     * @param string $data The data to analyze
                     *
                     * @return string[] The winning delimiter(s), in order of preference
                     *                  (the order of the "delimiters" option)
                     */
                    public function sniff($data)
                    {
                        $delimiters = $this->getOption('delimiters');
                        $lineTerminator = $this->getOption('lineTerminator') ?: "\n";

                        // Candidate delimiters as array keys. This does not vary per line,
                        // so it is built once here rather than inside the per-line closure.
                        $preferred = array_flip($delimiters);

                        // @todo it would probably make for more consistent results if you
                        // popped the last line since it will most likely be truncated due
                        // to the arbitrary nature of the sample size
                        $lines = collect(explode($lineTerminator, $this->removeQuotedStrings($data)));

                        // Build a table of candidate characters and their frequencies for
                        // each line. Every candidate starts at zero so that each row has
                        // the same set of columns, then the counts actually observed on
                        // the line are merged over those zeroes.
                        $frequencies = $lines->map(function($line) use ($preferred) {
                            return collect($preferred)
                                ->map(function() { return 0; })
                                ->merge(collect(s($line)->chars())->frequency()->kintersect($preferred))
                                ->toArray();
                        });

                        // Now determine the mode for each char to decide the "expected"
                        // number of times a char (possible delim) will occur on each line.
                        // The first closure argument is the flipped index and is not
                        // needed; it is only declared to reach the key argument.
                        // NOTE(review): filter() without a callback presumably drops
                        // candidates whose mode is falsy (i.e. zero) - confirm against the
                        // Collection implementation.
                        $modes = collect($delimiters)
                            ->flip()
                            ->map(function($index, $delim) use ($frequencies) {
                                return $frequencies->getColumn($delim)->mode();
                            })
                            ->filter();

                        // For each candidate, count the lines on which it occurs exactly
                        // the expected number of times. The closure previously declared a
                        // third, never-used line-number parameter; PHP ignores surplus
                        // arguments, so it has simply been dropped.
                        /** @var Collection $consistencies */
                        $consistencies = $frequencies->recollect(function(Collection $accum, $freq) use ($modes) {
                                $modes->each(function($expected, $char) use ($accum, $freq) {
                                    if (collect($freq)->get($char) == $expected) {
                                        $matches = $accum->get($char, 0);
                                        $accum->set($char, ++$matches);
                                    }
                                });

                                return $accum;
                            })
                            ->sort()
                            ->reverse();

                        // No candidate matched its expected count on any line, so there
                        // are no winners. Returning here also guarantees max() is never
                        // evaluated on an empty collection.
                        if (!$consistencies->toArray()) {
                            return [];
                        }

                        // The best score is the same for every element, so compute it
                        // once instead of once per filtered element.
                        $best = $consistencies->max();
                        $winners = $consistencies->filter(function($score) use ($best) {
                                return $score === $best;
                            })
                            ->keys();

                        // return winners in order of preference
                        return collect($delimiters)
                            ->intersect($winners)
                            ->values()
                            ->toArray();
                    }
                }
            
                        
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.