@@ -513,12 +513,12 @@ discard block |
||
513 | 513 | return $this->guessDelimByDistribution($decision, $eol); |
514 | 514 | } catch (TasterException $e) { |
515 | 515 | // if somehow we STILL can't come to a consensus, then fall back to a |
516 | - // "preferred delimiters" list... |
|
517 | - foreach ($this->delims as $key => $chr) { |
|
518 | - if (collect($decision)->contains($chr)) { |
|
519 | - return $chr; |
|
520 | - } |
|
521 | - } |
|
516 | + // "preferred delimiters" list... |
|
517 | + foreach ($this->delims as $key => $chr) { |
|
518 | + if (collect($decision)->contains($chr)) { |
|
519 | + return $chr; |
|
520 | + } |
|
521 | + } |
|
522 | 522 | } |
523 | 523 | } |
524 | 524 | |
@@ -588,8 +588,8 @@ discard block |
||
588 | 588 | })->map(function ($dists) { |
589 | 589 | return $dists->average(); |
590 | 590 | })->sort() |
591 | - ->reverse() |
|
592 | - ->getKeyAtPosition(0)]; |
|
591 | + ->reverse() |
|
592 | + ->getKeyAtPosition(0)]; |
|
593 | 593 | } catch (Exception $e) { |
594 | 594 | throw new TasterException('delimiter cannot be determined by distribution', TasterException::ERR_DELIMITER); |
595 | 595 | } |
@@ -15,12 +15,10 @@ |
||
15 | 15 | |
16 | 16 | use CSVelte\Collection\AbstractCollection; |
17 | 17 | use CSVelte\Collection\CharCollection; |
18 | -use CSVelte\Collection\Collection; |
|
19 | 18 | use CSVelte\Collection\NumericCollection; |
20 | 19 | use CSVelte\Collection\TabularCollection; |
21 | 20 | use CSVelte\Contract\Streamable; |
22 | 21 | use CSVelte\Exception\TasterException; |
23 | - |
|
24 | 22 | use DateTime; |
25 | 23 | use Exception; |
26 | 24 | use OutOfBoundsException; |
@@ -232,12 +232,12 @@ discard block |
||
232 | 232 | $types = new TabularCollection(); |
233 | 233 | |
234 | 234 | // callback to build the aforementioned collection |
235 | - $buildTypes = function ($line, $line_no) use ($types, $delim, $eol) { |
|
235 | + $buildTypes = function($line, $line_no) use ($types, $delim, $eol) { |
|
236 | 236 | if ($line_no > 2) { |
237 | 237 | return; |
238 | 238 | } |
239 | 239 | $line = str_replace(self::PLACEHOLDER_NEWLINE, $eol, $line); |
240 | - $getType = function ($field, $colpos) use ($types, $line, $line_no, $delim) { |
|
240 | + $getType = function($field, $colpos) use ($types, $line, $line_no, $delim) { |
|
241 | 241 | $field = str_replace(self::PLACEHOLDER_DELIM, $delim, $field); |
242 | 242 | $fieldMeta = [ |
243 | 243 | 'value' => $field, |
@@ -264,8 +264,8 @@ discard block |
||
264 | 264 | |
265 | 265 | $hasHeader = new NumericCollection(); |
266 | 266 | $possibleHeader = collect($types->shift()); |
267 | - $types->walk(function (AbstractCollection $row) use ($hasHeader, $possibleHeader) { |
|
268 | - $row->walk(function (AbstractCollection $fieldMeta, $col_no) use ($hasHeader, $possibleHeader) { |
|
267 | + $types->walk(function(AbstractCollection $row) use ($hasHeader, $possibleHeader) { |
|
268 | + $row->walk(function(AbstractCollection $fieldMeta, $col_no) use ($hasHeader, $possibleHeader) { |
|
269 | 269 | try { |
270 | 270 | $col = collect($possibleHeader->get($col_no, null, true)); |
271 | 271 | if ($fieldMeta->get('type') == self::TYPE_STRING) { |
@@ -329,9 +329,9 @@ discard block |
||
329 | 329 | { |
330 | 330 | $str = $this->removeQuotedStrings($this->sample); |
331 | 331 | $eols = [ |
332 | - self::EOL_WINDOWS => "\r\n", // 0x0D - 0x0A - Windows, DOS OS/2 |
|
333 | - self::EOL_UNIX => "\n", // 0x0A - - Unix, OSX |
|
334 | - self::EOL_TRS80 => "\r", // 0x0D - - Apple ][, TRS80 |
|
332 | + self::EOL_WINDOWS => "\r\n", // 0x0D - 0x0A - Windows, DOS OS/2 |
|
333 | + self::EOL_UNIX => "\n", // 0x0A - - Unix, OSX |
|
334 | + self::EOL_TRS80 => "\r", // 0x0D - - Apple ][, TRS80 |
|
335 | 335 | ]; |
336 | 336 | |
337 | 337 | $curCount = 0; |
@@ -374,12 +374,12 @@ discard block |
||
374 | 374 | $patterns = []; |
375 | 375 | // delim can be anything but line breaks, quotes, alphanumeric, underscore, backslash, or any type of spaces |
376 | 376 | $antidelims = implode(["\r", "\n", "\w", preg_quote('"', '/'), preg_quote("'", '/'), preg_quote(chr(self::SPACE), '/')]); |
377 | - $delim = '(?P<delim>[^' . $antidelims . '])'; |
|
377 | + $delim = '(?P<delim>[^'.$antidelims.'])'; |
|
378 | 378 | $quote = '(?P<quoteChar>"|\'|`)'; // @todo I think MS Excel uses some strange encoding for fancy open/close quotes |
379 | - $patterns[] = '/' . $delim . ' ?' . $quote . '.*?\2\1/ms'; // ,"something", - anything but whitespace or quotes followed by a possible space followed by a quote followed by anything followed by same quote, followed by same anything but whitespace |
|
380 | - $patterns[] = '/(?:^|\n)' . $quote . '.*?\1' . $delim . ' ?/ms'; // 'something', - beginning of line or line break, followed by quote followed by anything followed by quote followed by anything but whitespace or quotes |
|
381 | - $patterns[] = '/' . $delim . ' ?' . $quote . '.*?\2(?:^|\n)/ms'; // ,'something' - anything but whitespace or quote followed by possible space followed by quote followed by anything followed by quote, followed by end of line |
|
382 | - $patterns[] = '/(?:^|\n)' . $quote . '.*?\2(?:$|\n)/ms'; // 'something' - beginning of line followed by quote followed by anything followed by quote followed by same quote followed by end of line |
|
379 | + $patterns[] = '/'.$delim.' ?'.$quote.'.*?\2\1/ms'; // ,"something", - anything but whitespace or quotes followed by a possible space followed by a quote followed by anything followed by same quote, followed by same anything but whitespace |
|
380 | + $patterns[] = '/(?:^|\n)'.$quote.'.*?\1'.$delim.' ?/ms'; // 'something', - beginning of line or line break, followed by quote followed by anything followed by quote followed by anything but whitespace or quotes |
|
381 | + $patterns[] = '/'.$delim.' ?'.$quote.'.*?\2(?:^|\n)/ms'; // ,'something' - anything but whitespace or quote followed by possible space followed by quote followed by anything followed by quote, followed by end of line |
|
382 | + $patterns[] = '/(?:^|\n)'.$quote.'.*?\2(?:$|\n)/ms'; // 'something' - beginning of line followed by quote followed by anything followed by quote followed by same quote followed by end of line |
|
383 | 383 | foreach ($patterns as $pattern) { |
384 | 384 | // @todo I had to add the error suppression char here because it was |
385 | 385 | // causing undefined offset errors with certain data sets. strange... |
@@ -430,15 +430,15 @@ discard block |
||
430 | 430 | // each frequency (in 10 lines, "tab" occurred 5 times on 7 of those |
431 | 431 | // lines, 6 times on 2 lines, and 7 times on 1 line) |
432 | 432 | collect(explode($eol, $this->removeQuotedStrings($this->sample))) |
433 | - ->walk(function ($line, $line_no) use ($frequencies) { |
|
433 | + ->walk(function($line, $line_no) use ($frequencies) { |
|
434 | 434 | collect(str_split($line)) |
435 | - ->filter(function ($c) { |
|
435 | + ->filter(function($c) { |
|
436 | 436 | return collect($this->delims)->contains($c); |
437 | 437 | }) |
438 | 438 | ->frequency() |
439 | 439 | ->sort() |
440 | 440 | ->reverse() |
441 | - ->walk(function ($count, $char) use ($frequencies, $line_no) { |
|
441 | + ->walk(function($count, $char) use ($frequencies, $line_no) { |
|
442 | 442 | try { |
443 | 443 | $char_counts = $frequencies->get($char, null, true); |
444 | 444 | } catch (OutOfBoundsException $e) { |
@@ -451,8 +451,8 @@ discard block |
||
451 | 451 | // the above only finds frequencies for characters if they exist in |
452 | 452 | // a given line. This will go back and fill in zeroes where a char |
453 | 453 | // didn't occur at all in a given line (needed to determine mode) |
454 | - ->walk(function ($line, $line_no) use ($frequencies) { |
|
455 | - $frequencies->walk(function ($counts, $char) use ($line_no, $frequencies) { |
|
454 | + ->walk(function($line, $line_no) use ($frequencies) { |
|
455 | + $frequencies->walk(function($counts, $char) use ($line_no, $frequencies) { |
|
456 | 456 | try { |
457 | 457 | $char_counts = $frequencies->get($char, null, true); |
458 | 458 | } catch (OutOfBoundsException $e) { |
@@ -471,8 +471,8 @@ discard block |
||
471 | 471 | foreach ($frequencies as $char => $freq) { |
472 | 472 | $modes->set($char, (new NumericCollection($freq))->mode()); |
473 | 473 | } |
474 | - $frequencies->walk(function ($f, $chr) use ($modes, $consistencies) { |
|
475 | - collect($f)->walk(function ($num) use ($modes, $chr, $consistencies) { |
|
474 | + $frequencies->walk(function($f, $chr) use ($modes, $consistencies) { |
|
475 | + collect($f)->walk(function($num) use ($modes, $chr, $consistencies) { |
|
476 | 476 | if ($expected = $modes->get($chr)) { |
477 | 477 | if ($num == $expected) { |
478 | 478 | // met the goal, yay! |
@@ -563,19 +563,19 @@ discard block |
||
563 | 563 | // @todo Write a method that does this... |
564 | 564 | $lines = collect(explode($eol, $this->removeQuotedStrings($this->sample))); |
565 | 565 | |
566 | - return $delims[collect($delims)->map(function ($delim) use (&$distrib, $lines) { |
|
566 | + return $delims[collect($delims)->map(function($delim) use (&$distrib, $lines) { |
|
567 | 567 | $linedist = collect(); |
568 | - $lines->walk(function ($line, $line_no) use (&$linedist, $delim) { |
|
568 | + $lines->walk(function($line, $line_no) use (&$linedist, $delim) { |
|
569 | 569 | if (!strlen($line)) { |
570 | 570 | return; |
571 | 571 | } |
572 | 572 | $sectstot = 10; |
573 | 573 | $sectlen = (int) (strlen($line) / $sectstot); |
574 | 574 | $sections = collect(str_split($line, $sectlen)) |
575 | - ->map(function ($section) use ($delim) { |
|
575 | + ->map(function($section) use ($delim) { |
|
576 | 576 | return substr_count($section, $delim); |
577 | 577 | }) |
578 | - ->filter(function ($count) { |
|
578 | + ->filter(function($count) { |
|
579 | 579 | return (bool) $count; |
580 | 580 | }); |
581 | 581 | if (is_numeric($count = $sections->count())) { |
@@ -584,7 +584,7 @@ discard block |
||
584 | 584 | }); |
585 | 585 | |
586 | 586 | return $linedist; |
587 | - })->map(function ($dists) { |
|
587 | + })->map(function($dists) { |
|
588 | 588 | return $dists->average(); |
589 | 589 | })->sort() |
590 | 590 | ->reverse() |
@@ -634,9 +634,9 @@ discard block |
||
634 | 634 | |
635 | 635 | // walk through each line from the data sample to determine which fields |
636 | 636 | // are quoted and which aren't |
637 | - $qsFunc = function ($line) use (&$quoting_styles, &$freq, $eol, $delim) { |
|
637 | + $qsFunc = function($line) use (&$quoting_styles, &$freq, $eol, $delim) { |
|
638 | 638 | $line = str_replace(self::PLACEHOLDER_NEWLINE, $eol, $line); |
639 | - $qnqaFunc = function ($field) use (&$quoting_styles, &$freq, $delim) { |
|
639 | + $qnqaFunc = function($field) use (&$quoting_styles, &$freq, $delim) { |
|
640 | 640 | $field = str_replace(self::PLACEHOLDER_DELIM, $delim, $field); |
641 | 641 | if ($this->isQuoted($field)) { |
642 | 642 | $field = $this->unQuote($field); |
@@ -657,7 +657,7 @@ discard block |
||
657 | 657 | $lines->walk($qsFunc->bindTo($this)); |
658 | 658 | |
659 | 659 | $types = $freq->get('quoted')->unique(); |
660 | - $quoting_styles = $quoting_styles->filter(function ($val) { |
|
660 | + $quoting_styles = $quoting_styles->filter(function($val) { |
|
661 | 661 | return (bool) $val; |
662 | 662 | }); |
663 | 663 | // if quoting_styles still has QUOTE_ALL or QUOTE_NONE, then return |
@@ -677,7 +677,7 @@ discard block |
||
677 | 677 | if ($types->contains(self::DATA_NONNUMERIC)) { |
678 | 678 | // allow for a SMALL amount of error here |
679 | 679 | $counts = collect([self::DATA_SPECIAL => 0, self::DATA_NONNUMERIC => 0]); |
680 | - $freq->get('quoted')->walk(function ($type) use (&$counts) { |
|
680 | + $freq->get('quoted')->walk(function($type) use (&$counts) { |
|
681 | 681 | $counts->increment($type); |
682 | 682 | }); |
683 | 683 | // @todo is all this even necessary? seems unnecessary to me... |
@@ -770,7 +770,7 @@ discard block |
||
770 | 770 | */ |
771 | 771 | protected function replaceQuotedSpecialChars($data, $delim) |
772 | 772 | { |
773 | - return preg_replace_callback('/([\'"])(.*)\1/imsU', function ($matches) use ($delim) { |
|
773 | + return preg_replace_callback('/([\'"])(.*)\1/imsU', function($matches) use ($delim) { |
|
774 | 774 | $ret = preg_replace("/([\r\n])/", self::PLACEHOLDER_NEWLINE, $matches[0]); |
775 | 775 | $ret = str_replace($delim, self::PLACEHOLDER_DELIM, $ret); |
776 | 776 | |
@@ -810,7 +810,7 @@ discard block |
||
810 | 810 | $day = '[0-3]?[0-9]'; |
811 | 811 | $sep = '[\/\.\-]?'; |
812 | 812 | $time = '([0-2]?[0-9](:[0-5][0-9]){1,2}(am|pm)?|[01]?[0-9](am|pm))'; |
813 | - $date = '(' . $month . $sep . $day . $sep . $year . '|' . $day . $sep . $month . $sep . $year . '|' . $year . $sep . $month . $sep . $day . ')'; |
|
813 | + $date = '('.$month.$sep.$day.$sep.$year.'|'.$day.$sep.$month.$sep.$year.'|'.$year.$sep.$month.$sep.$day.')'; |
|
814 | 814 | $dt = new DateTime($data); |
815 | 815 | $dt->setTime(0, 0, 0); |
816 | 816 | $now = new DateTime(); |
@@ -282,7 +282,7 @@ discard block |
||
282 | 282 | * also optionally throw an OutOfBoundsException if no value is found. |
283 | 283 | * |
284 | 284 | * @param mixed $index The index of the data you want to get |
285 | - * @param mixed $default The default value to return if none available |
|
285 | + * @param null|integer $default The default value to return if none available |
|
286 | 286 | * @param bool $throw True if you want an exception to be thrown if no data found at $index |
287 | 287 | * @throws OutOfBoundsException If $throw is true and $index isn't found |
288 | 288 | * @return mixed The data found at $index or failing that, the $default |
@@ -547,7 +547,7 @@ discard block |
||
547 | 547 | * Returns a new collection with $items added. |
548 | 548 | * |
549 | 549 | * @param array $items Any number of arguments will be pushed onto the |
550 | - * @return mixed The first item in this collection |
|
550 | + * @return AbstractCollection A new collection with $items added |
|
551 | 551 | */ |
552 | 552 | public function push(...$items) |
553 | 553 | { |
@@ -560,7 +560,7 @@ discard block |
||
560 | 560 | * |
561 | 561 | * Returns a new collection with $items added. |
562 | 562 | * |
563 | - * @return mixed The first item in this collection |
|
563 | + * @return AbstractCollection A new collection with $items added |
|
564 | 564 | */ |
565 | 565 | public function unshift(...$items) |
566 | 566 | { |
@@ -616,7 +616,7 @@ discard block |
||
616 | 616 | /** |
617 | 617 | * Iterate over each item that matches criteria in callback |
618 | 618 | * |
619 | - * @param Closure|callable $callback A callback to use |
|
619 | + * @param Closure $callback A callback to use |
|
620 | 620 | * @param object $bindTo The object to bind to |
621 | 621 | * @return AbstractCollection |
622 | 622 | */ |
@@ -724,7 +724,7 @@ discard block |
||
724 | 724 | /** |
725 | 725 | * Returns collection in reverse order. |
726 | 726 | * |
727 | - * @param null $preserveKeys True if you want to preserve collection's keys |
|
727 | + * @param bool|null $preserveKeys True if you want to preserve collection's keys |
|
728 | 728 | * @return AbstractCollection This collection in reverse order. |
729 | 729 | */ |
730 | 730 | public function reverse($preserveKeys = null) |
@@ -297,7 +297,7 @@ discard block |
||
297 | 297 | return $this->data[$index]; |
298 | 298 | } |
299 | 299 | if ($throw) { |
300 | - throw new OutOfBoundsException(__CLASS__ . ' could not find value at index ' . $index); |
|
300 | + throw new OutOfBoundsException(__CLASS__.' could not find value at index '.$index); |
|
301 | 301 | } |
302 | 302 | |
303 | 303 | return $default; |
@@ -338,7 +338,7 @@ discard block |
||
338 | 338 | unset($this->data[$index]); |
339 | 339 | } else { |
340 | 340 | if ($throw) { |
341 | - throw new OutOfBoundsException('No value found at given index: ' . $index); |
|
341 | + throw new OutOfBoundsException('No value found at given index: '.$index); |
|
342 | 342 | } |
343 | 343 | } |
344 | 344 | |
@@ -493,7 +493,7 @@ discard block |
||
493 | 493 | */ |
494 | 494 | public function contains($value, $index = null) |
495 | 495 | { |
496 | - return (bool) $this->first(function ($val, $key) use ($value, $index) { |
|
496 | + return (bool) $this->first(function($val, $key) use ($value, $index) { |
|
497 | 497 | if (is_callable($value)) { |
498 | 498 | $found = $value($val, $key); |
499 | 499 | } else { |
@@ -525,11 +525,11 @@ discard block |
||
525 | 525 | public function duplicates() |
526 | 526 | { |
527 | 527 | $dups = []; |
528 | - $this->walk(function ($val, $key) use (&$dups) { |
|
528 | + $this->walk(function($val, $key) use (&$dups) { |
|
529 | 529 | $dups[$val][] = $key; |
530 | 530 | }); |
531 | 531 | |
532 | - return static::factory($dups)->filter(function ($val) { |
|
532 | + return static::factory($dups)->filter(function($val) { |
|
533 | 533 | return count($val) > 1; |
534 | 534 | }); |
535 | 535 | } |
@@ -671,7 +671,7 @@ discard block |
||
671 | 671 | public function pairs() |
672 | 672 | { |
673 | 673 | return static::factory(array_map( |
674 | - function ($key, $val) { |
|
674 | + function($key, $val) { |
|
675 | 675 | return [$key, $val]; |
676 | 676 | }, |
677 | 677 | array_keys($this->data), |
@@ -880,7 +880,7 @@ discard block |
||
880 | 880 | } |
881 | 881 | } |
882 | 882 | // if row contains an array it isn't tabular |
883 | - if (array_reduce($row, function ($carry, $item) { |
|
883 | + if (array_reduce($row, function($carry, $item) { |
|
884 | 884 | return is_array($item) && $carry; |
885 | 885 | }, true)) { |
886 | 886 | return false; |
@@ -989,7 +989,7 @@ discard block |
||
989 | 989 | protected function assertCorrectInputDataType($data) |
990 | 990 | { |
991 | 991 | if (!$this->isConsistentDataStructure($data)) { |
992 | - throw new InvalidArgumentException(__CLASS__ . ' expected traversable data, got: ' . gettype($data)); |
|
992 | + throw new InvalidArgumentException(__CLASS__.' expected traversable data, got: '.gettype($data)); |
|
993 | 993 | } |
994 | 994 | } |
995 | 995 |
@@ -40,7 +40,7 @@ discard block |
||
40 | 40 | return call_user_func_array([$column, $method], $args); |
41 | 41 | } |
42 | 42 | } |
43 | - throw new BadMethodCallException('Method does not exist: ' . __CLASS__ . "::{$method}()"); |
|
43 | + throw new BadMethodCallException('Method does not exist: '.__CLASS__."::{$method}()"); |
|
44 | 44 | } |
45 | 45 | |
46 | 46 | /** |
@@ -76,7 +76,7 @@ discard block |
||
76 | 76 | return static::factory($values); |
77 | 77 | } |
78 | 78 | if ($throw) { |
79 | - throw new OutOfBoundsException(__CLASS__ . ' could not find column: ' . $column); |
|
79 | + throw new OutOfBoundsException(__CLASS__.' could not find column: '.$column); |
|
80 | 80 | } |
81 | 81 | |
82 | 82 | return false; |