@@ -513,12 +513,12 @@ discard block |
||
513 | 513 | return $this->guessDelimByDistribution($decision, $eol); |
514 | 514 | } catch (TasterException $e) { |
515 | 515 | // if somehow we STILL can't come to a consensus, then fall back to a |
516 | - // "preferred delimiters" list... |
|
517 | - foreach ($this->delims as $key => $chr) { |
|
518 | - if (collect($decision)->contains($chr)) { |
|
519 | - return $chr; |
|
520 | - } |
|
521 | - } |
|
516 | + // "preferred delimiters" list... |
|
517 | + foreach ($this->delims as $key => $chr) { |
|
518 | + if (collect($decision)->contains($chr)) { |
|
519 | + return $chr; |
|
520 | + } |
|
521 | + } |
|
522 | 522 | } |
523 | 523 | } |
524 | 524 | |
@@ -588,8 +588,8 @@ discard block |
||
588 | 588 | })->map(function ($dists) { |
589 | 589 | return $dists->average(); |
590 | 590 | })->sort() |
591 | - ->reverse() |
|
592 | - ->getKeyAtPosition(0)]; |
|
591 | + ->reverse() |
|
592 | + ->getKeyAtPosition(0)]; |
|
593 | 593 | } catch (Exception $e) { |
594 | 594 | throw new TasterException('delimiter cannot be determined by distribution', TasterException::ERR_DELIMITER); |
595 | 595 | } |
@@ -233,11 +233,11 @@ discard block |
||
233 | 233 | $types = new TabularCollection(); |
234 | 234 | |
235 | 235 | // callback to build the aforementioned collection |
236 | - $buildTypes = function ($line, $line_no) use ($types, $delim, $eol) { |
|
236 | + $buildTypes = function($line, $line_no) use ($types, $delim, $eol) { |
|
237 | 237 | |
238 | 238 | if ($line_no > 2) return; |
239 | 239 | $line = str_replace(self::PLACEHOLDER_NEWLINE, $eol, $line); |
240 | - $getType = function ($field, $colpos) use ($types, $line, $line_no, $delim) { |
|
240 | + $getType = function($field, $colpos) use ($types, $line, $line_no, $delim) { |
|
241 | 241 | $field = str_replace(self::PLACEHOLDER_DELIM, $delim, $field); |
242 | 242 | $fieldMeta = [ |
243 | 243 | "value" => $field, |
@@ -265,8 +265,8 @@ discard block |
||
265 | 265 | |
266 | 266 | $hasHeader = new NumericCollection(); |
267 | 267 | $possibleHeader = collect($types->shift()); |
268 | - $types->walk(function (AbstractCollection $row) use ($hasHeader, $possibleHeader) { |
|
269 | - $row->walk(function (AbstractCollection $fieldMeta, $col_no) use ($hasHeader, $possibleHeader) { |
|
268 | + $types->walk(function(AbstractCollection $row) use ($hasHeader, $possibleHeader) { |
|
269 | + $row->walk(function(AbstractCollection $fieldMeta, $col_no) use ($hasHeader, $possibleHeader) { |
|
270 | 270 | try { |
271 | 271 | $col = collect($possibleHeader->get($col_no, null, true)); |
272 | 272 | if ($fieldMeta->get('type') == self::TYPE_STRING) { |
@@ -330,9 +330,9 @@ discard block |
||
330 | 330 | { |
331 | 331 | $str = $this->removeQuotedStrings($this->sample); |
332 | 332 | $eols = [ |
333 | - self::EOL_WINDOWS => "\r\n", // 0x0D - 0x0A - Windows, DOS OS/2 |
|
334 | - self::EOL_UNIX => "\n", // 0x0A - - Unix, OSX |
|
335 | - self::EOL_TRS80 => "\r", // 0x0D - - Apple ][, TRS80 |
|
333 | + self::EOL_WINDOWS => "\r\n", // 0x0D - 0x0A - Windows, DOS OS/2 |
|
334 | + self::EOL_UNIX => "\n", // 0x0A - - Unix, OSX |
|
335 | + self::EOL_TRS80 => "\r", // 0x0D - - Apple ][, TRS80 |
|
336 | 336 | ]; |
337 | 337 | |
338 | 338 | $curCount = 0; |
@@ -375,12 +375,12 @@ discard block |
||
375 | 375 | $patterns = []; |
376 | 376 | // delim can be anything but line breaks, quotes, alphanumeric, underscore, backslash, or any type of spaces |
377 | 377 | $antidelims = implode(["\r", "\n", "\w", preg_quote('"', '/'), preg_quote("'", '/'), preg_quote(chr(self::SPACE), '/')]); |
378 | - $delim = '(?P<delim>[^' . $antidelims . '])'; |
|
378 | + $delim = '(?P<delim>[^'.$antidelims.'])'; |
|
379 | 379 | $quote = '(?P<quoteChar>"|\'|`)'; // @todo I think MS Excel uses some strange encoding for fancy open/close quotes |
380 | - $patterns[] = '/' . $delim . ' ?' . $quote . '.*?\2\1/ms'; // ,"something", - anything but whitespace or quotes followed by a possible space followed by a quote followed by anything followed by same quote, followed by same anything but whitespace |
|
381 | - $patterns[] = '/(?:^|\n)' . $quote . '.*?\1' . $delim . ' ?/ms'; // 'something', - beginning of line or line break, followed by quote followed by anything followed by quote followed by anything but whitespace or quotes |
|
382 | - $patterns[] = '/' . $delim . ' ?' . $quote . '.*?\2(?:^|\n)/ms'; // ,'something' - anything but whitespace or quote followed by possible space followed by quote followed by anything followed by quote, followed by end of line |
|
383 | - $patterns[] = '/(?:^|\n)' . $quote . '.*?\2(?:$|\n)/ms'; // 'something' - beginning of line followed by quote followed by anything followed by quote followed by same quote followed by end of line |
|
380 | + $patterns[] = '/'.$delim.' ?'.$quote.'.*?\2\1/ms'; // ,"something", - anything but whitespace or quotes followed by a possible space followed by a quote followed by anything followed by same quote, followed by same anything but whitespace |
|
381 | + $patterns[] = '/(?:^|\n)'.$quote.'.*?\1'.$delim.' ?/ms'; // 'something', - beginning of line or line break, followed by quote followed by anything followed by quote followed by anything but whitespace or quotes |
|
382 | + $patterns[] = '/'.$delim.' ?'.$quote.'.*?\2(?:^|\n)/ms'; // ,'something' - anything but whitespace or quote followed by possible space followed by quote followed by anything followed by quote, followed by end of line |
|
383 | + $patterns[] = '/(?:^|\n)'.$quote.'.*?\2(?:$|\n)/ms'; // 'something' - beginning of line followed by quote followed by anything followed by quote followed by same quote followed by end of line |
|
384 | 384 | foreach ($patterns as $pattern) { |
385 | 385 | // @todo I had to add the error suppression char here because it was |
386 | 386 | // causing undefined offset errors with certain data sets. strange... |
@@ -389,7 +389,7 @@ discard block |
||
389 | 389 | } |
390 | 390 | } |
391 | 391 | if ($matches) { |
392 | - $qcad = array_intersect_key($matches, array_flip(['quoteChar','delim'])); |
|
392 | + $qcad = array_intersect_key($matches, array_flip(['quoteChar', 'delim'])); |
|
393 | 393 | if (!empty($matches['quoteChar']) && !empty($matches['delim'])) { |
394 | 394 | try { |
395 | 395 | return [ |
@@ -431,15 +431,15 @@ discard block |
||
431 | 431 | // each frequency (in 10 lines, "tab" occurred 5 times on 7 of those |
432 | 432 | // lines, 6 times on 2 lines, and 7 times on 1 line) |
433 | 433 | collect(explode($eol, $this->removeQuotedStrings($this->sample))) |
434 | - ->walk(function ($line, $line_no) use ($frequencies) { |
|
434 | + ->walk(function($line, $line_no) use ($frequencies) { |
|
435 | 435 | collect(str_split($line)) |
436 | - ->filter(function ($c) { |
|
436 | + ->filter(function($c) { |
|
437 | 437 | return collect($this->delims)->contains($c); |
438 | 438 | }) |
439 | 439 | ->frequency() |
440 | 440 | ->sort() |
441 | 441 | ->reverse() |
442 | - ->walk(function ($count, $char) use ($frequencies, $line_no) { |
|
442 | + ->walk(function($count, $char) use ($frequencies, $line_no) { |
|
443 | 443 | try { |
444 | 444 | $char_counts = $frequencies->get($char, null, true); |
445 | 445 | } catch (OutOfBoundsException $e) { |
@@ -452,8 +452,8 @@ discard block |
||
452 | 452 | // the above only finds frequencies for characters if they exist in |
453 | 453 | // a given line. This will go back and fill in zeroes where a char |
454 | 454 | // didn't occur at all in a given line (needed to determine mode) |
455 | - ->walk(function ($line, $line_no) use ($frequencies) { |
|
456 | - $frequencies->walk(function ($counts, $char) use ($line_no, $frequencies) { |
|
455 | + ->walk(function($line, $line_no) use ($frequencies) { |
|
456 | + $frequencies->walk(function($counts, $char) use ($line_no, $frequencies) { |
|
457 | 457 | try { |
458 | 458 | $char_counts = $frequencies->get($char, null, true); |
459 | 459 | } catch (OutOfBoundsException $e) { |
@@ -472,8 +472,8 @@ discard block |
||
472 | 472 | foreach ($frequencies as $char => $freq) { |
473 | 473 | $modes->set($char, (new NumericCollection($freq))->mode()); |
474 | 474 | } |
475 | - $frequencies->walk(function ($f, $chr) use ($modes, $consistencies) { |
|
476 | - collect($f)->walk(function ($num) use ($modes, $chr, $consistencies) { |
|
475 | + $frequencies->walk(function($f, $chr) use ($modes, $consistencies) { |
|
476 | + collect($f)->walk(function($num) use ($modes, $chr, $consistencies) { |
|
477 | 477 | if ($expected = $modes->get($chr)) { |
478 | 478 | if ($num == $expected) { |
479 | 479 | // met the goal, yay! |
@@ -564,19 +564,19 @@ discard block |
||
564 | 564 | // @todo Write a method that does this... |
565 | 565 | $lines = collect(explode($eol, $this->removeQuotedStrings($this->sample))); |
566 | 566 | |
567 | - return $delims[collect($delims)->map(function ($delim) use (&$distrib, $lines) { |
|
567 | + return $delims[collect($delims)->map(function($delim) use (&$distrib, $lines) { |
|
568 | 568 | $linedist = collect(); |
569 | - $lines->walk(function ($line, $line_no) use (&$linedist, $delim) { |
|
569 | + $lines->walk(function($line, $line_no) use (&$linedist, $delim) { |
|
570 | 570 | if (!strlen($line)) { |
571 | 571 | return; |
572 | 572 | } |
573 | 573 | $sectstot = 10; |
574 | 574 | $sectlen = (int) (strlen($line) / $sectstot); |
575 | 575 | $sections = collect(str_split($line, $sectlen)) |
576 | - ->map(function ($section) use ($delim) { |
|
576 | + ->map(function($section) use ($delim) { |
|
577 | 577 | return substr_count($section, $delim); |
578 | 578 | }) |
579 | - ->filter(function ($count) { |
|
579 | + ->filter(function($count) { |
|
580 | 580 | return (bool) $count; |
581 | 581 | }); |
582 | 582 | if (is_numeric($count = $sections->count())) { |
@@ -585,7 +585,7 @@ discard block |
||
585 | 585 | }); |
586 | 586 | |
587 | 587 | return $linedist; |
588 | - })->map(function ($dists) { |
|
588 | + })->map(function($dists) { |
|
589 | 589 | return $dists->average(); |
590 | 590 | })->sort() |
591 | 591 | ->reverse() |
@@ -635,9 +635,9 @@ discard block |
||
635 | 635 | |
636 | 636 | // walk through each line from the data sample to determine which fields |
637 | 637 | // are quoted and which aren't |
638 | - $qsFunc = function ($line) use (&$quoting_styles, &$freq, $eol, $delim) { |
|
638 | + $qsFunc = function($line) use (&$quoting_styles, &$freq, $eol, $delim) { |
|
639 | 639 | $line = str_replace(self::PLACEHOLDER_NEWLINE, $eol, $line); |
640 | - $qnqaFunc = function ($field) use (&$quoting_styles, &$freq, $delim) { |
|
640 | + $qnqaFunc = function($field) use (&$quoting_styles, &$freq, $delim) { |
|
641 | 641 | $field = str_replace(self::PLACEHOLDER_DELIM, $delim, $field); |
642 | 642 | if ($this->isQuoted($field)) { |
643 | 643 | $field = $this->unQuote($field); |
@@ -658,7 +658,7 @@ discard block |
||
658 | 658 | $lines->walk($qsFunc->bindTo($this)); |
659 | 659 | |
660 | 660 | $types = $freq->get('quoted')->unique(); |
661 | - $quoting_styles = $quoting_styles->filter(function ($val) { |
|
661 | + $quoting_styles = $quoting_styles->filter(function($val) { |
|
662 | 662 | return (bool) $val; |
663 | 663 | }); |
664 | 664 | // if quoting_styles still has QUOTE_ALL or QUOTE_NONE, then return |
@@ -678,7 +678,7 @@ discard block |
||
678 | 678 | if ($types->contains(self::DATA_NONNUMERIC)) { |
679 | 679 | // allow for a SMALL amount of error here |
680 | 680 | $counts = collect([self::DATA_SPECIAL => 0, self::DATA_NONNUMERIC => 0]); |
681 | - $freq->get('quoted')->walk(function ($type) use (&$counts) { |
|
681 | + $freq->get('quoted')->walk(function($type) use (&$counts) { |
|
682 | 682 | $counts->increment($type); |
683 | 683 | }); |
684 | 684 | // @todo is all this even necessary? seems unnecessary to me... |
@@ -771,7 +771,7 @@ discard block |
||
771 | 771 | */ |
772 | 772 | protected function replaceQuotedSpecialChars($data, $delim) |
773 | 773 | { |
774 | - return preg_replace_callback('/([\'"])(.*)\1/imsU', function ($matches) use ($delim) { |
|
774 | + return preg_replace_callback('/([\'"])(.*)\1/imsU', function($matches) use ($delim) { |
|
775 | 775 | $ret = preg_replace("/([\r\n])/", self::PLACEHOLDER_NEWLINE, $matches[0]); |
776 | 776 | $ret = str_replace($delim, self::PLACEHOLDER_DELIM, $ret); |
777 | 777 | |
@@ -811,7 +811,7 @@ discard block |
||
811 | 811 | $day = '[0-3]?[0-9]'; |
812 | 812 | $sep = '[\/\.\-]?'; |
813 | 813 | $time = '([0-2]?[0-9](:[0-5][0-9]){1,2}(am|pm)?|[01]?[0-9](am|pm))'; |
814 | - $date = '(' . $month . $sep . $day . $sep . $year . '|' . $day . $sep . $month . $sep . $year . '|' . $year . $sep . $month . $sep . $day . ')'; |
|
814 | + $date = '('.$month.$sep.$day.$sep.$year.'|'.$day.$sep.$month.$sep.$year.'|'.$year.$sep.$month.$sep.$day.')'; |
|
815 | 815 | $dt = new DateTime($data); |
816 | 816 | $dt->setTime(0, 0, 0); |
817 | 817 | $now = new DateTime(); |
@@ -64,7 +64,7 @@ discard block |
||
64 | 64 | return static::factory($values); |
65 | 65 | } |
66 | 66 | if ($throw) { |
67 | - throw new OutOfBoundsException(__CLASS__ . " could not find column: " . $column); |
|
67 | + throw new OutOfBoundsException(__CLASS__." could not find column: ".$column); |
|
68 | 68 | } |
69 | 69 | return false; |
70 | 70 | } |
@@ -169,6 +169,6 @@ discard block |
||
169 | 169 | return call_user_func_array([$column, $method], $args); |
170 | 170 | } |
171 | 171 | } |
172 | - throw new BadMethodCallException("Method does not exist: " . __CLASS__ . "::{$method}()"); |
|
172 | + throw new BadMethodCallException("Method does not exist: ".__CLASS__."::{$method}()"); |
|
173 | 173 | } |
174 | 174 | } |
175 | 175 | \ No newline at end of file |