Completed
Pull Request — master (#183)
by Luke
06:36 queued 03:52
created

AbstractSniffer::sniff()

Size

Total Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
nc 1
dl 0
loc 1
ccs 0
cts 0
cp 0
c 0
b 0
f 0
1
<?php
2
/**
3
 * CSVelte: Slender, elegant CSV for PHP
4
 *
5
 * Inspired by Python's CSV module and Frictionless Data and the W3C's CSV
6
 * standardization efforts, CSVelte was written in an effort to take all the
7
 * suck out of working with CSV.
8
 *
9
 * @copyright Copyright (c) 2018 Luke Visinoni
10
 * @author    Luke Visinoni <[email protected]>
11
 * @license   See LICENSE file (MIT license)
12
 */
13
namespace CSVelte\Sniffer;
14
15
/**
 * Base class for sniffers.
 *
 * Holds an options array and provides helpers that either mask or strip
 * quoted text so that naive splitting (e.g. explode()) does not break on
 * delimiters or newlines that appear inside quotes.
 */
abstract class AbstractSniffer
{
    /**
     * Placeholder strings -- hold the place of newlines and delimiters contained
     * within quoted text so that the explode method doesn't split incorrectly.
     */
    const PLACEHOLDER_NEWLINE = '[__NEWLINE__]';
    const PLACEHOLDER_DELIM   = '[__DELIMIT__]';

    /**
     * Current option values, keyed by option name. The keys present here are
     * the only ones setOption() will accept.
     *
     * @var array
     */
    protected $options = [];

    /**
     * @param array $options Option values to merge over the defaults in $options
     */
    public function __construct(array $options = [])
    {
        $this->setOptions($options);
    }

    /**
     * Merge several options into the current set.
     *
     * Unlike setOption(), this does not check that the keys already exist:
     * array_merge() adds any key found in $options.
     *
     * @param array $options Option values keyed by option name
     *
     * @return $this
     */
    protected function setOptions(array $options)
    {
        $this->options = array_merge($this->options, $options);

        return $this;
    }

    /**
     * Set a single option, but only if that option is already defined.
     *
     * Unknown option names are silently ignored.
     *
     * @param string $option The option name
     * @param mixed  $value  The new value
     *
     * @return $this
     */
    protected function setOption($option, $value)
    {
        if (array_key_exists($option, $this->options)) {
            $this->options[$option] = $value;
        }

        return $this;
    }

    /**
     * Get the value of a single option.
     *
     * @param string $option The option name
     *
     * @return mixed|null The option value, or null if the option is not defined
     */
    protected function getOption($option)
    {
        if (array_key_exists($option, $this->options)) {
            return $this->options[$option];
        }

        return null;
    }

    /**
     * Replace all newlines, and optionally all occurrences of a delimiter, that
     * are contained within quoted text with special placeholder strings. This
     * allows the very unsmart explode() function to be used without it
     * splitting on delimiters or newlines inside quotes. There is no dedicated
     * method for restoring the real characters afterwards; use str_replace()
     * with the PLACEHOLDER_* constants.
     *
     * @param string      $data  The string to do the replacements on
     * @param string|null $delim The delimiter character to replace; when null,
     *                           only newlines are replaced
     * @param string|null $eol   Regular expression fragment matching a line
     *                           ending; defaults to "\r\n|\r|\n". It is inserted
     *                           into a "/"-delimited pattern without escaping,
     *                           so it must itself be a valid pattern fragment
     *
     * @return string The data with replacements performed
     */
    protected function replaceQuotedSpecialChars($data, $delim = null, $eol = null)
    {
        if (is_null($eol)) {
            $eol = "\r\n|\r|\n";
        }

        // Match each quoted run lazily (U) across lines (s), using either
        // quote character, and rewrite only the matched text.
        return preg_replace_callback('/([\'"])(.*)\1/imsU', function ($matches) use ($delim, $eol) {
            $ret = preg_replace("/({$eol})/", self::PLACEHOLDER_NEWLINE, $matches[0]);
            if (!is_null($delim)) {
                $ret = str_replace($delim, self::PLACEHOLDER_DELIM, $ret);
            }

            return $ret;
        }, $data);
    }

    /**
     * Replaces all quoted columns with a blank string. This predates
     * replaceQuotedSpecialChars() and serves the same purpose: preventing
     * explode() from incorrectly splitting at delimiters and newlines within
     * quotes when parsing a file.
     *
     * @param string $data The string to replace quoted strings within
     *
     * @return string The input string with quoted strings removed
     */
    protected function removeQuotedStrings($data)
    {
        // The lookahead/backreference pair consumes an optional backslash
        // together with the character after it, so a backslash-escaped quote
        // does not terminate the quoted string.
        return preg_replace('/(["\'])(?:(?=(\\\\?))\2.)*?\1/sm', '', $data);
    }

    /**
     * Analyze data (sniff)
     *
     * @param string $data The data to analyze (sniff)
     *
     * @return string|string[]
     */
    abstract public function sniff($data);
}