AbstractIndexer::parseDirectory()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 5
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
eloc 3
c 1
b 0
f 0
nc 2
nop 1
dl 0
loc 5
rs 10
1
<?php declare(strict_types=1);
2
namespace html_go\indexing;
3
4
use InvalidArgumentException;
5
use html_go\exceptions\InternalException;
6
7
/**
 * Base class for the indexers. Resolves the content and index-file locations
 * below a parent directory and provides helpers for scanning content folders,
 * reading/writing serialized index files and building index elements.
 */
abstract class AbstractIndexer
{
    /** Canonical (realpath-resolved) parent directory. */
    protected string $parentDir;
    /** The <code>content/common</code> directory below the parent directory. */
    protected string $commonDir;
    /** The <code>content/user-data</code> directory below the parent directory. */
    protected string $userDataDir;

    /** Path of <code>cache/indexes/page.inx</code>. */
    protected string $pageInxFile;
    /** Path of <code>cache/indexes/category.inx</code>. */
    protected string $catInxFile;
    /** Path of <code>cache/indexes/post.inx</code>. */
    protected string $postInxFile;
    /** Path of <code>cache/indexes/tag.inx</code>. */
    protected string $tagInxFile;
    /** Path of <code>cache/indexes/tag2post.inx</code>. */
    protected string $tag2postInxFile;
    /** Path of <code>cache/indexes/cat2post.inx</code>. */
    protected string $cat2postInxFile;
    /** Path of <code>cache/indexes/menu.inx</code>. */
    protected string $menuInxFile;

    /**
     * Resolves the parent directory and derives the content directories and
     * index file paths from it. The index directory itself is not checked or
     * created here.
     * @param string $parentDir Directory containing the <code>content</code>
     * and <code>cache</code> folders
     * @throws InternalException If <code>realpath()</code> fails
     * @throws InvalidArgumentException If <code>content/common</code> or
     * <code>content/user-data</code> is not a directory
     */
    public function __construct(string $parentDir) {
        if (($path = \realpath($parentDir)) === false) {
            throw new InternalException("realpath() function failed on [$parentDir]"); // @codeCoverageIgnore
        }
        $this->parentDir = $path;

        $this->commonDir = $path.DS.'content'.DS.'common';
        if (\is_dir($this->commonDir) === false) {
            throw new InvalidArgumentException("The content/common directory cannot be found [$this->commonDir]");
        }

        $this->userDataDir = $path.DS.'content'.DS.'user-data';
        if (\is_dir($this->userDataDir) === false) {
            throw new InvalidArgumentException("The content/user-data directory cannot be found [$this->userDataDir]");
        }
        $indexDir = $path.DS.'cache'.DS.'indexes';
        $this->pageInxFile = $indexDir.DS.'page.inx';
        $this->catInxFile = $indexDir.DS.'category.inx';
        $this->postInxFile = $indexDir.DS.'post.inx';
        $this->tagInxFile = $indexDir.DS.'tag.inx';
        $this->tag2postInxFile = $indexDir.DS.'tag2post.inx';
        $this->cat2postInxFile = $indexDir.DS.'cat2post.inx';
        $this->menuInxFile = $indexDir.DS.'menu.inx';
    }

    /**
     * Load the given index file and return the unserialized index array.
     * @param string $filename
     * @throws InternalException If the file cannot be read or does not contain
     * a serialized array
     * @throws InvalidArgumentException If the file does not exist
     * @return array<mixed>
     */
    protected function loadIndex(string $filename): array {
        if (\file_exists($filename) === false) {
            throw new InvalidArgumentException("Index file does not exist [$filename]. Call 'reindex()'"); // @codeCoverageIgnore
        }
        if (($raw = \file_get_contents($filename)) === false) {
            throw new InternalException("file_get_contents() failed [$filename]"); // @codeCoverageIgnore
        }
        // NOTE(review): unserialize() must only ever see files produced by
        // writeIndex(); it is not safe on data an outside party can modify.
        $index = \unserialize($raw);
        // Covers both the 'false' failure value and any non-array payload, so a
        // damaged file surfaces as InternalException rather than a TypeError.
        if (\is_array($index) === false) {
            throw new InternalException("unserialize() failed [$filename]"); // @codeCoverageIgnore
        }
        return $index;
    }

    /**
     * Recursively scans a folder hierarchy and returns the paths of all the
     * files found in it. Directories are descended into but are not included
     * in the result. Each call returns only the files below
     * <code>$rootDir</code>.
     * @param string $rootDir The directory to start scanning from
     * @return array<int, string>
     * @throws InternalException
     */
    protected function scanDirectory(string $rootDir): array {
        if (($handle = \opendir($rootDir)) === false) {
            throw new InternalException("opendir() failed [$rootDir]"); // @codeCoverageIgnore
        }
        // A local list (not a static one) so results never leak between calls.
        $files = [];
        try {
            while (($entry = \readdir($handle)) !== false) {
                if ($entry === '.' || $entry === '..') {
                    continue;
                }
                $path = $rootDir.DS.$entry;
                if (\is_dir($path)) {
                    $files = \array_merge($files, $this->scanDirectory($path));
                    continue;
                }
                $files[] = $path;
            }
        } finally {
            // Release the handle even when a nested scan throws.
            \closedir($handle);
        }
        return $files;
    }

    /**
     * Returns the pathnames matching the given glob pattern, in the order
     * <code>glob()</code> produces them (no sorting is requested).
     * @param string $pattern A <code>glob()</code> pattern
     * @return array<int, string>
     * @throws InternalException
     */
    protected function parseDirectory(string $pattern): array {
        if (($files = \glob($pattern, GLOB_NOSORT)) === false) {
            throw new InternalException("glob() failed [$pattern]"); // @codeCoverageIgnore
        }
        return $files;
    }

    /**
     * Writes data to an index file in serialized form, creating the file if
     * necessary.
     * @param string $filepath
     * @param array<mixed> $index
     * @throws InternalException
     */
    protected function writeIndex(string $filepath, array $index): void {
        if (\file_put_contents($filepath, \serialize($index)) === false) {
            throw new InternalException("file_put_contents() failed [$filepath]"); // @codeCoverageIgnore
        }
    }

    /**
     * Creates and populates an index Element class.
     * @param string $key The index key
     * @param string $path The filepath
     * @param string $section One of the CATEGORY_SECTION, TAG_SECTION,
     * PAGE_SECTION or POST_SECTION values
     * @param string $optional When populating with variable arguments, use the
     * following <b>named parameters</b>. Any that is not supplied is set to
     * EMPTY_VALUE (an empty array for tags):
     * <ul>
     *   <li>type:</li>
     *   <li>category:</li>
     *   <li>username:</li>
     *   <li>timestamp:</li>
     *   <li>tags: a comma separated list</li>
     * </ul>
     * @throws InvalidArgumentException If the section is unknown
     * @return \stdClass
     */
    protected function createElementClass(string $key, string $path, string $section, string ...$optional): \stdClass {
        if (\in_array($section, [CATEGORY_SECTION, TAG_SECTION, PAGE_SECTION, POST_SECTION], true) === false) {
            throw new InvalidArgumentException("Unknown section [$section]");
        }
        $obj = new \stdClass();
        $obj->key = $key;
        $obj->path = $path;
        $obj->section = $section;
        $obj->type = $this->checkSetOrDefault($optional, 'type', EMPTY_VALUE);
        $obj->category = $this->checkSetOrDefault($optional, 'category', EMPTY_VALUE);
        $obj->username = $this->checkSetOrDefault($optional, 'username', EMPTY_VALUE);
        $obj->timestamp = $this->checkSetOrDefault($optional, 'timestamp', EMPTY_VALUE);

        // Compare against '' rather than using empty(), so that a tag list of
        // '0' is kept as a tag instead of being discarded.
        $tagList = $optional['tags'] ?? '';
        $obj->tags = $tagList === '' ? [] : \explode(',', $tagList);
        return $obj;
    }

    /**
     * Create an Element object. The type of element and what properties are populated and
     * persisted to the index is determined by the <code>section</code>. For POST_SECTION the
     * key is split into uri, timestamp and tags, and the type, category and username are
     * taken from the filepath; for every other section only key, path and section are set.
     * @param string $key
     * @param string $filepath
     * @param string $section
     * @throws InternalException
     * @throws InvalidArgumentException
     * @return \stdClass
     */
    protected function createElement(string $key, string $filepath, string $section): \stdClass {
        // Strict comparison: empty() would also reject the valid string '0'.
        if ($key === '' || $section === '') {
            throw new InvalidArgumentException("A parameter is empty for [$key][$filepath][$section]"); // @codeCoverageIgnore
        }

        if ($section === POST_SECTION) {
            [$uri, $timestamp, $tags] = $this->getPostUriDateStringAndTagListFromIndexKey($key);
            [$type, $category, $username] = $this->getTypeCategoryUsernameFromFilepath($filepath);
            return $this->createElementClass(
                $uri,
                $filepath,
                POST_SECTION,
                type: $type,
                category: $category,
                username: $username,
                timestamp: $timestamp,
                tags: $tags,
            );
        }
        return $this->createElementClass($key, $filepath, $section);
    }

    /**
     * Checks if the given key is set (and not null) in the given array. If so,
     * returns the value, otherwise returns the default value.
     * @param array<mixed> $ar
     * @param string $key
     * @param mixed $default
     * @return mixed
     */
    private function checkSetOrDefault(array $ar, string $key, mixed $default): mixed {
        return $ar[$key] ?? $default;
    }

    /**
     * Splits a post index key into its uri, date string and tag list. The key
     * starts with a date string of TIMESTAMP_LEN characters, the tag list
     * begins at offset 15 and runs up to the next underscore, and everything
     * after that underscore is the title. The uri is built as
     * <code>year/month/title</code> from the first six date characters.
     * @param string $key
     * @throws InvalidArgumentException
     * @return array<string> value index: [0] = uri, [1] = date string, [2] = tag list
     */
    private function getPostUriDateStringAndTagListFromIndexKey(string $key): array {
        if (\strlen($key) < 17) {
            throw new InvalidArgumentException("Post content filename is too short [$key]"); // @codeCoverageIgnore
        }
        $dateString = \substr($key, 0, TIMESTAMP_LEN);
        // NOTE(review): presumably TIMESTAMP_LEN + 1 (date string plus one
        // separator character) - confirm before replacing the literal.
        $start = 15;
        if (($end = \strpos($key, '_', $start)) === false) {
            throw new InvalidArgumentException("Post content filename syntax error [$key]"); // @codeCoverageIgnore
        }
        $tagList = \substr($key, $start, $end - $start);
        $title = \substr($key, $end + 1);
        $year = \substr($dateString, 0, 4);
        $month = \substr($dateString, 4, 2);
        $uri = $year.FWD_SLASH.$month.FWD_SLASH.$title;
        return [$uri, $dateString, $tagList];
    }

    /**
     * Extract the post's type, category and username from its filepath. They
     * are read from the directory part of the path, counting from the end:
     * last = type, second last = category, fourth last = username.
     * @param string $filepath
     * @throws InvalidArgumentException If the directory part has fewer than
     * four components
     * @return array<string> value index: [0] = type, [1] = category, [2] = username
     */
    private function getTypeCategoryUsernameFromFilepath(string $filepath): array {
        $parts = \explode(DS, \dirname($filepath));
        $cnt = \count($parts);
        if ($cnt < 4) {
            throw new InvalidArgumentException("Post filepath has too few directory levels [$filepath]");
        }
        // type,  category, username
        return [$parts[$cnt - 1], CATEGORY_SECTION.FWD_SLASH.$parts[$cnt - 2], $parts[$cnt - 4]];
    }

    /**
     * Reindex the whole system. Generally called when new content has been added.
     */
    abstract public function reindex(): void;
}
233