Complex classes like YoutubeDl often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use YoutubeDl and, based on these observations, to apply Extract Interface as well.
| 1 | <?php |
||
/**
 * Wrapper around the "youtube-dl" command line executable.
 *
 * Options given to the constructor are validated with an OptionsResolver and
 * later translated into command line arguments. download() runs the
 * executable through a Process and returns the result as a Video built from
 * the metadata JSON file the executable writes.
 */
class YoutubeDl
{
    /**
     * Matches a "[download] <percentage> of <size> [at <speed>] [ETA <mm:ss>]"
     * progress line and exposes the parts as named groups
     * (percentage, size, speed, eta).
     */
    const PROGRESS_PATTERN = '#\[download\]\s+(?<percentage>\d+(?:\.\d+)?%)\s+of\s+(?<size>\d+(?:\.\d+)?(?:K|M|G)iB)(?:\s+at\s+(?<speed>\d+(?:\.\d+)?(?:K|M|G)iB/s))?(?:\s+ETA\s+(?<eta>[\d]{2}:[\d]{2}))?#i';

    /**
     * Resolved options, keyed by the long option name without the leading "--".
     *
     * @var array
     */
    protected $options = [];

    /**
     * Explicit path to the executable; when empty the executable is looked up
     * with ExecutableFinder.
     *
     * @var string|null
     */
    protected $binPath;

    /**
     * Optional interpreter path that is put in front of the executable on the
     * command line.
     *
     * @var string|null
     */
    protected $pythonPath;

    /**
     * Directory (without trailing slash) used as the process working directory
     * and as the base for resolving downloaded files.
     *
     * @var string|null
     */
    protected $downloadPath;

    /**
     * Callback receiving ($type, $buffer) for every chunk of process output.
     *
     * @var callable|null
     */
    protected $debug;

    /**
     * Timeout handed to Process::setTimeout().
     *
     * @var int
     */
    protected $timeout = 0;

    /**
     * Values accepted for the "audio-format" option; also used as the list of
     * candidate file extensions when locating an extracted audio file.
     *
     * @var array
     */
    protected $allowedAudioFormats = ['best', 'aac', 'vorbis', 'mp3', 'm4a', 'opus', 'wav'];

    /**
     * Callback receiving the named matches of PROGRESS_PATTERN.
     *
     * @var callable|null
     */
    private $progress;

    /**
     * Regular expressions of URLs that download() refuses to process.
     *
     * @var string[]
     */
    private static $blacklist = [
        '#soundcloud.com/.+/sets.+#',
    ];

    /**
     * @param array $options Options keyed by long option name (see configureOptions())
     */
    public function __construct(array $options = [])
    {
        $resolver = new OptionsResolver();
        $this->configureOptions($resolver);

        $this->options = $resolver->resolve($options);
    }

    /**
     * @param string $binPath Path to the "youtube-dl" executable
     */
    public function setBinPath(string $binPath)
    {
        $this->binPath = $binPath;
    }

    /**
     * @param string $pythonPath Interpreter path prepended to the command line
     */
    public function setPythonPath(string $pythonPath)
    {
        $this->pythonPath = $pythonPath;
    }

    /**
     * @param string $downloadPath Download path without trailing slash
     */
    public function setDownloadPath(string $downloadPath)
    {
        $this->downloadPath = $downloadPath;
    }

    /**
     * Registers a callback that is invoked with ($type, $buffer) for every
     * chunk of process output.
     */
    public function debug(callable $debug)
    {
        $this->debug = $debug;
    }

    /**
     * @param int $timeout Value passed to Process::setTimeout()
     */
    public function setTimeout(int $timeout)
    {
        $this->timeout = $timeout;
    }

    /**
     * Registers a callback that is invoked with the named matches of
     * PROGRESS_PATTERN whenever a progress line is seen on standard output.
     */
    public function onProgress(callable $onProgress)
    {
        $this->progress = $onProgress;
    }

    /**
     * Runs the executable for the given URL and returns the resulting Video.
     *
     * @throws \RuntimeException        When no download path was set
     * @throws UrlNotSupportedException When the URL matches the blacklist
     * @throws \Exception               Process failure, mapped through handleException()
     */
    public function download(string $url): Video
    {
        if (!$this->downloadPath) {
            throw new \RuntimeException('No download path was set.');
        }

        if (!$this->isUrlSupported($url)) {
            throw new UrlNotSupportedException(sprintf('Provided url "%s" is not supported.', $url));
        }

        $process = $this->createProcess($this->buildDownloadArguments($url));

        try {
            $process->mustRun(function ($type, $buffer) {
                $debug = $this->debug;
                $progress = $this->progress;

                if (is_callable($debug)) {
                    $debug($type, $buffer);
                }

                if (is_callable($progress) && Process::OUT === $type && preg_match(self::PROGRESS_PATTERN, $buffer, $matches)) {
                    // Hand over only the named groups; the numeric duplicates
                    // (0..4) carry no extra information.
                    $progress(array_filter($matches, 'is_string', ARRAY_FILTER_USE_KEY));
                }
            });
        } catch (\Exception $e) {
            throw $this->handleException($e);
        }

        return $this->processDownload($process);
    }

    /**
     * Returns the non-empty lines printed by "--list-extractors".
     */
    public function getExtractorsList(): array
    {
        $process = $this->createProcess(['--list-extractors']);
        $process->mustRun(is_callable($this->debug) ? $this->debug : null);

        return array_filter(explode("\n", $process->getOutput()));
    }

    /**
     * Builds the argument list for a download: the URL, the fixed flags and
     * one argument per configured option.
     */
    private function buildDownloadArguments(string $url): array
    {
        $arguments = [
            $url,
            '--no-playlist',
            '--ignore-config',
            '--write-info-json',
        ];

        foreach ($this->options as $option => $value) {
            if ('add-header' === $option) {
                // Repeatable option: one argument per header.
                foreach ($value as $header) {
                    $arguments[] = sprintf('--%s=%s', $option, $header);
                }
            } elseif (is_bool($value)) {
                // A boolean option is a bare switch. It must only be passed
                // when enabled, otherwise "option => false" would turn the
                // switch on (e.g. 'skip-download' => false).
                if ($value) {
                    $arguments[] = sprintf('--%s', $option);
                }
            } else {
                $arguments[] = sprintf('--%s=%s', $option, $value);
            }
        }

        return $arguments;
    }

    /**
     * Decodes a JSON document into an associative array.
     *
     * @param string $data Raw JSON
     *
     * @throws YoutubeDlException When the input is not valid JSON or does not decode to an array
     */
    private function jsonDecode($data): array
    {
        $decoded = json_decode($data, true);

        // Valid scalar JSON (e.g. "null" or "1") would violate the declared
        // return type, so it is reported the same way as a syntax error.
        if (JSON_ERROR_NONE !== json_last_error() || !is_array($decoded)) {
            throw new YoutubeDlException(sprintf('Response can\'t be decoded: %s.', $data));
        }

        return $decoded;
    }

    /**
     * Reads the metadata file announced in the process output, removes it and
     * builds a Video from its content.
     *
     * Unless "skip-download" is enabled the "file" entry is an \SplFileInfo
     * for the downloaded file; otherwise it is null.
     *
     * @throws YoutubeDlException When the metadata file cannot be detected, read or decoded
     */
    private function processDownload(Process $process): Video
    {
        if (!preg_match('/Writing video description metadata as JSON to:\s(.+)/', $process->getOutput(), $m)) {
            throw new YoutubeDlException('Failed to detect metadata file.');
        }

        // The capture runs to the end of the line and may therefore include a
        // trailing carriage return or spaces.
        $metadataFile = $this->downloadPath.'/'.trim($m[1]);

        $contents = is_readable($metadataFile) ? file_get_contents($metadataFile) : false;

        if (false === $contents) {
            throw new YoutubeDlException(sprintf('Failed to read metadata file "%s".', $metadataFile));
        }

        $videoData = $this->jsonDecode(trim($contents));

        // Best-effort cleanup: a leftover metadata file must not fail the download.
        @unlink($metadataFile);

        if (!isset($this->options['skip-download']) || false === $this->options['skip-download']) {
            if (isset($this->options['extract-audio']) && true === $this->options['extract-audio']) {
                // The extension of the extracted audio differs from the one in
                // the metadata, so look the file up by its base name.
                $videoData['_filename'] = $this->findFile($videoData['_filename'], implode('|', $this->allowedAudioFormats));
            } elseif (preg_match('/merged into mkv/', $process->getErrorOutput())) {
                $videoData['_filename'] = $this->findFile($videoData['_filename'], 'mkv');
            }

            $videoData['file'] = new \SplFileInfo($this->downloadPath.'/'.$videoData['_filename']);
        } else {
            $videoData['file'] = null;
        }

        return new Video($videoData);
    }

    /**
     * Maps well-known error messages to dedicated exception types; any other
     * exception is returned unchanged.
     */
    private function handleException(\Exception $e): \Exception
    {
        $message = $e->getMessage();

        if (preg_match('/please sign in to view this video|video is protected by a password/i', $message)) {
            return new PrivateVideoException();
        }

        if (preg_match('/copyright infringement/i', $message)) {
            return new CopyrightException();
        }

        if (preg_match('/this video does not exist|404/i', $message)) {
            return new NotFoundException();
        }

        if (preg_match('/account associated with this video has been terminated/', $message)) {
            return new AccountTerminatedException();
        }

        return $e;
    }

    /**
     * Creates the Process for the given arguments.
     *
     * The command line is [pythonPath] binPath arguments...; the timeout and,
     * when set, the download path (as working directory) are applied.
     *
     * @throws ExecutableNotFoundException When no executable path is set and none can be found
     */
    private function createProcess(array $arguments = []): Process
    {
        $binPath = $this->binPath ?: (new ExecutableFinder())->find('youtube-dl');

        if (null === $binPath) {
            throw new ExecutableNotFoundException('"youtube-dl" executable was not found. Did you forgot to add it to environment variables? Or set it via $yt->setBinPath(\'/usr/bin/youtube-dl\').');
        }

        array_unshift($arguments, $binPath);

        if ($this->pythonPath) {
            array_unshift($arguments, $this->pythonPath);
        }

        $process = new Process($arguments);
        $process->setTimeout($this->timeout);

        if ($this->downloadPath) {
            $process->setWorkingDirectory($this->downloadPath);
        }

        return $process;
    }

    /**
     * Finds, inside the download path, the file that has the base name of
     * $fileName and one of the given extensions.
     *
     * @param string $fileName  File name, optionally prefixed with a directory relative to the download path
     * @param string $extension Extension, or several extensions separated by "|"
     *
     * @return string The matched file name, prefixed with the directory part of $fileName (if any)
     *
     * @throws YoutubeDlException When no matching file exists
     */
    private function findFile(string $fileName, string $extension): string
    {
        $dirName = pathinfo($fileName, PATHINFO_DIRNAME);
        $inSubDirectory = '.' !== $dirName;
        $path = $this->downloadPath.($inSubDirectory ? DIRECTORY_SEPARATOR.$dirName : '');

        $pattern = sprintf('/%s\.(%s)$/ui', preg_quote(pathinfo($fileName, PATHINFO_FILENAME), '/'), $extension);
        $iterator = new \RegexIterator(new \DirectoryIterator($path), $pattern, \RegexIterator::GET_MATCH);
        $iterator->rewind();

        // Without this check a missing file would surface as an offset access
        // on a non-array instead of a meaningful error.
        if (!$iterator->valid()) {
            throw new YoutubeDlException(sprintf('Failed to find file "%s" with extension matching "%s" in "%s".', $fileName, $extension, $path));
        }

        return ($inSubDirectory ? $dirName.DIRECTORY_SEPARATOR : '').$iterator->current()[0];
    }

    /**
     * Declares every supported option with its allowed type(s) and registers
     * the additional value constraints and normalizers.
     */
    private function configureOptions(OptionsResolver $resolver)
    {
        $options = [
            // General options
            'ignore-errors' => 'bool',
            'abort-on-error' => 'bool',
            'default-search' => 'string',
            'force-generic-extractor' => 'bool',
            // Network options
            'proxy' => 'string',
            'socket-timeout' => 'int',
            'source-address' => 'string',
            'force-ipv4' => 'bool',
            'force-ipv6' => 'bool',
            // Video selection options
            'match-title' => 'string',
            'reject-title' => 'string',
            'max-downloads' => 'int',
            'min-filesize' => 'string',
            'max-filesize' => 'string',
            'date' => 'string',
            'datebefore' => 'string',
            'dateafter' => 'string',
            'min-views' => 'int',
            'max-views' => 'int',
            'match-filter' => 'string',
            'download-archive' => 'string',
            'include-ads' => 'bool',
            // Download Options
            'rate-limit' => 'string',
            'retries' => 'int|string',
            'buffer-size' => 'string',
            'no-resize-buffer' => 'bool',
            'xattr-set-filesize' => 'bool',
            'hls-prefer-native' => 'bool',
            'external-downloader' => 'string',
            'external-downloader-args' => 'string',
            // Filesystem Options
            'batch-file' => 'string',
            'output' => 'string',
            'autonumber-size' => 'int',
            'restrict-filenames' => 'bool',
            'no-overwrites' => 'bool',
            'continue' => 'bool',
            'no-continue' => 'bool',
            'no-part' => 'bool',
            'no-mtime' => 'bool',
            'write-description' => 'bool',
            'write-annotations' => 'bool',
            'cookies' => 'string',
            'cache-dir' => 'string',
            'no-cache-dir' => 'bool',
            'rm-cache-dir' => 'bool',
            'id' => 'bool',
            // Thumbnail images
            'write-thumbnail' => 'bool',
            'write-all-thumbnails' => 'bool',
            // Verbosity / Simulation Options
            'quiet' => 'bool',
            'no-warnings' => 'bool',
            'simulate' => 'bool',
            'skip-download' => 'bool',
            'call-home' => 'bool',
            'no-call-home' => 'bool',
            // Workarounds
            'encoding' => 'string',
            'no-check-certificate' => 'bool',
            'prefer-insecure' => 'bool',
            'user-agent' => 'string',
            'referer' => 'string',
            'add-header' => 'array',
            'bidi-workaround' => 'bool',
            'sleep-interval' => 'int',
            // Video Format Options
            'format' => 'string',
            'prefer-free-formats' => 'bool',
            'max-quality' => 'string',
            'youtube-skip-dash-manifest' => 'bool',
            'merge-output-format' => 'string',
            // Subtitle Options
            'write-sub' => 'bool',
            'write-auto-sub' => 'bool',
            'all-subs' => 'bool',
            'sub-format' => 'string',
            'sub-lang' => 'string',
            // Authentication Options
            'username' => 'string',
            'password' => 'string',
            'twofactor' => 'string',
            'netrc' => 'bool',
            'video-password' => 'string',
            // Post-processing Options
            'extract-audio' => 'bool',
            'audio-format' => 'string',
            'audio-quality' => 'int',
            'recode-video' => 'string',
            'keep-video' => 'bool',
            'no-post-overwrites' => 'bool',
            'embed-subs' => 'bool',
            'embed-thumbnail' => 'bool',
            'add-metadata' => 'bool',
            'metadata-from-title' => 'string',
            'xattrs' => 'bool',
            'fixup' => 'string',
            'prefer-avconv' => 'bool',
            'prefer-ffmpeg' => 'bool',
            'ffmpeg-location' => 'string',
            'exec' => 'string',
            'convert-subtitles' => 'string',
        ];

        $resolver->setDefined(array_keys($options));

        foreach ($options as $option => $types) {
            $resolver->setAllowedTypes($option, explode('|', $types));
        }

        // "retries" is either an integer or the literal string "infinite".
        $resolver->setAllowedValues('retries', static function ($value) {
            return !is_string($value) || 'infinite' === $value;
        });

        $resolver->setAllowedValues('external-downloader', ['aria2c', 'avconv', 'axel', 'curl', 'ffmpeg', 'httpie', 'wget']);

        $resolver->setAllowedValues('audio-format', $this->allowedAudioFormats);

        // "ffmpeg-location" must point to an existing file or directory.
        $resolver->setAllowedValues('ffmpeg-location', static function ($value) {
            return is_file($value) || is_dir($value);
        });

        // Drop headers that are not "Name: value" shaped; keys are preserved.
        $resolver->setNormalizer('add-header', static function (Options $options, $value) {
            return array_filter($value, static function ($header) {
                return is_string($header) && false !== strpos($header, ':');
            });
        });
    }

    /**
     * Tells whether the URL is free of every blacklisted pattern.
     */
    private function isUrlSupported(string $url): bool
    {
        foreach (self::$blacklist as $pattern) {
            if (preg_match($pattern, $url)) {
                return false;
            }
        }

        return true;
    }
}
||
| 420 |