Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 134 |
|
0.00% |
0 / 13 |
CRAP | |
0.00% |
0 / 1 |
| ArchiveExtractor | |
0.00% |
0 / 134 |
|
0.00% |
0 / 13 |
3422 | |
0.00% |
0 / 1 |
| extract | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
110 | |||
| getArchiveType | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
42 | |||
| extractZip | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
110 | |||
| extractTar | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
90 | |||
| isPathTraversal | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
| filterImageFiles | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
| isValidImageFile | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| getTempDirectory | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
| cleanup | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
| deleteDirectory | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
20 | |||
| getSupportedImageTypes | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getSupportedArchiveTypes | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| setLimits | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php |
| 2 | declare(strict_types=1); |
| 3 | |
| 4 | namespace App\Utility; |
| 5 | |
| 6 | use Exception; |
| 7 | use PharData; |
| 8 | use RuntimeException; |
| 9 | use ZipArchive; |
| 10 | |
/**
 * ArchiveExtractor Utility
 *
 * Extracts ZIP, TAR (optionally gzip-compressed) and single-file GZIP
 * archives into a freshly created temporary directory and returns the
 * paths of the extracted files that are valid images.
 *
 * Extracted files are always flattened into the root of the temporary
 * directory: the directory structure stored inside the archive is never
 * recreated on disk, so an archive entry cannot choose where it is written.
 *
 * Security features:
 * - Path traversal prevention
 * - File size limits
 * - Maximum extraction count limits
 * - Supported file type validation
 */
class ArchiveExtractor
{
    /**
     * Name prefix of every temporary directory created by this class.
     * cleanup() refuses to delete paths that do not contain it.
     */
    private const TEMP_DIR_PREFIX = 'willow_extract_';

    /**
     * Supported image file extensions (lower case, without dot)
     *
     * @var string[]
     */
    private array $supportedImageTypes = ['jpg', 'jpeg', 'png', 'gif', 'webp'];

    /**
     * Supported archive file extensions (lower case, without dot)
     *
     * @var string[]
     */
    private array $supportedArchiveTypes = ['zip', 'tar', 'gz', 'tar.gz', 'tgz'];

    /**
     * Maximum number of files to extract (security limit)
     */
    private int $maxFileCount = 100;

    /**
     * Maximum file size for individual files (10MB)
     */
    private int $maxFileSize = 10485760;

    /**
     * Maximum total extraction size (100MB)
     */
    private int $maxTotalSize = 104857600;

    /**
     * Extract archive and return list of image files
     *
     * On success the returned files live in a temporary directory that the
     * caller is responsible for removing via cleanup(). On any failure the
     * temporary directory is removed before the exception propagates.
     *
     * @param string $archivePath Path to the archive file
     * @return array List of extracted image file paths
     * @throws \RuntimeException When the archive is missing, unsupported,
     *   violates a limit, cannot be read, or contains no valid image
     */
    public function extract(string $archivePath): array
    {
        if (!file_exists($archivePath)) {
            throw new RuntimeException("Archive file not found: {$archivePath}");
        }

        $archiveType = $this->getArchiveType($archivePath);
        if ($archiveType === null) {
            throw new RuntimeException("Unsupported archive type: {$archivePath}");
        }

        $tempDir = $this->getTempDirectory();
        $succeeded = false;

        try {
            switch ($archiveType) {
                case 'zip':
                    $extractedFiles = $this->extractZip($archivePath, $tempDir);
                    break;
                case 'tar':
                case 'tar.gz':
                case 'tgz':
                    $extractedFiles = $this->extractTar($archivePath, $tempDir);
                    break;
                case 'gz':
                    $extractedFiles = $this->extractGzip($archivePath, $tempDir);
                    break;
                default:
                    throw new RuntimeException("Unsupported archive type: {$archiveType}");
            }

            $imageFiles = $this->filterImageFiles($extractedFiles);
            if ($imageFiles === []) {
                throw new RuntimeException('No valid image files found in archive');
            }

            $succeeded = true;

            return $imageFiles;
        } finally {
            // Runs for every kind of throwable, so a failed extraction never
            // leaves a temporary directory behind.
            if (!$succeeded) {
                $this->cleanup($tempDir);
            }
        }
    }

    /**
     * Detect archive type based on the (case-insensitive) file extension
     *
     * Only the file name is inspected; the file contents are not read.
     *
     * @param string $path
     * @return string|null Archive type or null if unsupported
     */
    private function getArchiveType(string $path): ?string
    {
        $pathInfo = pathinfo(strtolower($path));
        $extension = $pathInfo['extension'] ?? '';

        // Handle double extensions like .tar.gz
        if ($extension === 'gz' && isset($pathInfo['filename'])) {
            $innerInfo = pathinfo($pathInfo['filename']);
            if (($innerInfo['extension'] ?? '') === 'tar') {
                return 'tar.gz';
            }

            return 'gz';
        }

        if ($extension === 'tgz') {
            return 'tgz';
        }

        if (in_array($extension, $this->supportedArchiveTypes, true)) {
            return $extension;
        }

        return null;
    }

    /**
     * Extract ZIP files using ZipArchive
     *
     * Each entry is read by index and written to a flattened path inside
     * $targetDir, so the returned paths always match the files on disk even
     * when the entry is stored inside a sub-directory of the archive.
     * Entries that look like path traversal, directories, unreadable entries
     * and entries above the per-file size limit are skipped.
     *
     * @param string $path
     * @param string $targetDir
     * @return array List of extracted files
     * @throws \RuntimeException When the archive cannot be opened or a
     *   count/total-size limit is exceeded
     */
    private function extractZip(string $path, string $targetDir): array
    {
        $zip = new ZipArchive();
        $result = $zip->open($path);

        if ($result !== true) {
            throw new RuntimeException("Failed to open ZIP file: {$path} (Error: {$result})");
        }

        try {
            // Read the count while the archive is still open; it is not
            // meaningful once the archive has been closed.
            $entryCount = $zip->numFiles;
            if ($entryCount > $this->maxFileCount) {
                throw new RuntimeException(
                    "Archive contains too many files ({$entryCount}). Maximum allowed: {$this->maxFileCount}",
                );
            }

            $extractedFiles = [];
            $totalSize = 0;

            for ($i = 0; $i < $entryCount; $i++) {
                $stat = $zip->statIndex($i);
                if ($stat === false) {
                    continue;
                }

                // Security: Check for path traversal
                $filename = $stat['name'];
                if ($this->isPathTraversal($filename)) {
                    continue;
                }

                // Skip directories
                if (substr($filename, -1) === '/') {
                    continue;
                }

                // Check file size limits
                if ($stat['size'] > $this->maxFileSize) {
                    continue;
                }

                $totalSize += $stat['size'];
                if ($totalSize > $this->maxTotalSize) {
                    throw new RuntimeException('Archive total size exceeds limit');
                }

                $extractPath = $this->buildTargetPath($targetDir, $filename);
                if ($extractPath === null) {
                    continue;
                }

                // Reads at most the size reported by statIndex(), which has
                // already been checked against the limits above.
                $contents = $zip->getFromIndex($i);
                if ($contents === false) {
                    continue;
                }

                if (file_put_contents($extractPath, $contents) !== false) {
                    $extractedFiles[] = $extractPath;
                }
            }

            return $extractedFiles;
        } finally {
            $zip->close();
        }
    }

    /**
     * Extract TAR files using PharData
     *
     * The archive is walked recursively so files stored in sub-directories
     * are both counted against the file limit and extracted. Files are
     * copied to a flattened path inside $targetDir.
     *
     * @param string $path
     * @param string $targetDir
     * @return array List of extracted files
     * @throws \RuntimeException When the archive cannot be read or a limit
     *   is exceeded; the original exception is attached as "previous"
     */
    private function extractTar(string $path, string $targetDir): array
    {
        try {
            $phar = new PharData($path);
            // PharData is a recursive directory iterator; iterating it
            // directly would only visit the top-level entries.
            $entries = new \RecursiveIteratorIterator($phar);

            // Count files first
            $fileCount = iterator_count($entries);
            if ($fileCount > $this->maxFileCount) {
                throw new RuntimeException(
                    "Archive contains too many files ({$fileCount}). Maximum allowed: {$this->maxFileCount}",
                );
            }

            $extractedFiles = [];
            $totalSize = 0;

            foreach ($entries as $file) {
                $filename = $file->getFilename();

                // Security: Check for path traversal
                if ($this->isPathTraversal($filename)) {
                    continue;
                }

                // Skip directories
                if ($file->isDir()) {
                    continue;
                }

                // Check file size limits
                $fileSize = $file->getSize();
                if ($fileSize > $this->maxFileSize) {
                    continue;
                }

                $totalSize += $fileSize;
                if ($totalSize > $this->maxTotalSize) {
                    throw new RuntimeException('Archive total size exceeds limit');
                }

                $extractPath = $this->buildTargetPath($targetDir, $filename);
                if ($extractPath === null) {
                    continue;
                }

                if (copy($file->getPathname(), $extractPath)) {
                    $extractedFiles[] = $extractPath;
                }
            }

            return $extractedFiles;
        } catch (Exception $e) {
            throw new RuntimeException('Failed to extract TAR file: ' . $e->getMessage(), 0, $e);
        }
    }

    /**
     * Extract a single gzip-compressed file (e.g. photo.jpg.gz)
     *
     * The output is named after the archive with its final extension
     * removed. Data is decompressed in chunks so the size limits are
     * enforced while reading rather than after the fact.
     *
     * @param string $path
     * @param string $targetDir
     * @return array List with the extracted file, or an empty list when the
     *   decompressed file exceeds the per-file size limit
     * @throws \RuntimeException When zlib is unavailable, the file cannot be
     *   read or written, or the total size limit is exceeded
     */
    private function extractGzip(string $path, string $targetDir): array
    {
        if (!function_exists('gzopen')) {
            throw new RuntimeException('Failed to extract GZIP file: zlib extension is not available');
        }

        $extractPath = $this->buildTargetPath($targetDir, pathinfo($path, PATHINFO_FILENAME));
        if ($extractPath === null) {
            return [];
        }

        $source = gzopen($path, 'rb');
        if ($source === false) {
            throw new RuntimeException("Failed to open GZIP file: {$path}");
        }

        $target = fopen($extractPath, 'wb');
        if ($target === false) {
            gzclose($source);
            throw new RuntimeException("Failed to create extracted file: {$extractPath}");
        }

        $written = 0;
        $withinFileLimit = true;

        try {
            while (!gzeof($source)) {
                $chunk = gzread($source, 65536);
                if ($chunk === false) {
                    throw new RuntimeException("Failed to read GZIP file: {$path}");
                }
                if ($chunk === '') {
                    break;
                }

                $written += strlen($chunk);
                if ($written > $this->maxTotalSize) {
                    throw new RuntimeException('Archive total size exceeds limit');
                }
                if ($written > $this->maxFileSize) {
                    // Same policy as the other formats: oversized files are
                    // skipped rather than treated as a fatal error.
                    $withinFileLimit = false;
                    break;
                }

                if (fwrite($target, $chunk) === false) {
                    throw new RuntimeException("Failed to write extracted file: {$extractPath}");
                }
            }
        } finally {
            fclose($target);
            gzclose($source);
        }

        if (!$withinFileLimit) {
            unlink($extractPath);

            return [];
        }

        return [$extractPath];
    }

    /**
     * Build the flattened destination path for an archive entry
     *
     * Only the base name of the entry is used. When a file with that name
     * already exists in $targetDir (two entries with the same base name in
     * different archive directories) a numeric suffix is inserted before the
     * extension so earlier files are not overwritten.
     *
     * @param string $targetDir Directory the entry will be written to
     * @param string $entryName Entry name as stored in the archive
     * @return string|null Destination path, or null when the entry has no
     *   usable file name
     */
    private function buildTargetPath(string $targetDir, string $entryName): ?string
    {
        // Treat backslashes as separators too, so Windows-style entry names
        // are flattened on every platform.
        $baseName = basename(str_replace('\\', '/', $entryName));
        if ($baseName === '' || $baseName === '.' || $baseName === '..') {
            return null;
        }

        $candidate = $targetDir . DIRECTORY_SEPARATOR . $baseName;
        if (!file_exists($candidate)) {
            return $candidate;
        }

        $info = pathinfo($baseName);
        $suffix = isset($info['extension']) ? '.' . $info['extension'] : '';
        $counter = 1;
        do {
            $candidate = $targetDir . DIRECTORY_SEPARATOR . $info['filename'] . '_' . $counter . $suffix;
            $counter++;
        } while (file_exists($candidate));

        return $candidate;
    }

    /**
     * Check for path traversal attempts
     *
     * A name is rejected when it contains a NUL byte, is absolute (leading
     * slash or backslash, or a colon in second position such as "C:"), or
     * has a ".." path segment. Names that merely contain two dots inside a
     * segment (e.g. "a..b.jpg") are accepted.
     *
     * @param string $filename
     * @return bool True if path traversal detected
     */
    private function isPathTraversal(string $filename): bool
    {
        if (strpos($filename, "\0") !== false) {
            return true;
        }

        $normalized = str_replace('\\', '/', $filename);

        // Check for absolute paths
        if (substr($normalized, 0, 1) === '/' || substr($normalized, 1, 1) === ':') {
            return true;
        }

        // Check for a ".." segment anywhere in the path
        return in_array('..', explode('/', $normalized), true);
    }

    /**
     * Filter and validate image files
     *
     * Keeps the files that exist, have a supported image extension
     * (case-insensitive) and are recognised as an image by
     * isValidImageFile(). Rejected files are left on disk.
     *
     * @param array $files
     * @return array Only valid image files
     */
    private function filterImageFiles(array $files): array
    {
        $imageFiles = [];

        foreach ($files as $file) {
            if (!file_exists($file)) {
                continue;
            }

            $pathInfo = pathinfo(strtolower($file));
            $extension = $pathInfo['extension'] ?? '';

            if (!in_array($extension, $this->supportedImageTypes, true)) {
                continue;
            }

            // Additional validation: check if it's actually an image
            if ($this->isValidImageFile($file)) {
                $imageFiles[] = $file;
            }
        }

        return $imageFiles;
    }

    /**
     * Validate that a file is actually an image
     *
     * @param string $filePath
     * @return bool True when getimagesize() recognises the file
     */
    private function isValidImageFile(string $filePath): bool
    {
        $imageInfo = getimagesize($filePath);

        return $imageInfo !== false;
    }

    /**
     * Get temporary directory for extraction
     *
     * Creates a new, uniquely named directory below the system temp
     * directory. The name uses a random suffix so it cannot be predicted or
     * collide with a concurrent extraction.
     *
     * @return string Path to temp directory
     * @throws \RuntimeException When the directory cannot be created
     */
    private function getTempDirectory(): string
    {
        try {
            $suffix = bin2hex(random_bytes(8));
        } catch (Exception $e) {
            throw new RuntimeException('Failed to generate temporary directory name', 0, $e);
        }

        $tempDir = sys_get_temp_dir() . DIRECTORY_SEPARATOR . self::TEMP_DIR_PREFIX . $suffix;

        if (!mkdir($tempDir, 0755, true)) {
            throw new RuntimeException("Failed to create temporary directory: {$tempDir}");
        }

        return $tempDir;
    }

    /**
     * Cleanup temporary files and directories
     *
     * Does nothing when the path is empty, is not a directory, or does not
     * contain the temp-directory prefix used by this class.
     *
     * @param string $tempDir
     * @return void
     */
    public function cleanup(string $tempDir): void
    {
        if (empty($tempDir) || !is_dir($tempDir)) {
            return;
        }

        // Security: Only delete directories we created
        if (strpos($tempDir, self::TEMP_DIR_PREFIX) === false) {
            return;
        }

        $this->deleteDirectory($tempDir);
    }

    /**
     * Recursively delete a directory and all its contents
     *
     * Symbolic links are removed as links and never followed, so nothing
     * outside $dir is deleted.
     *
     * @param string $dir Directory to delete
     * @return void
     */
    private function deleteDirectory(string $dir): void
    {
        if (!is_dir($dir)) {
            return;
        }

        $entries = scandir($dir);
        if ($entries === false) {
            // Directory is unreadable; removing it would fail as well.
            return;
        }

        foreach (array_diff($entries, ['.', '..']) as $entry) {
            $path = $dir . DIRECTORY_SEPARATOR . $entry;
            if (is_dir($path) && !is_link($path)) {
                $this->deleteDirectory($path);
            } else {
                unlink($path);
            }
        }

        rmdir($dir);
    }

    /**
     * Get supported image types
     *
     * @return array
     */
    public function getSupportedImageTypes(): array
    {
        return $this->supportedImageTypes;
    }

    /**
     * Get supported archive types
     *
     * @return array
     */
    public function getSupportedArchiveTypes(): array
    {
        return $this->supportedArchiveTypes;
    }

    /**
     * Set security limits
     *
     * Values are stored as given; no range validation is performed.
     *
     * @param int $maxFileCount Maximum number of files per archive
     * @param int $maxFileSize Maximum size of a single file in bytes
     * @param int $maxTotalSize Maximum combined size of all files in bytes
     * @return void
     */
    public function setLimits(int $maxFileCount, int $maxFileSize, int $maxTotalSize): void
    {
        $this->maxFileCount = $maxFileCount;
        $this->maxFileSize = $maxFileSize;
        $this->maxTotalSize = $maxTotalSize;
    }
}