"""Tests for the ZipFileIndexer class.""" from pathlib import Path import pytest from main import ZipFileIndexer class TestZipIndex: """Tests for ZipFileIndexer._index.""" def test_indexes_real_zip(self, tmp_path: Path) -> None: """Index a real zip file with multiple entries.""" import zipfile zip_path = tmp_path / "archive.zip" with zipfile.ZipFile(zip_path, "w") as zf: zf.writestr("a.txt", "aaa") zf.writestr("b/c.txt", "ccc") zf.writestr("b/d.txt", "ddd") indexer = ZipFileIndexer(str(zip_path), salt="test") assert len(indexer._file_mapping) == 3 def test_excludes_directories(self, tmp_path: Path) -> None: """Directory entries in the zip are excluded from the index.""" import zipfile zip_path = tmp_path / "archive.zip" with zipfile.ZipFile(zip_path, "w") as zf: zf.writestr("folder/", "") # directory entry zf.writestr("folder/file.txt", "content") indexer = ZipFileIndexer(str(zip_path), salt="test") assert len(indexer._file_mapping) == 1 filename = list(indexer._file_mapping.values())[0] assert filename == "folder/file.txt" def test_hash_matches_filename(self, tmp_path: Path) -> None: """Hash is computed from the filename inside the zip.""" import zipfile zip_path = tmp_path / "archive.zip" with zipfile.ZipFile(zip_path, "w") as zf: zf.writestr("hello.txt", "world") indexer = ZipFileIndexer(str(zip_path), salt="test") expected_hash = indexer._hash_path("hello.txt") assert expected_hash in indexer._file_mapping assert indexer._file_mapping[expected_hash] == "hello.txt" class TestZipGetFileByHash: """Tests for ZipFileIndexer.get_file_by_hash.""" def test_returns_file_content(self, tmp_path: Path) -> None: """Returns the content of a file inside the zip.""" import zipfile zip_path = tmp_path / "archive.zip" with zipfile.ZipFile(zip_path, "w") as zf: zf.writestr("data.txt", "zip content here") indexer = ZipFileIndexer(str(zip_path), salt="test") file_hash = indexer._hash_path("data.txt") content = b"".join(indexer.get_file_by_hash(file_hash)) assert content == b"zip content here" def test_returns_empty_for_invalid_hash(self, tmp_path: Path) -> None: """Returns an empty generator for a hash that doesn't exist in the zip.""" import zipfile zip_path = tmp_path / "archive.zip" with zipfile.ZipFile(zip_path, "w") as zf: zf.writestr("file.txt", "data") indexer = ZipFileIndexer(str(zip_path), salt="test") content = b"".join(indexer.get_file_by_hash("nonexistent-hash")) assert content == b"" def test_returns_binary_content(self, tmp_path: Path) -> None: """Binary content from zip is returned correctly.""" import zipfile zip_path = tmp_path / "archive.zip" binary_data = b"\x00\xff\x80\x7f" with zipfile.ZipFile(zip_path, "w") as zf: zf.writestr("binary.bin", binary_data) indexer = ZipFileIndexer(str(zip_path), salt="test") file_hash = indexer._hash_path("binary.bin") content = b"".join(indexer.get_file_by_hash(file_hash)) assert content == binary_data class TestZipGetFilenameByHash: """Tests for ZipFileIndexer.get_filename_by_hash.""" def test_returns_filename(self, tmp_path: Path) -> None: """Returns the internal filename for a valid hash.""" import zipfile zip_path = tmp_path / "archive.zip" with zipfile.ZipFile(zip_path, "w") as zf: zf.writestr("folder/nested.txt", "content") indexer = ZipFileIndexer(str(zip_path), salt="test") file_hash = indexer._hash_path("folder/nested.txt") assert indexer.get_filename_by_hash(file_hash) == "folder/nested.txt" def test_returns_none_for_invalid_hash(self, tmp_path: Path) -> None: """Returns None for a hash that doesn't exist.""" import zipfile zip_path = tmp_path / "archive.zip" with zipfile.ZipFile(zip_path, "w") as zf: zf.writestr("file.txt", "data") indexer = ZipFileIndexer(str(zip_path), salt="test") assert indexer.get_filename_by_hash("bad-hash") is None