image_server/tests/test_file_indexer.py
2026-04-23 22:11:51 -05:00

155 lines
6.6 KiB
Python

"""Tests for the FileIndexer class."""
import hashlib
from pathlib import Path
import pytest
from main import FileIndexer
class TestHashPath:
    """Checks on the digest produced by FileIndexer._hash_path."""

    @staticmethod
    def _indexer_for(sample_files: dict[str, Path], salt: str) -> "FileIndexer":
        """Build an indexer over the directory that contains ``root_file``."""
        root_dir = sample_files["root_file"].parent
        return FileIndexer(str(root_dir), salt=salt)

    def test_consistent_hash_with_same_salt(
        self, sample_files: dict[str, Path]
    ) -> None:
        """Hashing one path twice under a fixed salt gives equal digests."""
        indexer = self._indexer_for(sample_files, "fixed")
        target = str(sample_files["root_file"])
        first_digest = indexer._hash_path(target)
        second_digest = indexer._hash_path(target)
        assert first_digest == second_digest

    def test_different_hash_with_different_salt(
        self, sample_files: dict[str, Path]
    ) -> None:
        """The same path hashed under two salts gives two distinct digests."""
        salted_a = self._indexer_for(sample_files, "salt-a")
        salted_b = self._indexer_for(sample_files, "salt-b")
        target = str(sample_files["root_file"])
        digest_a = salted_a._hash_path(target)
        digest_b = salted_b._hash_path(target)
        assert digest_a != digest_b

    def test_different_hash_for_different_paths(
        self, sample_files: dict[str, Path]
    ) -> None:
        """Two different paths hashed by one indexer give distinct digests."""
        indexer = self._indexer_for(sample_files, "fixed")
        root_digest = indexer._hash_path(str(sample_files["root_file"]))
        sub_digest = indexer._hash_path(str(sample_files["sub_file"]))
        assert root_digest != sub_digest

    def test_hash_is_sha256_hex(self, sample_files: dict[str, Path]) -> None:
        """The digest is 64 lowercase hexadecimal characters (SHA-256 size)."""
        indexer = self._indexer_for(sample_files, "fixed")
        digest = indexer._hash_path(str(sample_files["root_file"]))
        assert len(digest) == 64
        # Equivalent to "every character is a hex digit".
        assert not any(ch not in "0123456789abcdef" for ch in digest)

    def test_hash_includes_salt(self, sample_files: dict[str, Path]) -> None:
        """The digest equals SHA-256 of the path followed by the salt."""
        salt = "mysalt"
        path = "/some/file.txt"
        salted_bytes = (path + salt).encode()
        expected = hashlib.sha256(salted_bytes).hexdigest()
        indexer = self._indexer_for(sample_files, salt)
        actual = indexer._hash_path(path)
        assert actual == expected
class TestSalt:
    """Tests for FileIndexer.salt property."""

    def test_provided_salt_is_returned(self, sample_files: dict[str, Path]) -> None:
        """Explicitly provided salt is returned unchanged."""
        indexer = FileIndexer(str(sample_files["root_file"].parent), salt="explicit")
        assert indexer.salt == "explicit"

    def test_none_salt_generates_random(self, sample_files: dict[str, Path]) -> None:
        """None salt generates a random hex string.

        Besides the format checks, two independently built indexers must not
        share a salt; otherwise a constant fallback value would pass this test.
        """
        root_dir = str(sample_files["root_file"].parent)
        indexer = FileIndexer(root_dir, salt=None)
        salt = indexer.salt
        assert salt is not None
        assert len(salt) == 32  # secrets.token_hex(16) = 32 hex chars
        assert all(c in "0123456789abcdef" for c in salt)
        # NOTE(review): assumes the salt is generated per instance (presumably
        # via secrets.token_hex(16)); confirm against FileIndexer. A collision
        # between two 128-bit random values is not a practical concern.
        other = FileIndexer(root_dir, salt=None)
        assert other.salt != salt

    def test_salt_is_stable_after_first_access(
        self, sample_files: dict[str, Path]
    ) -> None:
        """Salt doesn't change between accesses."""
        indexer = FileIndexer(str(sample_files["root_file"].parent), salt=None)
        first = indexer.salt
        second = indexer.salt
        assert first == second
class TestIndex:
    """Checks on the hash-to-path mapping built when a FileIndexer is created."""

    @staticmethod
    def _build(sample_files: dict[str, Path]) -> "FileIndexer":
        """Index the directory that contains ``root_file`` with salt "test"."""
        root_dir = sample_files["root_file"].parent
        return FileIndexer(str(root_dir), salt="test")

    def test_indexes_all_files(self, sample_files: dict[str, Path]) -> None:
        """Every file in the sample tree gets an entry in the mapping."""
        indexer = self._build(sample_files)
        indexed_count = len(indexer._file_mapping)
        # root.txt, nested.txt, data.bin, photo.jpg
        assert indexed_count == 4

    def test_hash_maps_to_correct_path(self, sample_files: dict[str, Path]) -> None:
        """Looking up a file's hash yields that file's own path."""
        indexer = self._build(sample_files)
        root_path = str(sample_files["root_file"])
        root_hash = indexer._hash_path(root_path)
        mapped_path = indexer._file_mapping[root_hash]
        assert mapped_path == str(sample_files["root_file"])

    def test_empty_directory(self, tmp_path: Path) -> None:
        """Indexing a directory with no files leaves the mapping empty."""
        indexer = FileIndexer(str(tmp_path), salt="test")
        mapping = indexer._file_mapping
        assert mapping == {}

    def test_nested_directories(self, sample_files: dict[str, Path]) -> None:
        """A file inside a subdirectory is present in the mapping."""
        indexer = self._build(sample_files)
        nested_hash = indexer._hash_path(str(sample_files["sub_file"]))
        mapping = indexer._file_mapping
        assert nested_hash in mapping
class TestGetFileByHash:
    """Checks on the byte chunks yielded by FileIndexer.get_file_by_hash."""

    @staticmethod
    def _build(sample_files: dict[str, Path]) -> "FileIndexer":
        """Index the directory that contains ``root_file`` with salt "test"."""
        root_dir = sample_files["root_file"].parent
        return FileIndexer(str(root_dir), salt="test")

    @staticmethod
    def _read_all(indexer: "FileIndexer", file_hash: str) -> bytes:
        """Concatenate every chunk yielded for ``file_hash`` into one value."""
        chunks = indexer.get_file_by_hash(file_hash)
        return b"".join(chunks)

    def test_returns_file_content(self, sample_files: dict[str, Path]) -> None:
        """The joined chunks equal the text file's bytes."""
        indexer = self._build(sample_files)
        root_hash = indexer._hash_path(str(sample_files["root_file"]))
        payload = self._read_all(indexer, root_hash)
        assert payload == b"root content"

    def test_returns_empty_for_invalid_hash(
        self, sample_files: dict[str, Path]
    ) -> None:
        """An unknown hash yields nothing, so the joined result is empty."""
        indexer = self._build(sample_files)
        payload = self._read_all(indexer, "nonexistent")
        assert payload == b""

    def test_returns_binary_content_correctly(
        self, sample_files: dict[str, Path]
    ) -> None:
        """The joined chunks of the binary file match its bytes exactly."""
        indexer = self._build(sample_files)
        binary_hash = indexer._hash_path(str(sample_files["binary_file"]))
        payload = self._read_all(indexer, binary_hash)
        assert payload == b"\x00\x01\x02\x03"
class TestGetFilenameByHash:
    """Checks on the path lookup done by FileIndexer.get_filename_by_hash."""

    def test_returns_filename(self, sample_files: dict[str, Path]) -> None:
        """A known hash resolves to the full path of the file it was made from."""
        root_file = sample_files["root_file"]
        indexer = FileIndexer(str(root_file.parent), salt="test")
        root_hash = indexer._hash_path(str(sample_files["root_file"]))
        resolved = indexer.get_filename_by_hash(root_hash)
        assert resolved == str(sample_files["root_file"])

    def test_returns_none_for_invalid_hash(self, sample_files: dict[str, Path]) -> None:
        """A hash absent from the index resolves to None."""
        root_dir = sample_files["root_file"].parent
        indexer = FileIndexer(str(root_dir), salt="test")
        resolved = indexer.get_filename_by_hash("nonexistent")
        assert resolved is None