155 lines
6.6 KiB
Python
155 lines
6.6 KiB
Python
"""Tests for the FileIndexer class."""
|
|
|
|
import hashlib
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from main import FileIndexer
|
|
|
|
|
|
class TestHashPath:
    """Checks for FileIndexer._hash_path."""

    def test_consistent_hash_with_same_salt(
        self, sample_files: dict[str, Path]
    ) -> None:
        """Hashing one path twice under a fixed salt gives equal digests."""
        root_file = sample_files["root_file"]
        indexer = FileIndexer(str(root_file.parent), salt="fixed")
        target = str(root_file)
        first_digest = indexer._hash_path(target)
        second_digest = indexer._hash_path(target)
        assert first_digest == second_digest

    def test_different_hash_with_different_salt(
        self, sample_files: dict[str, Path]
    ) -> None:
        """The same path hashed under two different salts gives two digests."""
        root_file = sample_files["root_file"]
        directory = str(root_file.parent)
        salted_a = FileIndexer(directory, salt="salt-a")
        salted_b = FileIndexer(directory, salt="salt-b")
        target = str(root_file)
        assert salted_a._hash_path(target) != salted_b._hash_path(target)

    def test_different_hash_for_different_paths(
        self, sample_files: dict[str, Path]
    ) -> None:
        """Two distinct paths hashed by one indexer give two digests."""
        root_file = sample_files["root_file"]
        indexer = FileIndexer(str(root_file.parent), salt="fixed")
        root_digest = indexer._hash_path(str(root_file))
        sub_digest = indexer._hash_path(str(sample_files["sub_file"]))
        assert root_digest != sub_digest

    def test_hash_is_sha256_hex(self, sample_files: dict[str, Path]) -> None:
        """The digest is 64 lowercase hex characters, the size of SHA-256."""
        root_file = sample_files["root_file"]
        indexer = FileIndexer(str(root_file.parent), salt="fixed")
        digest = indexer._hash_path(str(root_file))
        hex_digits = "0123456789abcdef"
        assert len(digest) == 64
        assert all(ch in hex_digits for ch in digest)

    def test_hash_includes_salt(self, sample_files: dict[str, Path]) -> None:
        """The digest equals SHA-256 of the path with the salt appended."""
        salt = "mysalt"
        path = "/some/file.txt"
        indexer = FileIndexer(str(sample_files["root_file"].parent), salt=salt)
        # Reference value built independently of the indexer under test.
        salted_bytes = (path + salt).encode()
        expected = hashlib.sha256(salted_bytes).hexdigest()
        assert indexer._hash_path(path) == expected
|
|
|
|
|
|
class TestSalt:
    """Checks for the FileIndexer.salt attribute."""

    def test_provided_salt_is_returned(
        self, sample_files: dict[str, Path]
    ) -> None:
        """A salt passed to the constructor is read back unchanged."""
        directory = str(sample_files["root_file"].parent)
        indexer = FileIndexer(directory, salt="explicit")
        assert indexer.salt == "explicit"

    def test_none_salt_generates_random(
        self, sample_files: dict[str, Path]
    ) -> None:
        """With salt=None the indexer exposes a generated 32-char hex salt."""
        directory = str(sample_files["root_file"].parent)
        indexer = FileIndexer(directory, salt=None)
        generated = indexer.salt
        hex_digits = "0123456789abcdef"
        assert generated is not None
        # 32 hex characters is the length secrets.token_hex(16) produces.
        assert len(generated) == 32
        assert all(ch in hex_digits for ch in generated)

    def test_salt_is_stable_after_first_access(
        self, sample_files: dict[str, Path]
    ) -> None:
        """Reading the salt twice from one indexer gives the same value."""
        directory = str(sample_files["root_file"].parent)
        indexer = FileIndexer(directory, salt=None)
        first_read = indexer.salt
        second_read = indexer.salt
        assert first_read == second_read
|
|
|
|
|
|
class TestIndex:
    """Checks for the hash-to-path mapping FileIndexer builds."""

    def test_indexes_all_files(self, sample_files: dict[str, Path]) -> None:
        """Every file in the fixture tree gets an entry in the mapping."""
        directory = str(sample_files["root_file"].parent)
        indexer = FileIndexer(directory, salt="test")
        # The fixture tree holds root.txt, nested.txt, data.bin and photo.jpg.
        expected_count = 4
        assert len(indexer._file_mapping) == expected_count

    def test_hash_maps_to_correct_path(
        self, sample_files: dict[str, Path]
    ) -> None:
        """Looking up a file's hash in the mapping returns that file's path."""
        root_file = sample_files["root_file"]
        indexer = FileIndexer(str(root_file.parent), salt="test")
        digest = indexer._hash_path(str(root_file))
        assert indexer._file_mapping[digest] == str(root_file)

    def test_empty_directory(self, tmp_path: Path) -> None:
        """Indexing a directory with no files leaves the mapping empty."""
        indexer = FileIndexer(str(tmp_path), salt="test")
        assert indexer._file_mapping == {}

    def test_nested_directories(self, sample_files: dict[str, Path]) -> None:
        """A file inside a subdirectory is present in the mapping."""
        directory = str(sample_files["root_file"].parent)
        indexer = FileIndexer(directory, salt="test")
        nested_digest = indexer._hash_path(str(sample_files["sub_file"]))
        assert nested_digest in indexer._file_mapping
|
|
|
|
|
|
class TestGetFileByHash:
    """Checks for FileIndexer.get_file_by_hash."""

    def test_returns_file_content(self, sample_files: dict[str, Path]) -> None:
        """Joining the yielded chunks reproduces the file's bytes."""
        root_file = sample_files["root_file"]
        indexer = FileIndexer(str(root_file.parent), salt="test")
        digest = indexer._hash_path(str(root_file))
        chunks = indexer.get_file_by_hash(digest)
        assert b"".join(chunks) == b"root content"

    def test_returns_empty_for_invalid_hash(
        self, sample_files: dict[str, Path]
    ) -> None:
        """A hash absent from the index yields no bytes at all."""
        directory = str(sample_files["root_file"].parent)
        indexer = FileIndexer(directory, salt="test")
        chunks = indexer.get_file_by_hash("nonexistent")
        assert b"".join(chunks) == b""

    def test_returns_binary_content_correctly(
        self, sample_files: dict[str, Path]
    ) -> None:
        """Non-text bytes come back exactly as stored."""
        directory = str(sample_files["root_file"].parent)
        indexer = FileIndexer(directory, salt="test")
        digest = indexer._hash_path(str(sample_files["binary_file"]))
        chunks = indexer.get_file_by_hash(digest)
        assert b"".join(chunks) == b"\x00\x01\x02\x03"
|
|
|
|
|
|
class TestGetFilenameByHash:
    """Checks for FileIndexer.get_filename_by_hash."""

    def test_returns_filename(self, sample_files: dict[str, Path]) -> None:
        """A known hash resolves to the full path of the indexed file."""
        root_file = sample_files["root_file"]
        indexer = FileIndexer(str(root_file.parent), salt="test")
        digest = indexer._hash_path(str(root_file))
        resolved = indexer.get_filename_by_hash(digest)
        assert resolved == str(root_file)

    def test_returns_none_for_invalid_hash(
        self, sample_files: dict[str, Path]
    ) -> None:
        """A hash absent from the index resolves to None."""
        directory = str(sample_files["root_file"].parent)
        indexer = FileIndexer(directory, salt="test")
        assert indexer.get_filename_by_hash("nonexistent") is None
|