# bidmaster-cli/tests/unit/test_rag_context_utils.py
#
# 28 lines
# 777 B
# Python

from __future__ import annotations
from bidmaster.utils.rag_context import (
estimate_tokens,
fit_texts_to_token_budget,
truncate_to_token_budget,
)
def test_estimate_tokens_cjk_vs_ascii():
    """Check lower bounds of ``estimate_tokens`` for CJK and ASCII input.

    A two-character CJK string must be estimated at 2 tokens or more, and a
    four-letter ASCII string at 1 token or more.
    """
    assert estimate_tokens("中文") >= 2
    assert estimate_tokens("abcd") >= 1
def test_truncate_to_token_budget_truncates():
    """Check that ``truncate_to_token_budget`` honours a small budget.

    Truncating a six-character CJK string to a budget of 3 must return a
    non-empty result whose estimated token count does not exceed 3.
    """
    text = "中文中文中文"  # 6 CJK chars
    truncated = truncate_to_token_budget(text, 3)
    # The result must not be emptied entirely...
    assert truncated
    # ...and must fit the requested budget as measured by estimate_tokens.
    assert estimate_tokens(truncated) <= 3
def test_fit_texts_to_token_budget_drops_or_truncates_tail():
    """Check that ``fit_texts_to_token_budget`` keeps only what fits.

    The budget is set to exactly the estimated token count of the first
    text, so fitting ``[first, second]`` is expected to return ``[first]``.
    """
    first = "中文中文"  # ~4 tokens
    second = "中文中文中文"  # ~6 tokens
    # Budget covers the first text exactly, leaving no room for the second.
    budget = estimate_tokens(first)
    fitted = fit_texts_to_token_budget([first, second], budget)
    assert fitted == [first]