from __future__ import annotations

from bidmaster.utils.rag_context import (
    estimate_tokens,
    fit_texts_to_token_budget,
    truncate_to_token_budget,
)


def test_estimate_tokens_cjk_vs_ascii():
    """Check the lower bounds of the estimate for CJK and ASCII input.

    A two-character CJK string must be estimated at 2 tokens or more, and a
    four-letter ASCII string at 1 token or more.
    """
    cjk_estimate = estimate_tokens("中文")
    assert cjk_estimate >= 2

    ascii_estimate = estimate_tokens("abcd")
    assert ascii_estimate >= 1


def test_truncate_to_token_budget_truncates():
    """Check that truncation yields non-empty text within the budget."""
    token_budget = 3
    shortened = truncate_to_token_budget("中文中文中文", token_budget)  # 6 CJK chars in

    # The result must be truthy (not emptied out entirely) ...
    assert shortened
    # ... and its own estimate must not exceed the requested budget.
    assert estimate_tokens(shortened) <= token_budget


def test_fit_texts_to_token_budget_drops_or_truncates_tail():
    """Check that only the first text survives when the budget equals its estimate."""
    head = "中文中文"  # ~4 tokens
    tail = "中文中文中文"  # ~6 tokens

    # The budget is exactly the estimate of the first text, leaving no room
    # for the second one.
    head_only_budget = estimate_tokens(head)
    candidates = [head, tail]

    result = fit_texts_to_token_budget(candidates, head_only_budget)

    assert result == [head]