"""Tests for the shared utility functions.

Run: python -m pytest test_utils.py -v

Each test imports the function under test from ``utils`` locally, so a
problem importing ``utils`` surfaces as a failure of the individual test
instead of aborting collection of the whole module.
"""

import numpy as np


def test_preprocess_text_strips_html():
    """No angle brackets may remain after preprocessing HTML markup."""
    from utils import preprocess_text

    # The input must contain real tags: with plain text the assertions
    # below would pass without exercising any HTML handling at all.
    result = preprocess_text('<p>Hello <b>world</b></p>')
    assert '<' not in result
    assert '>' not in result


def test_preprocess_text_removes_urls():
    """The ``http`` scheme of an embedded URL must not appear in the output."""
    from utils import preprocess_text

    result = preprocess_text('Visit http://example.com for details')
    assert 'http' not in result


def test_preprocess_text_removes_emails():
    """No ``@`` may remain after preprocessing text with an e-mail address."""
    from utils import preprocess_text

    result = preprocess_text('Contact user@example.com for info')
    assert '@' not in result


def test_preprocess_text_lowercases():
    """The output must already be lowercase (lowercasing it is a no-op)."""
    from utils import preprocess_text

    result = preprocess_text('HELLO WORLD')
    assert result == result.lower()


def test_preprocess_text_removes_stopwords():
    """The tokens ``this`` and ``the`` must be absent from the output."""
    from utils import preprocess_text

    result = preprocess_text('this is a test of the system')
    # Compare whole whitespace-separated tokens, not substrings, so a word
    # that merely contains "the" would not trip the check.
    tokens = result.split()
    assert 'this' not in tokens
    assert 'the' not in tokens


def test_preprocess_text_empty_input():
    """An empty string must be returned unchanged as an empty string."""
    from utils import preprocess_text

    result = preprocess_text('')
    assert result == ''


def test_compute_metadata_features_shape():
    """Two input texts must yield a NumPy array of shape ``(2, 24)``."""
    from utils import compute_metadata_features

    result = compute_metadata_features(['Hello world!', 'Buy now!!!'])
    assert isinstance(result, np.ndarray)
    assert result.shape == (2, 24)


def test_compute_metadata_features_exclamation_density():
    """Column 0 of the feature row must equal 3.0 for ``'Buy now!!!'``."""
    from utils import compute_metadata_features

    # 'Buy now!!!' has 3 exclamation marks and 1 sentence -> density = 3.0
    result = compute_metadata_features(['Buy now!!!'])
    exclamation_density = result[0][0]
    assert exclamation_density == 3.0


def test_compute_metadata_features_dollar_count():
    """Column 1 of the feature row must equal 2 for a text with two ``$``."""
    from utils import compute_metadata_features

    result = compute_metadata_features(['Win $100 or $200'])
    dollar_count = result[0][1]
    assert dollar_count == 2


def test_compute_metadata_features_spam_phrases():
    """Column 3 of the feature row must be at least 2 for two spam phrases."""
    from utils import compute_metadata_features

    # 'act now' and 'buy now' are both in the spam_context_phrases list
    result = compute_metadata_features(['Act now! Buy now!'])
    spam_phrase_count = result[0][3]
    assert spam_phrase_count >= 2