# spam-xai-model-v2 / test_utils.py
# VoltageVagabond's picture
# Upload folder using huggingface_hub
# 960ec3d verified
# raw
# history blame
# 2.25 kB
# Tests for the shared utility functions
# Run: python -m pytest test_utils.py -v
def test_preprocess_text_strips_html():
    """Neither angle bracket of an HTML tag may remain in the preprocessed text."""
    from utils import preprocess_text

    cleaned = preprocess_text('<b>Hello</b> world')
    # Check the opening bracket first, then the closing one.
    for bracket in ('<', '>'):
        assert bracket not in cleaned
def test_preprocess_text_removes_urls():
    """The 'http' scheme of an embedded URL must be absent from the output."""
    from utils import preprocess_text

    raw_message = 'Visit http://example.com for details'
    cleaned = preprocess_text(raw_message)
    assert 'http' not in cleaned
def test_preprocess_text_removes_emails():
    """No '@' character may remain after preprocessing text with an email address."""
    from utils import preprocess_text

    raw_message = 'Contact user@example.com for info'
    cleaned = preprocess_text(raw_message)
    assert '@' not in cleaned
def test_preprocess_text_lowercases():
    """Preprocessed output must already equal its own lower-cased form."""
    from utils import preprocess_text

    cleaned = preprocess_text('HELLO WORLD')
    lowered = cleaned.lower()
    assert cleaned == lowered
def test_preprocess_text_removes_stopwords():
    """The words 'this' and 'the' must not appear as whitespace-separated tokens."""
    from utils import preprocess_text

    cleaned = preprocess_text('this is a test of the system')
    tokens = cleaned.split()
    # Compare whole tokens, not substrings, so words merely containing a
    # stopword would not trigger a failure.
    for stopword in ('this', 'the'):
        assert stopword not in tokens
def test_preprocess_text_empty_input():
    """An empty string must come back from preprocessing as an empty string."""
    from utils import preprocess_text

    empty = ''
    assert preprocess_text(empty) == ''
import numpy as np
def test_compute_metadata_features_shape():
    """Two input messages must yield a NumPy array of shape (2, 24)."""
    from utils import compute_metadata_features

    messages = ['Hello world!', 'Buy now!!!']
    features = compute_metadata_features(messages)

    assert isinstance(features, np.ndarray)
    # One row per message, 24 feature columns.
    expected_shape = (len(messages), 24)
    assert features.shape == expected_shape
def test_compute_metadata_features_exclamation_density():
    """Check the exclamation density reported for a single one-sentence message.

    The value read from row 0, column 0 of the feature matrix is treated as the
    exclamation density. It is a computed ratio, so it is compared with a
    tolerance instead of exact float equality; this keeps the test stable
    regardless of the array's floating-point dtype.
    """
    import math

    from utils import compute_metadata_features

    # 'Buy now!!!' has 3 exclamation marks and 1 sentence -> density = 3.0
    result = compute_metadata_features(['Buy now!!!'])
    exclamation_density = result[0][0]
    # rel_tol is loose enough to absorb single-precision rounding.
    assert math.isclose(exclamation_density, 3.0, rel_tol=1e-6)
def test_compute_metadata_features_dollar_count():
    """A message with two '$' signs must report 2 in column 1 of its feature row."""
    from utils import compute_metadata_features

    features = compute_metadata_features(['Win $100 or $200'])
    first_row = features[0]
    # Column 1 is read as the dollar-sign count.
    assert first_row[1] == 2
def test_compute_metadata_features_spam_phrases():
    """A message containing two known spam phrases must count at least 2 in column 3."""
    from utils import compute_metadata_features

    # 'act now' and 'buy now' are both in the spam_context_phrases list
    message = 'Act now! Buy now!'
    features = compute_metadata_features([message])
    first_row = features[0]
    # Column 3 is read as the spam-phrase count.
    assert first_row[3] >= 2