| |
| |
|
|
def test_preprocess_text_strips_html():
    """Check that no angle brackets remain after preprocessing HTML markup."""
    from utils import preprocess_text

    cleaned = preprocess_text('<b>Hello</b> world')
    # '<' is checked before '>', so the first failure reported is the same
    # one two separate assertions would report.
    for bracket in ('<', '>'):
        assert bracket not in cleaned
|
|
def test_preprocess_text_removes_urls():
    """Check that the substring 'http' is absent after preprocessing a URL."""
    from utils import preprocess_text

    raw_text = 'Visit http://example.com for details'
    cleaned = preprocess_text(raw_text)
    assert 'http' not in cleaned
|
|
def test_preprocess_text_removes_emails():
    """Check that no '@' character remains after preprocessing an e-mail address."""
    from utils import preprocess_text

    raw_text = 'Contact user@example.com for info'
    cleaned = preprocess_text(raw_text)
    assert '@' not in cleaned
|
|
def test_preprocess_text_lowercases():
    """Check that preprocessing all-caps text yields output equal to its own lowercase form."""
    from utils import preprocess_text

    cleaned = preprocess_text('HELLO WORLD')
    lowered = cleaned.lower()
    # NOTE(review): this comparison also holds when the output is empty, so
    # the test would not catch a preprocess_text that drops all content.
    assert cleaned == lowered
|
|
def test_preprocess_text_removes_stopwords():
    """Check that 'this' and 'the' are not among the whitespace-separated output tokens."""
    from utils import preprocess_text

    cleaned = preprocess_text('this is a test of the system')
    tokens = cleaned.split()
    # Whole-token membership is tested (not substring), so a word that merely
    # contains 'the' would not trip the assertion.
    for stopword in ('this', 'the'):
        assert stopword not in tokens
|
|
def test_preprocess_text_empty_input():
    """Check that preprocessing the empty string returns the empty string."""
    from utils import preprocess_text

    assert preprocess_text('') == ''
|
|
|
|
| import numpy as np |
|
|
def test_compute_metadata_features_shape():
    """Check that two input texts produce a NumPy array of shape (2, 24)."""
    from utils import compute_metadata_features

    samples = ['Hello world!', 'Buy now!!!']
    features = compute_metadata_features(samples)

    # One row per input text; the test expects 24 feature columns.
    expected_shape = (len(samples), 24)
    assert isinstance(features, np.ndarray)
    assert features.shape == expected_shape
|
|
def test_compute_metadata_features_exclamation_density():
    """Check that feature column 0 for 'Buy now!!!' is 3.0, within float tolerance.

    The test reads column 0 of the first row as the exclamation-density
    feature and expects 3.0 for a text containing three '!' characters.
    """
    from utils import compute_metadata_features

    result = compute_metadata_features(['Buy now!!!'])
    exclamation_density = result[0][0]
    # Compare with a tolerance rather than exact equality: the value is a
    # floating-point feature, and the array dtype or the arithmetic used to
    # derive it could introduce rounding that `==` would reject.
    assert np.isclose(exclamation_density, 3.0), (
        f"expected exclamation density 3.0, got {exclamation_density}"
    )
|
|
def test_compute_metadata_features_dollar_count():
    """Check that feature column 1 equals 2 for a text containing two '$' signs."""
    from utils import compute_metadata_features

    features = compute_metadata_features(['Win $100 or $200'])
    first_row = features[0]
    # Column 1 is the value this test treats as the dollar-sign count.
    assert first_row[1] == 2
|
|
def test_compute_metadata_features_spam_phrases():
    """Check that feature column 3 is at least 2 for 'Act now! Buy now!'."""
    from utils import compute_metadata_features

    features = compute_metadata_features(['Act now! Buy now!'])
    first_row = features[0]
    # Column 3 is the value this test treats as the spam-phrase count; only a
    # lower bound is asserted.
    assert first_row[3] >= 2
|
|