{ "dataset": "oneryalcin/financial-filings-sparse-retrieval-training", "config": "combined", "split": "test", "rows": 1000, "base_model": "opensearch-project/opensearch-neural-sparse-encoding-doc-v2-distill", "constant_settings": { "steps": 500, "train_size_requested": 20000, "usable_train_rows": 18247, "batch_size": 8, "query_regularizer_weight": 0.0001, "loss": "SpladeLoss(SparseMultipleNegativesRankingLoss)", "sampler": "BatchSamplers.NO_DUPLICATES" }, "results": [ {"document_regularizer_weight": 0.00008, "accuracy": 0.738, "positive_doc_active_dims": 4204.911, "negative_doc_active_dims": 3784.729, "note": "High score but too dense for practical sparse indexing."}, {"document_regularizer_weight": 0.00015, "accuracy": 0.748, "positive_doc_active_dims": 3214.797, "negative_doc_active_dims": 2956.645, "note": "Best raw 500-step triplet accuracy but still too dense."}, {"document_regularizer_weight": 0.0003, "accuracy": 0.734, "positive_doc_active_dims": 2101.448, "negative_doc_active_dims": 1875.179}, {"document_regularizer_weight": 0.001, "accuracy": 0.727, "positive_doc_active_dims": 1340.540, "negative_doc_active_dims": 1136.954}, {"document_regularizer_weight": 0.003, "accuracy": 0.734, "positive_doc_active_dims": 657.067, "negative_doc_active_dims": 563.964}, {"document_regularizer_weight": 0.01, "accuracy": 0.732, "positive_doc_active_dims": 296.654, "negative_doc_active_dims": 261.160, "note": "Chosen for longer run because quality stayed strong while native vectors became much more indexable."}, {"document_regularizer_weight": 0.03, "accuracy": 0.723, "positive_doc_active_dims": 160.887, "negative_doc_active_dims": 143.986, "note": "Compact but lower quality than 0.01."} ] }