Image-Text-to-Text
PEFT
Safetensors
lora
vk-education
deepvk
gqa-ru
visual-question-answering
lmms-eval
conversational
Instructions to use lockR/vk-vlm-gqa-ru-qwen35-08b-lora with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use lockR/vk-vlm-gqa-ru-qwen35-08b-lora with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3.5-0.8B")
model = PeftModel.from_pretrained(base_model, "lockR/vk-vlm-gqa-ru-qwen35-08b-lora")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "project": "VK Education Vision-Language Modeling", | |
| "author": { | |
| "name": "Ибрагимов Далгат Магомедалиевич", | |
| "institution": "МАИ, институт 8", | |
| "group": "М8О-308Б-32" | |
| }, | |
| "primary_hf_artifact": "https://huggingface.co/lockR/vk-vlm-gqa-ru-qwen35-08b-lora", | |
| "primary_run": "gqa_ru_qwen35_0_8b_lora_fast_v1", | |
| "base_model": "Qwen/Qwen3.5-0.8B", | |
| "adapter_type": "LoRA", | |
| "dataset": { | |
| "name": "deepvk/GQA-ru", | |
| "source": "https://huggingface.co/datasets/deepvk/GQA-ru", | |
| "train_samples": 38019, | |
| "validation_samples": 1981, | |
| "testdev_samples": 12216, | |
| "usage": "Image-question-answer records were used for multimodal VLM LoRA fine-tuning and official lmms-eval evaluation." | |
| }, | |
| "training": { | |
| "multimodal": true, | |
| "vision_encoder_trainable": false, | |
| "lora_scope": "language_model attention projection layers", | |
| "seed": 42, | |
| "epochs": 1.0, | |
| "batch_size": 8, | |
| "learning_rate": 0.0002, | |
| "mixed_precision": "bf16", | |
| "max_seq_length": 1024, | |
| "image_resolution": 336, | |
| "lora_r": 16, | |
| "lora_alpha": 32, | |
| "lora_dropout": 0.05, | |
| "target_modules": [ | |
| "q_proj", | |
| "k_proj", | |
| "v_proj", | |
| "o_proj" | |
| ], | |
| "best_checkpoint": "checkpoint-4560", | |
| "best_metric_name": "eval_loss", | |
| "best_metric_value": 0.4337001144886017 | |
| }, | |
| "train_metrics": { | |
| "train_runtime": 6219.1947, | |
| "train_samples_per_second": 6.113, | |
| "train_steps_per_second": 0.764, | |
| "train_loss": 0.04432422036801592, | |
| "epoch": 1.0, | |
| "eval_loss": 0.4337001144886017, | |
| "eval_runtime": 116.0177, | |
| "eval_samples_per_second": 17.075, | |
| "eval_steps_per_second": 1.069, | |
| "metrics_path": "runs/gqa_ru_qwen35_0_8b_lora_fast_v1/train_metrics.json" | |
| }, | |
| "official_benchmark_full": { | |
| "runner": "lmms-eval", | |
| "task": "gqa-ru", | |
| "dataset_path": "deepvk/GQA-ru", | |
| "dataset_name": "testdev_balanced_instructions", | |
| "split": "testdev", | |
| "metric": "exact_match", | |
| "metric_higher_is_better": true, | |
| "prompt_suffix": "Ответь одним словом.", | |
| "samples": 12216, | |
| "limit": null, | |
| "model_backend": "qwen3_5", | |
| "model_args_common": "enable_thinking=False", | |
| "base_exact_match": 0.2861820563195809, | |
| "adapter_exact_match": 0.48321872953503603, | |
| "exact_match_delta": 0.19703667321545515, | |
| "relative_improvement": 0.6885011441647597, | |
| "base_stderr": 0.004089480999753636, | |
| "adapter_stderr": 0.004521458266039995, | |
| "base_correct": 3496, | |
| "adapter_correct": 5903, | |
| "correct_delta": 2407, | |
| "base_results_path": "runs/lmms_eval/gqa_ru_qwen35_base_full/Qwen__Qwen3.5-0.8B/20260604_141134_results.json", | |
| "adapter_results_path": "runs/lmms_eval/gqa_ru_qwen35_lora_full/artifacts__merged_qwen35_gqa_ru_full/20260604_145224_results.json" | |
| }, | |
| "secondary_experiment": { | |
| "base_model": "Qwen/Qwen2.5-VL-3B-Instruct", | |
| "status": "Full GQA-ru training and full base-vs-adapter evaluation are in progress.", | |
| "smoke_hf_artifact": "https://huggingface.co/lockR/vk-vlm-gqa-ru-qwen25vl-3b-lora-smoke", | |
| "smoke_limit_100_base_exact_match": 0.39, | |
| "smoke_limit_100_adapter_exact_match": 0.48 | |
| }, | |
| "limitations": [ | |
| "The vision encoder was frozen; LoRA adapters were trained only on the language model's attention projection layers.", | |
| "MMBench-ru has not yet been measured.", | |
| "The Qwen2.5-VL full experiment is still running and is not included in the primary result." | |
| ] | |
| } | |