Instructions to use zainabfatima097/SAWiT.AI with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use zainabfatima097/SAWiT.AI with Transformers:
```python
# Load model directly
from transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_pretrained("zainabfatima097/SAWiT.AI", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
# Load necessary libraries
import os

import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer

# Clear cached GPU memory before training (one call is enough; the original
# script imported torch and emptied the cache twice in a row).
torch.cuda.empty_cache()
# Load dataset
dataset = load_dataset("zainabfatima097/My_Dataset")  # Change to your dataset path

# Check available splits
print(f"Available dataset splits: {dataset.keys()}")

# If the dataset has no 'train' split, reuse the 'validation' split as the
# training data. The 'validation' entry is kept as well, so both names then
# refer to the same examples.
if "train" not in dataset:
    if "validation" not in dataset:
        # Fail with an explicit message instead of a bare KeyError('validation').
        raise KeyError(
            "Dataset has neither a 'train' nor a 'validation' split; "
            f"available splits: {list(dataset.keys())}"
        )
    dataset["train"] = dataset["validation"]
# Extract text for the translation task
source_lang = "en"
target_lang = "hi"


def preprocess_function(examples):
    """Extract source and target sentences from a batch of translation pairs.

    Args:
        examples: A batch mapping with a "translation" entry holding one
            dict per example, each keyed by language code.

    Returns:
        A dict with "input_text" (the ``source_lang`` sentences) and
        "target_text" (the ``target_lang`` sentences), in batch order.

    Raises:
        KeyError: If "translation" is missing from the batch or a pair lacks
            one of the two language codes.
    """
    pairs = examples["translation"]
    inputs = [pair[source_lang] for pair in pairs]
    targets = [pair[target_lang] for pair in pairs]
    return {"input_text": inputs, "target_text": targets}
# Apply text extraction to every split
dataset = dataset.map(preprocess_function, batched=True)

# Load tokenizer
model_checkpoint = "Helsinki-NLP/opus-mt-en-hi"  # Use your model
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)


def tokenize_function(examples):
    """Tokenize a batch of source/target sentences for seq2seq training.

    Args:
        examples: A batch mapping with "input_text" and "target_text" lists.

    Returns:
        The tokenizer output for the source sentences, with a "labels" entry
        holding the target token ids. Both sides are truncated and padded to
        128 tokens; padded label positions are set to -100.
    """
    # `text_target` makes the tokenizer encode the targets in target mode
    # rather than treating them as more source-language text.
    inputs = tokenizer(
        examples["input_text"],
        text_target=examples["target_text"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )

    # -100 is the label value the loss ignores; leaving the pad id in place
    # would make the model train on predicting padding.
    pad_id = tokenizer.pad_token_id
    inputs["labels"] = [
        [token_id if token_id != pad_id else -100 for token_id in label_ids]
        for label_ids in inputs["labels"]
    ]
    return inputs


# Apply tokenization and drop the raw text columns
tokenized_datasets = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=["translation", "input_text", "target_text"],
)
# Set train & validation splits
train_dataset = tokenized_datasets["train"]
# Use the training data for evaluation if there is no validation split
eval_dataset = tokenized_datasets.get("validation", train_dataset)

# Load model
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

# Training arguments shared by the normal run and the CPU fallback
# NOTE(review): newer transformers releases appear to name
# `evaluation_strategy` as `eval_strategy` - confirm against the installed version.
common_training_kwargs = {
    "output_dir": "./results",
    "per_device_train_batch_size": 2,  # Reduce batch size to prevent OOM
    "per_device_eval_batch_size": 2,
    "gradient_accumulation_steps": 4,  # Accumulate gradients to simulate larger batch
    "optim": "adamw_torch",
    "evaluation_strategy": "epoch",
    "save_strategy": "epoch",
    "load_best_model_at_end": True,
    "push_to_hub": False,
}


def _build_trainer(args):
    """Create a Trainer over the prepared model and datasets with the given arguments."""
    return Trainer(
        model=model,
        args=args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer,
    )


# Mixed precision reduces memory but needs a GPU, so enable it only when
# CUDA is actually available instead of unconditionally.
training_args = TrainingArguments(
    fp16=torch.cuda.is_available(),
    **common_training_kwargs,
)

# Initialize trainer
trainer = _build_trainer(training_args)

# Train model (handling GPU memory errors)
ran_out_of_gpu_memory = False
try:
    trainer.train()
except torch.cuda.OutOfMemoryError:
    # Only record the failure here: while the except block is active the
    # traceback still references the GPU tensors, so recovery happens below.
    ran_out_of_gpu_memory = True

if ran_out_of_gpu_memory:
    print("⚠️ CUDA Out of Memory! Switching to CPU...")
    # Affects only processes started after this point; this process has
    # already initialised CUDA, hence the explicit `use_cpu=True` below.
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
    # Drop the GPU-bound trainer (and its optimizer state) before retrying.
    del trainer
    torch.cuda.empty_cache()
    model.to("cpu")
    # The original trainer's arguments still target the GPU, so build new
    # CPU-only arguments and a new trainer rather than reusing the old one.
    training_args = TrainingArguments(
        fp16=False,
        use_cpu=True,
        **common_training_kwargs,
    )
    trainer = _build_trainer(training_args)
    trainer.train()
# Save the trained model to ./final_model
trainer.save_model("./final_model")
# The original message began with a mis-encoded glyph that cannot be
# recovered from the garbled text, so it is dropped.
print("Training complete! Model saved.")