Spaces:
Running
Running
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import torch | |
# Load the tokenizer and the causal-LM weights once, at import time, so every
# request served by the app reuses the same objects. Both come from the same
# checkpoint identifier.
MODEL_ID = "diabolic6045/ELN-Llama-1B-base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
def generate_response(message, temperature, max_length):
    """Stream a sampled continuation of ``message``, one token per step.

    Args:
        message: Prompt text to complete. Only its first 512 tokens are fed
            to the model, but the yielded text always starts with the full,
            untruncated ``message``.
        temperature: Divisor applied to the next-token logits before
            sampling. Values <= 0 select the most likely token (greedy
            decoding) instead of dividing by zero.
        max_length: Total token budget *including* the prompt tokens; the
            number of generated tokens is at most
            ``max_length - prompt_length``. Converted with ``int()`` because
            a UI slider may hand over a float.

    Yields:
        ``message`` followed by all text generated so far, after each new
        token. If nothing can be generated (no budget left, or the first
        sampled token is EOS) the prompt is yielded once so the caller
        always receives a value.
    """
    # Tokenize input
    inputs = tokenizer(message, return_tensors="pt", truncation=True, max_length=512)
    input_ids = inputs["input_ids"]
    prompt_length = input_ids.shape[1]

    # range() needs an int; a negative budget simply yields an empty range.
    budget = int(max_length) - prompt_length
    produced_any = False

    # Generate response token by token
    for _ in range(budget):
        # Keep no_grad scoped to the forward pass only: holding it open
        # across a ``yield`` would leave grad mode disabled for whatever
        # runs in this thread while the generator is suspended.
        with torch.no_grad():
            outputs = model(input_ids)
            next_token_logits = outputs.logits[:, -1, :]

        if temperature > 0:
            # Apply temperature and sample from the resulting distribution
            probs = torch.softmax(next_token_logits / temperature, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)
        else:
            # Temperature 0 would divide by zero; fall back to greedy choice
            next_token = next_token_logits.argmax(dim=-1, keepdim=True)

        # Stop if we generate an EOS token
        if next_token.item() == tokenizer.eos_token_id:
            break

        # Append the new token to input_ids
        input_ids = torch.cat([input_ids, next_token], dim=-1)

        # Decode the whole generated suffix rather than the single new token:
        # one character can span several tokens, so decoding tokens in
        # isolation can emit broken characters.
        completion = tokenizer.decode(
            input_ids[0, prompt_length:], skip_special_tokens=True
        )
        produced_any = True
        yield message + completion

    if not produced_any:
        yield message
# Build the Gradio interface: three input widgets (prompt, temperature,
# max length) feeding generate_response, and one textbox for the output.
prompt_box = gr.Textbox(
    label="Input Text",
    lines=4,
    placeholder="Enter your text here and the model will complete it...",
)
temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.7,
    label="Temperature (higher = more creative, lower = more focused)",
)
max_length_slider = gr.Slider(
    minimum=50,
    maximum=500,
    value=200,
    step=50,
    label="Max Length (longer text = more completion)",
)
completion_box = gr.Textbox(label="Generated Completion", lines=4)

# Markdown shown under the title.
demo_description = """
> This project implements an Evolution Learning Network (ELN) to fine-tune transformer-based models like LLaMA using a combination of Quantized Low-Rank Adaptation (QLoRA) and Genetic Algorithms (GA). The primary objective is to evolve a population of models across multiple generations to optimize for performance (fitness) and specialization, while maintaining diversity.
This is a demo of [`diabolic6045/ELN-Llama-1B-base`](https://huggingface.co/diabolic6045/ELN-Llama-1B-base).
"""

# Markdown shown below the interface.
demo_article = """
## Tips for better completions:
- Start with a clear and detailed prompt
- Adjust temperature: Higher for creative writing, lower for factual completion
- Adjust max length based on how much text you want to generate
"""

# Each example row is (prompt, temperature, max length), matching the
# order of the input widgets.
demo_examples = [
    ["Once upon a time in a magical forest", 0.7, 50],
    ["The recipe for making the perfect chocolate cake requires", 0.7, 50],
    ["In the year 2150, humanity had finally achieved", 0.7, 50],
    ["The most important principles of effective programming are", 0.8, 50],
]

demo = gr.Interface(
    fn=generate_response,
    inputs=[prompt_box, temperature_slider, max_length_slider],
    outputs=completion_box,
    title="Llama 3.2 1B Finetuned With Evolution Learning Network (ELN) Text Completion Demo",
    description=demo_description,
    examples=demo_examples,
    article=demo_article,
)

# Only start the web server when the file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=True)