"""TEXT SUMMARIZATION Web APP""" # Importing Packages import base64 import streamlit as st import torch from langchain.document_loaders import PyPDFLoader from langchain.text_splitter import RecursiveCharacterTextSplitter from transformers import T5Tokenizer, T5ForConditionalGeneration from transformers import pipeline # Load the tokenizer and model checkpoint = 'Lamini-1' tokenizer = T5Tokenizer.from_pretrained(checkpoint) base_model = T5ForConditionalGeneration.from_pretrained(checkpoint, device_map="auto", torch_dtype=torch.float32) # File Loader & Processing def file_processing(file): loader = PyPDFLoader(file) pages = loader.load_and_split() text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50) texts = text_splitter.split_documents(pages) final_texts = "" for text in texts: print(text) final_texts = final_texts + text.page_content return final_texts # Language Model Pipeline -> Summarization def llm_pipeline(filepath, summary_length): pipe_summ = pipeline( "summarization", model=base_model, # T5ForConditionalGeneration.from_pretrained(checkpoint), tokenizer=tokenizer, # T5Tokenizer.from_pretrained(checkpoint), max_length=summary_length, min_length=50, ) input = file_processing(filepath) result = pipe_summ(input) result = result[0]["summary_text"] return result # Streamlit Code st.set_page_config(layout="wide") # Display Background def add_bg_from_local(image_file): with open(image_file, "rb") as image_file: encoded_string = base64.b64encode(image_file.read()) st.markdown( f""" """, unsafe_allow_html=True, ) add_bg_from_local("Images/background2.jpg") # Font Style with open("font.css") as f: st.markdown("".format(f.read()), unsafe_allow_html=True) # Sidebar st.sidebar.image("Images/sidebar_pic2.png") st.sidebar.title("ABOUT THE APP") st.sidebar.write( "SummaScribe: Your PDF wingman! 🚀 Unleash the power of Streamlit and LangChain to transform boring text PDFs into " "snappy summaries. Lightning-fast processing,ninja-level NLP algorithms, and a touch of magic—making info " "extraction a breeze!" ) selected_summary_length = st.sidebar.slider("SELECT SUMMARY STRENGTH", min_value=50, max_value=1000, value=500) # Display pdf of a given file @st.cache_data def display(file): # Opening file from filepath with open(file, "rb") as f: base64_pdf = base64.b64encode(f.read()).decode("utf-8") # Embedding pdf in html display_pdf = ( f'' ) # Displaying File st.markdown(display_pdf, unsafe_allow_html=True) # Main content st.markdown( """ """, unsafe_allow_html=True, ) text = "SummaScribe" # Text to be styled colored_text = ''.join( ['{}'.format(70 - (i * 10 / len(text)), char) for i, char in enumerate(text)]) colored_text_with_malt = colored_text + ' ✧' st.markdown(f'