# app.py
# Kabyle OCR tool: converts an uploaded PDF or PNG/JPG image of text into
# editable UTF-8 text using Tesseract, behind a Streamlit UI.
import os

import pytesseract
import streamlit as st
from pdf2image import convert_from_bytes
from PIL import Image, ImageEnhance, ImageFilter  # noqa: F401  (ImageFilter kept for the optional unsharp-mask step)

# Point Tesseract at the local tessdata directory shipped within the Space.
os.environ['TESSDATA_PREFIX'] = os.path.join(os.getcwd(), 'tessdata')

# --- Configuration ---
# Tesseract options; the 'kab' model is resolved through TESSDATA_PREFIX.
custom_config = r'-l kab --oem 3 --psm 6'

# Enhancement factors (values > 1.0 increase the effect).
CONTRAST_FACTOR = 1.5
SHARPNESS_FACTOR = 1.2

# Preview scaling: an image rendered at BASE_DPI is shown BASE_WIDTH px wide.
BASE_DPI = 300
BASE_WIDTH = 600


# --- Function Definitions ---
def enhance_image(image, for_display=False):
    """Return a grayscale, contrast- and sharpness-boosted version of *image*.

    Args:
        image: A PIL image. It is not modified; a new image is returned.
        for_display: Accepted for API compatibility. The same factors are
            currently used for OCR and for on-screen preview, so this flag
            does not change the result.

    Returns:
        The enhanced PIL image in mode 'L'.
    """
    if image.mode != 'L':
        image = image.convert('L')
    image = ImageEnhance.Contrast(image).enhance(CONTRAST_FACTOR)
    image = ImageEnhance.Sharpness(image).enhance(SHARPNESS_FACTOR)
    # Optional extra sharpening (not enabled):
    #   image.filter(ImageFilter.UnsharpMask(radius=1, percent=50, threshold=0))
    return image


def process_image(image):
    """Run OCR on a single PIL image after enhancing it.

    Returns:
        The recognised text, or an "Error during OCR: ..." message if
        enhancement or Tesseract raised. Errors are reported in-band so that
        one bad page does not abort a multi-page document.
    """
    try:
        enhanced_image = enhance_image(image, for_display=False)
        return pytesseract.image_to_string(enhanced_image, config=custom_config)
    except Exception as e:  # UI boundary: surface the problem in the text
        return f"Error during OCR: {e}"


def _preview_width(dpi):
    """Return the preview width in pixels, scaled linearly with *dpi*."""
    return int((dpi / BASE_DPI) * BASE_WIDTH)


def _init_document_state(force=False):
    """Create the per-document session keys; overwrite them when *force*."""
    defaults = {
        "ocr_text": "",
        "display_image": None,
        "all_pdf_images": [],  # one preview image per PDF page
        "current_page_index": 0,  # page currently shown in the slideshow
    }
    for key, value in defaults.items():
        if force or key not in st.session_state:
            st.session_state[key] = value


def _go_to_page(index):
    """Button callback: select the PDF page shown in the preview."""
    st.session_state.current_page_index = index


# --- Page Setup ---
st.set_page_config(page_title="Kabyle OCR", layout="wide")
st.title("Asemmezdey Asekdan n Teqbaylit - Kabyle OCR")

# --- Sidebar ---
st.sidebar.header("Isefka")
uploaded_file = st.sidebar.file_uploader(
    "Ssali-d Afaylu", type=["png", "jpg", "jpeg", "pdf"]
)
# Font size of the OCR text editor.
font_size = st.sidebar.slider(
    "Tiddi n Yisekkilen n Uḍris seg OCR (px)",
    min_value=10, max_value=30, value=18, step=1,
)
# Rendering resolution for PDF pages (also scales the preview width).
preview_dpi = st.sidebar.slider(
    "Amerkid n Uskan (DPI)",
    min_value=150, max_value=700, value=300, step=50,
)

# --- Inject Custom CSS for Font Size, Text Color, and Image Layout ---
# Applies the selected font size and a darker colour to the text area, and
# keeps preview images within their column (max-width: 100%).
# Built as a single unindented string so Markdown does not treat it as a
# code block. The colour value is a plain dark grey chosen here.
custom_css = (
    "<style>"
    f".stTextArea textarea {{font-size: {font_size}px !important; "
    "color: #111111 !important;}"
    '[data-testid="stImage"] img {max-width: 100%; height: auto;}'
    "</style>"
)
st.markdown(custom_css, unsafe_allow_html=True)

# --- Main App Logic ---
if uploaded_file is not None:
    # Detect a newly selected file by comparing its identity with the one
    # remembered from the previous run, and reset per-document state if so.
    current_file_info = (uploaded_file.name, uploaded_file.size, uploaded_file.type)
    if st.session_state.get('current_file_info') != current_file_info:
        st.session_state.current_file_info = current_file_info
        _init_document_state(force=True)

    st.info(f"Afaylu i d-yulin: {uploaded_file.name}")

    # Make sure every key exists even if the reset above did not run.
    _init_document_state()

    is_pdf = "pdf" in uploaded_file.type
    calculated_width = _preview_width(preview_dpi)

    # Side-by-side layout: preview on the left, OCR text on the right.
    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("Askan n Ufaylu deg Talɣa-s Tamenzut")

        if is_pdf:
            if st.button("Sekker PDF (Askan n Yisebtar)"):
                try:
                    # getvalue() returns the whole upload regardless of the
                    # current read position, so it is safe to call repeatedly
                    # (read() would return b"" once the pointer is at EOF).
                    images = convert_from_bytes(
                        uploaded_file.getvalue(), dpi=preview_dpi
                    )
                    if images:
                        # Keep un-enhanced copies; enhancement is applied at
                        # display time.
                        st.session_state.all_pdf_images = [img.copy() for img in images]
                        st.session_state.current_page_index = 0
                        # First page kept separately for backward compatibility.
                        st.session_state.display_image = images[0]
                        st.success(
                            f"Yuli-d uPDF (DPI n Uskan: {preview_dpi}). "
                            "Tekki ɣef 'Sekker OCR' i Uselket."
                        )
                    else:
                        st.error("Ulac isebtare deg ufaylu PDF.")
                except Exception as e:
                    st.error(f"Ugul deg uselket n PDF: {e}")
                    st.warning("Senked ma yuli poppler (MD. 'conda install -c conda-forge poppler').")

            # Single-page slideshow with navigation, once pages are loaded.
            if st.session_state.all_pdf_images:
                st.subheader("Askan n Yisebtar n Ufaylu PDF")
                num_pages = len(st.session_state.all_pdf_images)
                # Clamp in case the stored index is stale for this page list.
                current_idx = min(
                    max(st.session_state.current_page_index, 0), num_pages - 1
                )

                col_nav1, col_nav2, col_nav3 = st.columns([1, 2, 1])
                with col_nav1:
                    st.button(
                        "Ɣer deffir",
                        disabled=(current_idx == 0),
                        on_click=_go_to_page,
                        args=(max(0, current_idx - 1),),
                        key='prev_btn_slideshow',
                    )
                with col_nav2:
                    st.text(f"Asebter {current_idx + 1} n {num_pages}")
                with col_nav3:
                    st.button(
                        "Ɣer zdat",
                        disabled=(current_idx == num_pages - 1),
                        on_click=_go_to_page,
                        args=(min(num_pages - 1, current_idx + 1),),
                        key='next_btn_slideshow',
                    )

                # enhance_image returns a new image, so the stored original
                # page is left untouched.
                current_img = st.session_state.all_pdf_images[current_idx]
                display_image_enhanced = enhance_image(current_img, for_display=True)
                st.image(
                    display_image_enhanced,
                    caption=f"Asebter {current_idx + 1} ({preview_dpi} DPI)",
                    width=calculated_width,
                )
        else:
            # Plain image upload: preview it directly.
            image = Image.open(uploaded_file)
            display_image_enhanced = enhance_image(image, for_display=True)
            st.image(
                display_image_enhanced,
                caption=uploaded_file.name,
                width=calculated_width,
            )
            # Keep the original image around for potential later use.
            st.session_state.display_image = image

    with col2:
        st.subheader("Asezṛeg n Uḍris seg OCR")

        if st.button("Sekker OCR"):
            full_text = ""
            ocr_ok = True
            # Placeholders so progress output can be cleared afterwards.
            progress_text = st.empty()
            progress_bar = st.progress(0)

            with st.spinner("Asekker n OCR..."):
                if is_pdf:
                    page_texts = []
                    try:
                        pages = convert_from_bytes(
                            uploaded_file.getvalue(), dpi=preview_dpi
                        )
                        num_pages = len(pages)
                        for page_no, page_image in enumerate(pages, start=1):
                            progress_text.text(f"Yeɣɣar asebter {page_no} n {num_pages}...")
                            progress_bar.progress(page_no / num_pages)
                            txt = process_image(page_image)
                            page_texts.append(
                                f"\n\n--- Asebter {page_no} ---\n\n{txt}\n\n"
                            )
                    except Exception as e:
                        ocr_ok = False
                        st.error(f"Ugul deg uPDF deg OCR: {e}")
                        st.warning("Ma twalaḍ ugul yeɛnan 'poppler_path', senked ma yuli poppler (MD. 'conda install -c conda-forge poppler').")
                    full_text = "".join(page_texts)
                else:
                    # Images are OCR'd as uploaded; the DPI slider only
                    # affects the preview.
                    progress_text.text("Yeɣɣar tugna...")
                    full_text = process_image(Image.open(uploaded_file))
                    progress_bar.progress(100)

            progress_text.empty()
            progress_bar.empty()

            # Only replace the editor contents and report success when the
            # run completed; a failed PDF conversion must not wipe existing
            # text or claim success.
            if ocr_ok:
                st.session_state.ocr_text = full_text
                st.success("OCR Yemmed!")

        # Text editor: always visible, shows the OCR result or user edits.
        edited_text = st.text_area(
            "Zṛeg Aḍris, Seɣti Tira-s da",
            value=st.session_state.ocr_text,
            height=600,
            key="text_editor",
        )
        # Persist manual edits so the download reflects them.
        if edited_text != st.session_state.ocr_text:
            st.session_state.ocr_text = edited_text

        # Download is offered only when there is text to save.
        if st.session_state.ocr_text:
            st.download_button(
                label="Zdem Aḍris",
                data=st.session_state.ocr_text.encode('utf-8'),
                file_name=f"{uploaded_file.name.replace('.', '_')}_ocr.txt",
                mime="text/plain",
            )
        else:
            st.info("Seddu OCR, Selket s Aḍris.")
else:
    # No file selected (initial load or file removed): drop per-document
    # state so the next upload starts from a clean interface.
    for key in ["ocr_text", "display_image", "current_file_info",
                "all_pdf_images", "current_page_index"]:
        if key in st.session_state:
            del st.session_state[key]
    st.write("Ldi afaylu PDF, PNG, JPG, or JPEG seg ufeggad n yisefka.")