Spaces:

AitBAD
/

kab-ocr-tanti

Running

App Files Files Community

AitBAD committed on Dec 8, 2025

Commit

49dfaa0

verified ·

1 Parent(s): a8b7156

Upload 2 files

Browse files

Files changed (2) hide show

app.py +294 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,294 @@

+# app.py script that converts pdf or png txt image to UTF8 text
+# Kabyle OCR tool
+import streamlit as st
+import pytesseract
+from pdf2image import convert_from_bytes
+from PIL import Image, ImageEnhance, ImageFilter
+import os
+# Point Tesseract at the `tessdata` directory under the current working
+# directory so that it loads its language data from there.
+# NOTE(review): the path is resolved against os.getcwd(), not against this
+# file's location - confirm the app is always launched from the directory
+# that contains `tessdata`.
+os.environ['TESSDATA_PREFIX'] = os.path.join(os.getcwd(), 'tessdata')
+# --- Configuration ---
+# Command-line style options passed to pytesseract on every OCR call:
+# `-l kab` selects the "kab" language data, while `--oem 3` and `--psm 6`
+# pick the OCR engine mode and the page segmentation mode (see the Tesseract
+# manual for the meaning of each numeric mode).
+custom_config = r'-l kab --oem 3 --psm 6' # Example config, adjust as needed
+# --- Function Definition ---
+def enhance_image(image, for_display=False):
+    """Applies enhancements to improve OCR quality or display quality."""
+    # Convert to 'L' mode (grayscale) if not already
+    if image.mode != 'L':
+        image = image.convert('L')
+    # Enhance Contrast
+    contrast_enhancer = ImageEnhance.Contrast(image)
+    # Increase contrast slightly for OCR. Values > 1.0 increase contrast.
+    # For display, we might want a slightly different value or skip this step entirely
+    # depending on the original image quality. Let's use the same value for now.
+    contrast_factor = 1.5
+    if for_display:
+        # Potentially use a different factor for display if needed
+        # contrast_factor = 1.3 # Example for display
+        pass # Using same factor for now
+    image = contrast_enhancer.enhance(contrast_factor)
+    # Enhance Sharpness
+    sharpness_enhancer = ImageEnhance.Sharpness(image)
+    # Slightly increase sharpness for OCR. Values > 1.0 increase sharpness.
+    # Again, for display, a different value might be preferred.
+    sharpness_factor = 1.2
+    if for_display:
+        # Potentially use a different factor for display if needed
+        # sharpness_factor = 1.1 # Example for display
+        pass # Using same factor for now
+    image = sharpness_enhancer.enhance(sharpness_factor)
+    # Optional: Apply a slight unsharp mask filter for further sharpening
+    # if not for_display: # Only for OCR processing?
+    #     image = image.filter(ImageFilter.UnsharpMask(radius=1, percent=50, threshold=0))
+    return image
+def process_image(image):
+    """Processes a single image using pytesseract, applying enhancements first."""
+    try:
+        # Apply enhancements before OCR
+        enhanced_image = enhance_image(image, for_display=False) # Explicitly for OCR
+        text = pytesseract.image_to_string(enhanced_image, config=custom_config)
+        return text
+    except Exception as e:
+        return f"Error during OCR: {e}"
+# --- Page Setup ---
+st.set_page_config(page_title="Kabyle OCR", layout="wide")
+st.title("Asemmezdey Asekdan n Teqbaylit - Kabyle OCR")
+# --- Sidebar ---
+st.sidebar.header("Isefka")
+uploaded_file = st.sidebar.file_uploader("Ssali-d Afaylu", type=["png", "jpg", "jpeg", "pdf"])
+# Add font size selector to the sidebar
+font_size = st.sidebar.slider("Tiddi n Yisekkilen n Uḍris seg OCR (px)", min_value=10, max_value=30, value=18, step=1)
+# Add the preview quality (DPI) slider to the sidebar, under font size
+preview_dpi = st.sidebar.slider("Amerkid n Uskan (DPI)", min_value=150, max_value=700, value=300, step=50)
+# --- Inject Custom CSS for Font Size, Text Color, and Image Layout ---
+# A <style> element is emitted through st.markdown (unsafe_allow_html=True).
+# The rules set the slider-selected font size and a black text colour on text
+# areas, and constrain/centre images matched by the last selector.
+# Doubled braces ({{ }}) are literal CSS braces inside the f-string; only
+# {font_size} is interpolated.
+# NOTE(review): the selectors rely on Streamlit's generated DOM
+# (data-testid values, element nesting) - verify they still match the
+# Streamlit version in use.
+st.markdown(
+    f"""
+    <style>
+    /* Target the text area where the OCR text is edited */
+    textarea[data-testid="stText"] {{
+        font-size: {font_size}px;
+        color: #000000; /* Set text color to black for better visibility */
+        line-height: 1.5; /* Improve readability with line spacing */
+    }}
+    /* Target the text inside the download button preview or other standard text elements if needed */
+    .stTextArea textarea {{
+        font-size: {font_size}px;
+        color: #000000; /* Set text color to black */
+    }}
+    /* Target images specifically within that first column's content div */
+    section[data-testid="stSidebar"] ~ div > div:has(div[data-testid="stColumn"] > div:nth-child(1)) > div:nth-child(1) img {{
+        max-width: 100%; /* Ensures image doesn't exceed the column width */
+        height: auto;    /* Maintains aspect ratio when width is constrained */
+        display: block;  /* Makes the image a block element, necessary for max-width */
+        margin-left: auto; /* Center the image horizontally within its container */
+        margin-right: auto; /* Center the image horizontally within its container */
+        margin-bottom: 10px; /* Add some space below the image */
+    }}
+    </style>
+    """,
+    unsafe_allow_html=True
+)
+# --- Main App Logic ---
+if uploaded_file is not None:
+    # Check if the uploaded file object has changed (a new file was selected)
+    # Compare the new file's info with the one stored in session state (if it exists)
+    current_file_info = (uploaded_file.name, uploaded_file.size, uploaded_file.type)
+    if 'current_file_info' not in st.session_state or st.session_state.current_file_info != current_file_info:
+        # New file detected, reset session state
+        st.session_state.current_file_info = current_file_info
+        st.session_state.ocr_text = ""
+        st.session_state.display_image = None
+        st.session_state.all_pdf_images = [] # Add list for all PDF preview images
+        st.session_state.current_page_index = 0 # Add index for slideshow
+        # Optional: Clear other relevant session state variables if needed
+        # st.session_state.some_other_var = default_value
+    st.info(f"Afaylu i d-yulin: {uploaded_file.name}")
+    # Initialize session state for text and display image if needed (should be after reset check)
+    if 'ocr_text' not in st.session_state:
+        st.session_state.ocr_text = ""
+    if 'display_image' not in st.session_state:
+        st.session_state.display_image = None
+    # Initialize list for all PDF preview images
+    if 'all_pdf_images' not in st.session_state:
+        st.session_state.all_pdf_images = []
+    # Initialize current page index for slideshow
+    if 'current_page_index' not in st.session_state:
+        st.session_state.current_page_index = 0
+    # Create two columns for side-by-side view
+    # Using [1, 1] ratio as suggested
+    col1, col2 = st.columns([1, 1])
+    with col1:
+        st.subheader("Askan n Ufaylu deg Talɣa-s Tamenzut")
+        if "pdf" in uploaded_file.type:
+            if st.button("Sekker PDF (Askan n Yisebtar)"):
+                try:
+                    # Read the file content once for PDF conversion
+                    pdf_content = uploaded_file.read()
+                    # Convert PDF pages to images with the DPI selected by the slider in the sidebar
+                    images = convert_from_bytes(pdf_content, dpi=preview_dpi) # Use the sidebar value directly
+                    if images:
+                        # Store the *original* images for display (un-enhanced copies for preview)
+                        st.session_state.all_pdf_images = [img.copy() for img in images]
+                        # Reset current page index for slideshow
+                        st.session_state.current_page_index = 0
+                        # Store the first page image for display (original, for preview) - kept for backward compatibility if needed
+                        st.session_state.display_image = images[0]
+                        # Reset OCR text for new processing (only reset here if button is pressed)
+                        # st.session_state.ocr_text = "" # This is done on file change now
+                        st.success(f"Yuli-d uPDF (DPI n Uskan: {preview_dpi}). Tekki ɣef 'Sekker OCR' i Uselket.")
+                    else:
+                        st.error("Ulac isebtare deg ufaylu PDF.")
+                except Exception as e:
+                    st.error(f"Ugul deg uselket n PDF: {e}")
+                    st.warning("Senked ma yuli poppler (MD. 'conda install -c conda-forge poppler').")
+            # Display the current PDF page and navigation controls (if PDF was processed and pages are stored)
+            if st.session_state.all_pdf_images:
+                st.subheader("Askan n Yisebtar n Ufaylu PDF") # Subheader for the single page view
+                num_pages = len(st.session_state.all_pdf_images)
+                current_idx = st.session_state.current_page_index
+                # Display navigation buttons and page info on the same line using columns
+                col_nav1, col_nav2, col_nav3 = st.columns([1, 2, 1]) # Create columns for layout
+                with col_nav1:
+                    # Disable 'Previous' button if on the first page
+                    st.button("Ɣer deffir", disabled=(current_idx == 0), on_click=lambda: setattr(st.session_state, 'current_page_index', max(0, current_idx - 1)), key='prev_btn_slideshow')
+                with col_nav2:
+                    # Display page number centered
+                    st.text(f"Asebter {current_idx + 1} n {num_pages}")
+                with col_nav3:
+                    # Disable 'Next' button if on the last page
+                    st.button("Ɣer zdat", disabled=(current_idx == num_pages - 1), on_click=lambda: setattr(st.session_state, 'current_page_index', min(num_pages - 1, current_idx + 1)), key='next_btn_slideshow')
+                # Display the current image below the navigation
+                # Calculate width based on DPI relative to a standard DPI (e.g., 300)
+                # This provides a dynamic scaling effect based on DPI for the *display*.
+                base_dpi = 300
+                base_width = 600 # A reasonable base width for 300 DPI
+                calculated_width = int((preview_dpi / base_dpi) * base_width)
+                # Enhance the *copy* of the *current* image just for display
+                current_img = st.session_state.all_pdf_images[current_idx]
+                display_image_enhanced = enhance_image(current_img.copy(), for_display=True)
+                # Width is now calculated based on the DPI slider value
+                # Display the *enhanced* image for preview with the calculated width
+                # The CSS rule max-width: 100% will prevent it from exceeding col1's width
+                st.image(display_image_enhanced, caption=f"Asebter {current_idx + 1} ({preview_dpi} DPI)", width=calculated_width) # Use calculated width
+        else: # It's an image file
+            # For direct image files, use the calculated width based on the slider value from the sidebar.
+            base_dpi = 300
+            base_width = 600
+            calculated_width = int((preview_dpi / base_dpi) * base_width)
+            image = Image.open(uploaded_file)
+            # Enhance the *copy* of the image just for display
+            display_image_enhanced = enhance_image(image.copy(), for_display=True)
+            # Display the *enhanced* image for preview (NOT inside the PDF container)
+            st.image(display_image_enhanced, caption=uploaded_file.name, width=calculated_width)
+            # Store the *original* image for potential use later (though not strictly needed here)
+            st.session_state.display_image = image
+    with col2:
+        st.subheader("Asezṛeg n Uḍris seg OCR")
+        # OCR Button (only appears after preview is potentially loaded for PDFs)
+        if st.button("Sekker OCR"):
+            full_text = ""
+            # Use a placeholder to clear the area and then update with progress
+            progress_text = st.empty()
+            progress_bar = st.progress(0)
+            with st.spinner("Asekker n OCR..."):
+                if "pdf" in uploaded_file.type:
+                    try:
+                        # Read the file content again for OCR, using the value from the sidebar slider
+                        pdf_content = uploaded_file.read()
+                        images = convert_from_bytes(pdf_content, dpi=preview_dpi) # Use sidebar value directly
+                        num_pages = len(images)
+                        for i, page_image in enumerate(images):
+                            # Update the placeholder with the current page message
+                            progress_text.text(f"Yeɣɣar asebter {i+1} n {num_pages}...")
+                            # Update the progress bar
+                            progress_bar.progress((i + 1) / num_pages)
+                            # Process the page
+                            txt = process_image(page_image)
+                            full_text += f"""
+--- Asebter {i+1} ---
+{txt}
+"""
+                    except Exception as e:
+                        st.error(f"Ugul deg uPDF deg OCR: {e}")
+                        st.warning("Ma twalaḍ ugul yeɛnan 'poppler_path', senked ma yuli poppler (MD. 'conda install -c conda-forge poppler').")
+                else: # It's an image file
+                    # For images, we don't need to re-open or re-scale based on DPI slider for OCR itself,
+                    # just use the original uploaded image.
+                    # The process_image function now handles enhancement internally
+                    progress_text.text("Yeɣɣar tugna...") # Inform user about single image processing
+                    image = Image.open(uploaded_file)
+                    full_text = process_image(image)
+                    progress_bar.progress(100) # Indicate completion for single image
+            # Clear the progress text and bar after processing is complete
+            progress_text.empty()
+            progress_bar.empty()
+            st.session_state.ocr_text = full_text
+            st.success("OCR Yemmed!")
+        # Text Editor - Always visible, updates with OCR result or user edits
+        # Height increased, font size controlled by sidebar slider via CSS
+        # Text color also controlled by CSS
+        edited_text = st.text_area("Zṛeg Aḍris, Seɣti Tira-s da", value=st.session_state.ocr_text, height=600, key="text_editor")
+        # Update session state if user edits the text area
+        if edited_text != st.session_state.ocr_text:
+             st.session_state.ocr_text = edited_text
+        # Download Button (only enabled if there's text)
+        if st.session_state.ocr_text:
+            st.download_button(
+                label="Zdem Aḍris",
+                data=st.session_state.ocr_text.encode('utf-8'),
+                file_name=f"{uploaded_file.name.replace('.', '_')}_ocr.txt",
+                mime="text/plain"
+            )
+        else:
+            st.info("Seddu OCR, Selket s Aḍris.")
+else:
+    # If no file is uploaded, clear session state to ensure clean interface on initial load
+    # or if user deselects the file.
+    for key in ["ocr_text", "display_image", "current_file_info", "all_pdf_images", "current_page_index"]:
+        if key in st.session_state:
+            del st.session_state[key]
+    st.write("Ldi afaylu PDF, PNG, JPG, or JPEG seg ufeggad n yisefka.")

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

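+# requirements-OCR: Python dependencies of the Kabyle OCR app.
+# NOTE(review): the Tesseract OCR engine itself is a system binary, not a
+# Python package - confirm that the `tesseract` pin below resolves on PyPI to
+# what is intended, and that Streamlit is supplied by the hosting environment.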
+Pillow==10.4.0
+pytesseract==0.3.13
+pdf2image==1.17.0
+tesseract==5.5.1