Spaces:
Running
Running
Upload 2 files
Browse files- app.py +294 -0
- requirements.txt +6 -0
app.py
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py: Streamlit script that converts a PDF or an image of text (PNG/JPG/JPEG) into UTF-8 text
|
| 2 |
+
# Kabyle OCR tool
|
| 3 |
+
import streamlit as st
|
| 4 |
+
import pytesseract
|
| 5 |
+
from pdf2image import convert_from_bytes
|
| 6 |
+
from PIL import Image, ImageEnhance, ImageFilter
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
# Tesseract looks up its language models through TESSDATA_PREFIX; point it at
# the `tessdata` directory under the current working directory.
_tessdata_dir = os.path.join(os.getcwd(), "tessdata")
os.environ["TESSDATA_PREFIX"] = _tessdata_dir

# --- Configuration ---
# Flags handed to pytesseract on every OCR call: language model `kab`,
# OCR engine mode 3, page segmentation mode 6. Adjust as needed.
custom_config = " ".join(("-l kab", "--oem 3", "--psm 6"))
|
| 15 |
+
|
| 16 |
+
# --- Function Definition ---
|
| 17 |
+
def enhance_image(image, for_display=False, *, contrast_factor=1.5, sharpness_factor=1.2):
    """Convert an image to grayscale and boost its contrast and sharpness.

    Args:
        image: A PIL image.
        for_display: Accepted for API compatibility. Preview and OCR share the
            same enhancement settings, so this flag does not change the result;
            pass explicit factors instead to tune one of the two paths.
        contrast_factor: Factor given to ``ImageEnhance.Contrast``; values
            above 1.0 increase contrast.
        sharpness_factor: Factor given to ``ImageEnhance.Sharpness``; values
            above 1.0 increase sharpness.

    Returns:
        The enhanced image in mode ``'L'``.
    """
    # Work in grayscale ('L'); skip the conversion when already there.
    if image.mode != 'L':
        image = image.convert('L')

    # Contrast first, then sharpness, each applied to the previous result.
    image = ImageEnhance.Contrast(image).enhance(contrast_factor)
    image = ImageEnhance.Sharpness(image).enhance(sharpness_factor)

    return image
|
| 51 |
+
|
| 52 |
+
def process_image(image):
    """Run OCR on one image and return the recognised text.

    The image is passed through ``enhance_image`` before being handed to
    pytesseract with the module-level ``custom_config``. Any exception is
    caught and reported in the returned string instead of being raised, so
    the caller always receives a ``str``.
    """
    try:
        ocr_ready = enhance_image(image, for_display=False)
        return pytesseract.image_to_string(ocr_ready, config=custom_config)
    except Exception as exc:
        return f"Error during OCR: {exc}"
|
| 61 |
+
|
| 62 |
+
# --- Page Setup ---
st.set_page_config(page_title="Kabyle OCR", layout="wide")
st.title("Asemmezdey Asekdan n Teqbaylit - Kabyle OCR")

# --- Sidebar ---
# All input controls live in the sidebar: the file uploader, the font size
# used for the OCR text editor, and the DPI used when rasterising PDFs.
with st.sidebar:
    st.header("Isefka")
    uploaded_file = st.file_uploader("Ssali-d Afaylu", type=["png", "jpg", "jpeg", "pdf"])
    font_size = st.slider("Tiddi n Yisekkilen n Uḍris seg OCR (px)", min_value=10, max_value=30, value=18, step=1)
    preview_dpi = st.slider("Amerkid n Uskan (DPI)", min_value=150, max_value=700, value=300, step=50)

# --- Inject Custom CSS for Font Size, Text Color, and Image Layout ---
# The stylesheet applies the slider-selected font size and a black text
# colour to the text area, and constrains preview images to their column.
custom_css = f"""
<style>
/* Target the text area where the OCR text is edited */
textarea[data-testid="stText"] {{
font-size: {font_size}px;
color: #000000; /* Set text color to black for better visibility */
line-height: 1.5; /* Improve readability with line spacing */
}}

/* Target the text inside the download button preview or other standard text elements if needed */
.stTextArea textarea {{
font-size: {font_size}px;
color: #000000; /* Set text color to black */
}}

/* Target images specifically within that first column's content div */
section[data-testid="stSidebar"] ~ div > div:has(div[data-testid="stColumn"] > div:nth-child(1)) > div:nth-child(1) img {{
max-width: 100%; /* Ensures image doesn't exceed the column width */
height: auto; /* Maintains aspect ratio when width is constrained */
display: block; /* Makes the image a block element, necessary for max-width */
margin-left: auto; /* Center the image horizontally within its container */
margin-right: auto; /* Center the image horizontally within its container */
margin-bottom: 10px; /* Add some space below the image */
}}
</style>
"""
st.markdown(custom_css, unsafe_allow_html=True)
|
| 108 |
+
|
| 109 |
+
# --- Main App Logic ---
def _go_to_page(page_index):
    """Button callback: make ``page_index`` the page shown in the PDF preview."""
    st.session_state.current_page_index = page_index


if uploaded_file is not None:
    # A change in (name, size, type) is treated as a new upload: everything
    # derived from the previous file is reset.
    current_file_info = (uploaded_file.name, uploaded_file.size, uploaded_file.type)
    if 'current_file_info' not in st.session_state or st.session_state.current_file_info != current_file_info:
        st.session_state.current_file_info = current_file_info
        st.session_state.ocr_text = ""
        st.session_state.display_image = None
        st.session_state.all_pdf_images = []
        st.session_state.current_page_index = 0
        # The editor widget is keyed, so its content must be cleared through
        # session state (allowed here: the widget is created further down).
        st.session_state["text_editor"] = ""

    st.info(f"Afaylu i d-yulin: {uploaded_file.name}")

    # Make sure every state entry exists even when the reset above did not run.
    state_defaults = {
        "ocr_text": "",
        "display_image": None,
        "all_pdf_images": [],
        "current_page_index": 0,
    }
    for state_key, default_value in state_defaults.items():
        if state_key not in st.session_state:
            st.session_state[state_key] = default_value

    is_pdf = "pdf" in uploaded_file.type

    # Two equal-width columns: file preview on the left, OCR text on the right.
    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("Askan n Ufaylu deg Talɣa-s Tamenzut")

        # Preview width scales linearly with the DPI slider (600 px at 300 DPI).
        # The injected CSS (max-width: 100%) is meant to cap it at the column width.
        base_dpi = 300
        base_width = 600
        calculated_width = int((preview_dpi / base_dpi) * base_width)

        if is_pdf:
            if st.button("Sekker PDF (Askan n Yisebtar)"):
                try:
                    # getvalue() returns the whole upload regardless of the
                    # stream position, unlike read().
                    images = convert_from_bytes(uploaded_file.getvalue(), dpi=preview_dpi)
                except Exception as e:
                    st.error(f"Ugul deg uselket n PDF: {e}")
                    st.warning("Senked ma yuli poppler (MD. 'conda install -c conda-forge poppler').")
                else:
                    if images:
                        # Keep un-enhanced copies; enhancement happens at display time.
                        st.session_state.all_pdf_images = [img.copy() for img in images]
                        st.session_state.current_page_index = 0
                        st.session_state.display_image = images[0]
                        st.success(f"Yuli-d uPDF (DPI n Uskan: {preview_dpi}). Tekki ɣef 'Sekker OCR' i Uselket.")
                    else:
                        st.error("Ulac isebtare deg ufaylu PDF.")

            # Page viewer with previous/next navigation, once pages are stored.
            pdf_pages = st.session_state.all_pdf_images
            if pdf_pages:
                st.subheader("Askan n Yisebtar n Ufaylu PDF")
                num_pages = len(pdf_pages)
                # Clamp so a stale index can never address a missing page.
                current_idx = min(max(st.session_state.current_page_index, 0), num_pages - 1)

                col_nav1, col_nav2, col_nav3 = st.columns([1, 2, 1])
                with col_nav1:
                    st.button(
                        "Ɣer deffir",
                        disabled=(current_idx == 0),
                        on_click=_go_to_page,
                        args=(max(0, current_idx - 1),),
                        key='prev_btn_slideshow',
                    )
                with col_nav2:
                    st.text(f"Asebter {current_idx + 1} n {num_pages}")
                with col_nav3:
                    st.button(
                        "Ɣer zdat",
                        disabled=(current_idx == num_pages - 1),
                        on_click=_go_to_page,
                        args=(min(num_pages - 1, current_idx + 1),),
                        key='next_btn_slideshow',
                    )

                # Enhance a copy of the current page for display only.
                current_img = pdf_pages[current_idx]
                display_image_enhanced = enhance_image(current_img.copy(), for_display=True)
                st.image(display_image_enhanced, caption=f"Asebter {current_idx + 1} ({preview_dpi} DPI)", width=calculated_width)

        else:  # It's an image file
            image = Image.open(uploaded_file)
            # Enhance a copy for display; keep the original in session state.
            display_image_enhanced = enhance_image(image.copy(), for_display=True)
            st.image(display_image_enhanced, caption=uploaded_file.name, width=calculated_width)
            st.session_state.display_image = image

    with col2:
        st.subheader("Asezṛeg n Uḍris seg OCR")

        if st.button("Sekker OCR"):
            full_text = ""
            ocr_failed = False
            # Placeholders so the progress message and bar can be removed afterwards.
            progress_text = st.empty()
            progress_bar = st.progress(0)

            with st.spinner("Asekker n OCR..."):
                if is_pdf:
                    try:
                        images = convert_from_bytes(uploaded_file.getvalue(), dpi=preview_dpi)
                        num_pages = len(images)
                        page_chunks = []
                        for i, page_image in enumerate(images):
                            progress_text.text(f"Yeɣɣar asebter {i+1} n {num_pages}...")
                            progress_bar.progress((i + 1) / num_pages)
                            txt = process_image(page_image)
                            page_chunks.append(f"\n--- Asebter {i+1} ---\n{txt}\n")
                        full_text = "".join(page_chunks)
                    except Exception as e:
                        ocr_failed = True
                        st.error(f"Ugul deg uPDF deg OCR: {e}")
                        st.warning("Ma twalaḍ ugul yeɛnan 'poppler_path', senked ma yuli poppler (MD. 'conda install -c conda-forge poppler').")
                else:  # It's an image file
                    progress_text.text("Yeɣɣar tugna...")
                    image = Image.open(uploaded_file)
                    full_text = process_image(image)
                    progress_bar.progress(100)

            progress_text.empty()
            progress_bar.empty()

            # Only publish the result and report success when nothing failed;
            # otherwise the previously stored text is left untouched.
            if not ocr_failed:
                st.session_state.ocr_text = full_text
                # Push the new text into the keyed editor before it is created.
                st.session_state["text_editor"] = full_text
                st.success("OCR Yemmed!")

        # Text editor, always visible. Its content lives in session state under
        # its key, so it is seeded from there rather than through `value=`.
        if "text_editor" not in st.session_state:
            st.session_state["text_editor"] = st.session_state.ocr_text
        edited_text = st.text_area("Zṛeg Aḍris, Seɣti Tira-s da", height=600, key="text_editor")
        # Mirror user edits so the download below uses the edited text.
        st.session_state.ocr_text = edited_text

        # Download button, offered only when there is text.
        if st.session_state.ocr_text:
            st.download_button(
                label="Zdem Aḍris",
                data=st.session_state.ocr_text.encode('utf-8'),
                file_name=f"{uploaded_file.name.replace('.', '_')}_ocr.txt",
                mime="text/plain"
            )
        else:
            st.info("Seddu OCR, Selket s Aḍris.")

else:
    # No file selected (initial load, or the user removed it): drop per-file state.
    for key in ["ocr_text", "display_image", "current_file_info", "all_pdf_images", "current_page_index"]:
        if key in st.session_state:
            del st.session_state[key]
    st.write("Ldi afaylu PDF, PNG, JPG, or JPEG seg ufeggad n yisefka.")
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# requirements-OCR
|
| 2 |
+
|
| 3 |
+
Pillow==10.4.0
|
| 4 |
+
pytesseract==0.3.13
|
| 5 |
+
pdf2image==1.17.0
|
| 6 |
+
tesseract==5.5.1
|