AitBAD committed on
Commit
49dfaa0
·
verified ·
1 Parent(s): a8b7156

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +294 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py script that converts pdf or png txt image to UTF8 text
2
+ # Kabyle OCR tool
3
+ import streamlit as st
4
+ import pytesseract
5
+ from pdf2image import convert_from_bytes
6
+ from PIL import Image, ImageEnhance, ImageFilter
7
+ import os
8
+
9
# Tesseract reads its language data from the directory named by TESSDATA_PREFIX.
# Prefer the 'tessdata' folder that sits next to this script so the app also
# works when it is launched from a different working directory; otherwise keep
# the working-directory location.
_cwd_tessdata = os.path.join(os.getcwd(), 'tessdata')
_script_path = globals().get('__file__')
_script_tessdata = (
    os.path.join(os.path.dirname(os.path.abspath(_script_path)), 'tessdata')
    if _script_path
    else _cwd_tessdata
)
os.environ['TESSDATA_PREFIX'] = (
    _script_tessdata if os.path.isdir(_script_tessdata) else _cwd_tessdata
)

# --- Configuration ---
# Options handed to Tesseract on every OCR call: language model 'kab',
# engine mode 3, page segmentation mode 6. Adjust as needed.
custom_config = r'-l kab --oem 3 --psm 6'
15
+
16
+ # --- Function Definition ---
17
def enhance_image(image, for_display=False):
    """Return a grayscale copy of *image* with boosted contrast and sharpness.

    Args:
        image: A PIL image in any mode.
        for_display: Accepted so callers can state whether the result is for
            preview or for OCR. The same enhancement factors are used in both
            cases, so the flag currently has no effect.

    Returns:
        The enhanced image in mode ``'L'``.
    """
    # Grayscale conversion ignores the alpha channel, so transparent regions
    # (commonly stored as black) would come out black and swamp the text.
    # Flatten any transparency onto a white page first.
    has_alpha = image.mode in ('RGBA', 'LA') or (
        image.mode == 'P' and 'transparency' in image.info
    )
    if has_alpha:
        rgba = image.convert('RGBA')
        white_page = Image.new('RGBA', rgba.size, (255, 255, 255, 255))
        image = Image.alpha_composite(white_page, rgba)

    if image.mode != 'L':
        image = image.convert('L')

    # Factors above 1.0 strengthen the effect; these are mild boosts.
    contrast_factor = 1.5
    sharpness_factor = 1.2
    image = ImageEnhance.Contrast(image).enhance(contrast_factor)
    image = ImageEnhance.Sharpness(image).enhance(sharpness_factor)
    return image
51
+
52
def process_image(image):
    """Run OCR on one image and return the recognised text.

    The image is passed through ``enhance_image`` before being handed to
    pytesseract with the module-level ``custom_config``. Any exception is
    caught and reported as the returned string instead of being raised.
    """
    try:
        prepared = enhance_image(image, for_display=False)
        return pytesseract.image_to_string(prepared, config=custom_config)
    except Exception as exc:
        return f"Error during OCR: {exc}"
61
+
62
# --- Page Setup ---
st.set_page_config(page_title="Kabyle OCR", layout="wide")
st.title("Asemmezdey Asekdan n Teqbaylit - Kabyle OCR")

# --- Sidebar ---
st.sidebar.header("Isefka")
uploaded_file = st.sidebar.file_uploader(
    "Ssali-d Afaylu",
    type=["png", "jpg", "jpeg", "pdf"],
)

# Font size (px) applied to the OCR text editor through the CSS below.
font_size = st.sidebar.slider(
    "Tiddi n Yisekkilen n Uḍris seg OCR (px)",
    min_value=10,
    max_value=30,
    value=18,
    step=1,
)

# DPI used when rendering PDF pages; it also scales the preview width.
preview_dpi = st.sidebar.slider(
    "Amerkid n Uskan (DPI)",
    min_value=150,
    max_value=700,
    value=300,
    step=50,
)

# --- Custom CSS: editor font size/colour and preview image layout ---
# Doubled braces are literal CSS braces inside the f-string.
custom_css = f"""
    <style>
    /* Target the text area where the OCR text is edited */
    textarea[data-testid="stText"] {{
        font-size: {font_size}px;
        color: #000000; /* Set text color to black for better visibility */
        line-height: 1.5; /* Improve readability with line spacing */
    }}

    /* Target the text inside the download button preview or other standard text elements if needed */
    .stTextArea textarea {{
        font-size: {font_size}px;
        color: #000000; /* Set text color to black */
    }}

    /* Target images specifically within that first column's content div */
    section[data-testid="stSidebar"] ~ div > div:has(div[data-testid="stColumn"] > div:nth-child(1)) > div:nth-child(1) img {{
        max-width: 100%; /* Ensures image doesn't exceed the column width */
        height: auto; /* Maintains aspect ratio when width is constrained */
        display: block; /* Makes the image a block element, necessary for max-width */
        margin-left: auto; /* Center the image horizontally within its container */
        margin-right: auto; /* Center the image horizontally within its container */
        margin-bottom: 10px; /* Add some space below the image */
    }}
    </style>
    """
st.markdown(custom_css, unsafe_allow_html=True)
108
+
109
# --- Main App Logic ---
# Session-state keys owned by this page; all are dropped when no file is selected.
SESSION_KEYS = (
    "ocr_text",
    "display_image",
    "current_file_info",
    "all_pdf_images",
    "current_page_index",
)


def _preview_width(dpi, base_dpi=300, base_width=600):
    """Return the preview width in pixels, scaled linearly with *dpi*."""
    return int((dpi / base_dpi) * base_width)


def _go_to_page(index):
    """Button callback: make *index* the page shown in the PDF preview."""
    st.session_state.current_page_index = index


if uploaded_file is not None:
    # A different (name, size, type) triple means a new upload: start clean.
    current_file_info = (uploaded_file.name, uploaded_file.size, uploaded_file.type)
    if st.session_state.get('current_file_info') != current_file_info:
        st.session_state.current_file_info = current_file_info
        st.session_state.ocr_text = ""
        st.session_state.display_image = None
        st.session_state.all_pdf_images = []
        st.session_state.current_page_index = 0

    st.info(f"Afaylu i d-yulin: {uploaded_file.name}")

    # Make sure every key read below exists even if the reset above did not run.
    for state_key, default in (
        ("ocr_text", ""),
        ("display_image", None),
        ("all_pdf_images", []),
        ("current_page_index", 0),
    ):
        if state_key not in st.session_state:
            st.session_state[state_key] = default

    is_pdf = "pdf" in uploaded_file.type
    preview_width = _preview_width(preview_dpi)

    # Side-by-side layout: preview on the left, OCR editor on the right.
    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("Askan n Ufaylu deg Talɣa-s Tamenzut")

        if is_pdf:
            if st.button("Sekker PDF (Askan n Yisebtar)"):
                try:
                    # getvalue() returns the complete upload regardless of the
                    # current stream position, so an earlier read of the same
                    # file cannot leave this call with empty bytes.
                    images = convert_from_bytes(uploaded_file.getvalue(), dpi=preview_dpi)
                    if images:
                        # Keep un-enhanced copies; enhancement is applied per page at display time.
                        st.session_state.all_pdf_images = [img.copy() for img in images]
                        st.session_state.current_page_index = 0
                        st.session_state.display_image = images[0]
                        st.success(f"Yuli-d uPDF (DPI n Uskan: {preview_dpi}). Tekki ɣef 'Sekker OCR' i Uselket.")
                    else:
                        st.error("Ulac isebtare deg ufaylu PDF.")
                except Exception as e:
                    st.error(f"Ugul deg uselket n PDF: {e}")
                    st.warning("Senked ma yuli poppler (MD. 'conda install -c conda-forge poppler').")

            # Single-page viewer with previous/next navigation.
            if st.session_state.all_pdf_images:
                st.subheader("Askan n Yisebtar n Ufaylu PDF")
                num_pages = len(st.session_state.all_pdf_images)
                # Clamp so a stale index can never point outside the page list.
                current_idx = min(max(st.session_state.current_page_index, 0), num_pages - 1)

                col_nav1, col_nav2, col_nav3 = st.columns([1, 2, 1])
                with col_nav1:
                    st.button(
                        "Ɣer deffir",
                        disabled=(current_idx == 0),
                        on_click=_go_to_page,
                        args=(max(0, current_idx - 1),),
                        key='prev_btn_slideshow',
                    )
                with col_nav2:
                    st.text(f"Asebter {current_idx + 1} n {num_pages}")
                with col_nav3:
                    st.button(
                        "Ɣer zdat",
                        disabled=(current_idx == num_pages - 1),
                        on_click=_go_to_page,
                        args=(min(num_pages - 1, current_idx + 1),),
                        key='next_btn_slideshow',
                    )

                # Enhance a copy so the stored page stays untouched.
                current_img = st.session_state.all_pdf_images[current_idx]
                display_image_enhanced = enhance_image(current_img.copy(), for_display=True)
                st.image(
                    display_image_enhanced,
                    caption=f"Asebter {current_idx + 1} ({preview_dpi} DPI)",
                    width=preview_width,
                )

        else:  # It's an image file
            image = Image.open(uploaded_file)
            # Enhance a copy for the preview; keep the original in session state.
            display_image_enhanced = enhance_image(image.copy(), for_display=True)
            st.image(display_image_enhanced, caption=uploaded_file.name, width=preview_width)
            st.session_state.display_image = image

    with col2:
        st.subheader("Asezṛeg n Uḍris seg OCR")

        if st.button("Sekker OCR"):
            full_text = ""
            ocr_failed = False
            # Placeholders so the progress message and bar can be removed afterwards.
            progress_text = st.empty()
            progress_bar = st.progress(0)

            with st.spinner("Asekker n OCR..."):
                if is_pdf:
                    try:
                        images = convert_from_bytes(uploaded_file.getvalue(), dpi=preview_dpi)
                        num_pages = len(images)
                        page_texts = []
                        for i, page_image in enumerate(images):
                            progress_text.text(f"Yeɣɣar asebter {i+1} n {num_pages}...")
                            progress_bar.progress((i + 1) / num_pages)
                            txt = process_image(page_image)
                            page_texts.append(f"\n--- Asebter {i+1} ---\n{txt}\n")
                        full_text = "".join(page_texts)
                    except Exception as e:
                        ocr_failed = True
                        st.error(f"Ugul deg uPDF deg OCR: {e}")
                        st.warning("Ma twalaḍ ugul yeɛnan 'poppler_path', senked ma yuli poppler (MD. 'conda install -c conda-forge poppler').")
                else:  # It's an image file
                    progress_text.text("Yeɣɣar tugna...")
                    image = Image.open(uploaded_file)
                    full_text = process_image(image)
                    progress_bar.progress(100)

            progress_text.empty()
            progress_bar.empty()

            # On failure keep whatever text is already in the editor and do
            # not announce success.
            if not ocr_failed:
                st.session_state.ocr_text = full_text
                st.success("OCR Yemmed!")

        # Editor is always shown; it starts from the stored OCR text and any
        # user edit is written back to session state.
        edited_text = st.text_area(
            "Zṛeg Aḍris, Seɣti Tira-s da",
            value=st.session_state.ocr_text,
            height=600,
            key="text_editor",
        )
        if edited_text != st.session_state.ocr_text:
            st.session_state.ocr_text = edited_text

        # Offer the download only when there is text to save.
        if st.session_state.ocr_text:
            st.download_button(
                label="Zdem Aḍris",
                data=st.session_state.ocr_text.encode('utf-8'),
                file_name=f"{uploaded_file.name.replace('.', '_')}_ocr.txt",
                mime="text/plain",
            )
        else:
            st.info("Seddu OCR, Selket s Aḍris.")

else:
    # No file selected (initial load or file removed): drop all page state.
    for key in SESSION_KEYS:
        if key in st.session_state:
            del st.session_state[key]
    st.write("Ldi afaylu PDF, PNG, JPG, or JPEG seg ufeggad n yisefka.")
290
+
291
+
292
+
293
+
294
+
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # requirements-OCR
2
+
3
+ Pillow==10.4.0
4
+ pytesseract==0.3.13
5
+ pdf2image==1.17.0
6
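+ # tesseract==5.5.1 -- this is the Tesseract OCR engine version, a system package rather than a pip package; install it with the system package manager (e.g. apt package "tesseract-ocr")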