| |
|
|
import re
import sys

from pptx import Presentation
from transformers import pipeline
|
|
def extract_text_from_pptx(file_path) -> str:
    """Return the text of every text-bearing shape in a PowerPoint file.

    Slides are visited in presentation order and, within each slide, shapes
    are visited in the order ``slide.shapes`` yields them.

    Args:
        file_path: Location of the presentation, passed straight to
            ``Presentation``.

    Returns:
        The ``text`` of each shape that exposes that attribute, joined with
        newlines. An empty string when no shape exposes ``text``.
    """
    presentation = Presentation(file_path)

    text = []
    for slide in presentation.slides:
        # Shapes without a ``text`` attribute are skipped rather than
        # raising AttributeError.
        text.extend(
            shape.text for shape in slide.shapes if hasattr(shape, "text")
        )

    return "\n".join(text)
|
|
def main(file_path=None):
    """Extract, classify, and summarize the text of a PowerPoint file.

    Prints the whitespace-normalized text, then the predicted label with its
    score, then the raw output of the summarization pipeline.

    Args:
        file_path: Presentation to process. When omitted, the first
            command-line argument is used if one was given; otherwise the
            placeholder path ``"path/to/your/powerpoint.pptx"`` is used.
    """
    if file_path is None:
        if len(sys.argv) > 1:
            file_path = sys.argv[1]
        else:
            file_path = "path/to/your/powerpoint.pptx"

    extracted_text = extract_text_from_pptx(file_path)
    # Collapse each whitespace run to a single space; strip the ends so a
    # deck containing only whitespace is recognized as empty below.
    cleaned_text = re.sub(r"\s+", " ", extracted_text).strip()

    print(cleaned_text)

    if not cleaned_text:
        # Nothing to classify or summarize, so skip loading the models.
        print("No text found in the presentation.")
        return

    classifier = pipeline(
        "text-classification", model="Ahmed235/roberta_classification"
    )
    summarizer = pipeline("summarization", model="Falconsai/text_summarization")

    # truncation=True: the text of a whole deck can be longer than the
    # models accept as input; truncating avoids a failure on long decks.
    result = classifier(cleaned_text, truncation=True)[0]
    predicted_label = result["label"]
    predicted_probability = result["score"]

    print("Predicted Label:", predicted_label)
    print(f"Evaluate the topic according to {predicted_label} is: {predicted_probability}")
    print(
        summarizer(
            cleaned_text,
            max_length=80,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
    )
|
|
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|
|