max-long committed on
Commit
67aa33f
·
verified ·
1 Parent(s): 7ebfa43

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import pandas as pd
from gliner import GLiNER
2
+
3
# Column of the catalogue holding the free text to annotate.
# Replace with the actual column name containing the text data.
text_column = "Description"

# Catalogue the demo draws its snippets from.
# Replace with your actual CSV file path.
df = pd.read_csv("1921_catalogue_SMG.csv")

# Fine-tuned GLiNER checkpoint used for entity prediction.
model = GLiNER.from_pretrained(
    "max-long/textile_machines_3_oct",
    trust_remote_code=True,
)
9
+
10
def get_new_snippet(frame=None, column=None):
    """Return one randomly chosen text snippet from the catalogue.

    Args:
        frame: DataFrame to sample from. Defaults to the module-level ``df``.
        column: Name of the column holding the text. Defaults to the
            module-level ``text_column``.

    Returns:
        The sampled cell as a string, or the message
        ``"No more snippets available."`` when the column holds no usable
        text (no rows at all, or only missing values).
    """
    frame = df if frame is None else frame
    column = text_column if column is None else column

    # Empty CSV cells are read as NaN; sampling one would hand a float
    # ``nan`` to the textbox, so only rows with actual text are candidates.
    candidates = frame[column].dropna()
    if candidates.empty:
        return "No more snippets available."  # Nothing usable in the CSV

    return str(candidates.sample(n=1).iloc[0])
17
+
18
def ner(text: str, labels=("Textile Machinery",), threshold: float = 0.5, predictor=None):
    """Find entities in *text* and render them as highlighted HTML.

    Args:
        text: Raw text to analyse.
        labels: Entity labels requested from the model and kept in the result.
        threshold: Minimum confidence passed to the model.
        predictor: Object exposing ``predict_entities``. Defaults to the
            module-level ``model``.

    Returns:
        A ``(highlighted_html, entities)`` tuple. ``entities`` is a list of
        dicts with the keys ``entity``, ``word``, ``start``, ``end`` and
        ``score``, in the order the model returned them.
    """
    # Nothing to analyse: skip the model call entirely.
    if not text or not text.strip():
        return text or "", []

    labels = list(labels)
    predictor = model if predictor is None else predictor

    # Predict entities using the fine-tuned GLiNER model
    predictions = predictor.predict_entities(
        text, labels, flat_ner=True, threshold=threshold
    )

    textile_entities = [
        {
            "entity": ent["label"],
            "word": ent["text"],
            "start": ent["start"],
            "end": ent["end"],
            "score": ent.get("score", 0),
        }
        for ent in predictions
        if ent["label"] in labels
    ]

    return _highlight_entities(text, textile_entities), textile_entities


def _highlight_entities(text, entities):
    """Return *text* as HTML with each entity span wrapped in a highlight span."""
    import html  # function-scope import keeps this block self-contained

    pieces = []
    cursor = 0
    for ent in sorted(entities, key=lambda item: item["start"]):
        start, end = ent["start"], ent["end"]
        if start < cursor:
            # Overlaps the span already emitted; nesting would break the markup.
            continue
        # The text is user-supplied and ends up in an HTML component, so every
        # segment is escaped; only the <span> wrapper itself is raw markup.
        pieces.append(html.escape(text[cursor:start]))
        pieces.append(
            "<span style='background-color: yellow; font-weight: bold;'>"
            + html.escape(text[start:end])
            + "</span>"
        )
        cursor = end
    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces)
46
+
47
# Gradio Interface
with gr.Blocks(title="Textile Machinery NER Demo") as demo:
    # Page header describing what the demo does.
    gr.Markdown(
        """
# Textile Machinery Entity Recognition Demo
This demo selects a random text snippet from a CSV file and identifies "Textile Machinery" entities using a fine-tuned GLiNER model.
"""
    )

    # Editable text area; the refresh button overwrites it with a CSV snippet.
    input_text = gr.Textbox(
        lines=5,
        label="Text input",
        placeholder="Enter your text here",
        value="Enter or refresh to get text from CSV",
    )

    # Result panes: highlighted markup plus the raw entity records.
    output_highlighted = gr.HTML(label="Predicted Entities")
    output_entities = gr.JSON(label="Entities")

    submit_btn = gr.Button("Find Textile Machinery!")
    refresh_btn = gr.Button("Get New Snippet")

    # "Get New Snippet" takes no input and replaces the textbox content.
    refresh_btn.click(fn=get_new_snippet, inputs=None, outputs=[input_text])

    # "Find Textile Machinery!" runs NER on the textbox and fills both panes.
    submit_btn.click(
        fn=ner,
        inputs=input_text,
        outputs=[output_highlighted, output_entities],
    )

# Enable request queuing, then start the server with debug output.
demo.queue().launch(debug=True)