Daniela-C committed on
Commit
fe70f39
·
verified ·
1 Parent(s): 9ed7372

Upload streamlit_taxi_app.py

Browse files
Files changed (1) hide show
  1. streamlit_taxi_app.py +65 -0
streamlit_taxi_app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Streamlit App: NYC Taxi Anomaly Detector with Event Markers
2
+
3
+ import streamlit as st
4
+ import pandas as pd
5
+ import matplotlib.pyplot as plt
6
+ from datetime import datetime
7
+
8
# Page-level configuration followed by the main heading of the app.
st.set_page_config(
    page_title="NYC Taxi Anomaly Detector",
    layout="wide",
)
st.title("🚕 NYC Taxi Passenger Count - Anomaly Detection")
10
+
11
+ # Load data
12
@st.cache_data
def load_data():
    """Read the anomaly CSV into a DataFrame indexed by pickup datetime.

    The file ``nyc_taxi_real_anomalies.csv`` is resolved relative to the
    current working directory. Its ``tpep_pickup_datetime`` column is parsed
    as dates and used as the index.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    timestamp_column = 'tpep_pickup_datetime'
    return pd.read_csv(
        'nyc_taxi_real_anomalies.csv',
        parse_dates=[timestamp_column],
        index_col=timestamp_column,
    )
16
+
17
# Load the dataset once; the sections below read from `df`.
df = load_data()

# NYC events/holidays to mark on the chart: display label -> ISO date string.
events = {
    "New Year's Eve": "2015-12-31",
    "New Year's Day": "2016-01-01",
    "Martin Luther King Jr. Day": "2016-01-18",
}
25
+
26
+ # Sidebar controls
27
# Sidebar controls: a date range that defaults to the full span of the data,
# and the percentile used as the anomaly cut-off.
st.sidebar.header("Filters")
start_date = st.sidebar.date_input("Start Date", df.index.min().date())
end_date = st.sidebar.date_input("End Date", df.index.max().date())
threshold_slider = st.sidebar.slider("Anomaly Threshold (%)", 90, 99, 95)

# Restrict the data to the selected dates. The explicit copy gives us a frame
# we own, so adding a column below does not write into a slice of `df`
# (which can trigger pandas' SettingWithCopyWarning).
filtered_df = df.loc[str(start_date):str(end_date)].copy()

# An inverted range (start after end) or a range without rows leaves nothing
# to analyse: the quantile would be NaN and the chart empty. Tell the user
# instead of rendering an empty page.
if filtered_df.empty:
    st.warning("No data in the selected date range. Adjust the start and end dates.")
    st.stop()

# Flag rows whose reconstruction error exceeds the chosen percentile of the
# errors within the selected range.
new_threshold = filtered_df['reconstruction_error'].quantile(threshold_slider / 100.0)
filtered_df['anomaly_custom'] = filtered_df['reconstruction_error'] > new_threshold
38
+
39
+ # Plot
40
# Plot passenger counts and highlight the rows flagged as anomalies.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(filtered_df.index, filtered_df['passenger_count'], label='Passenger Count')

# Select the anomalous rows once and reuse them for both scatter coordinates.
anomalies = filtered_df[filtered_df['anomaly_custom']]
ax.scatter(anomalies.index, anomalies['passenger_count'],
           color='red', label='Anomaly')

# Add event markers. An event is drawn when its date falls inside the time
# span of the filtered data; requiring an exact timestamp match would drop
# the marker whenever no sample sits precisely at midnight of that day.
if not filtered_df.empty:
    first_ts = filtered_df.index.min()
    last_ts = filtered_df.index.max()
    # Naive and tz-aware timestamps cannot be compared, so align the event
    # date with the index's timezone when it has one.
    index_tz = getattr(filtered_df.index, 'tz', None)
    for name, date_str in events.items():
        event_date = pd.to_datetime(date_str)
        if index_tz is not None:
            event_date = event_date.tz_localize(index_tz)
        if first_ts <= event_date <= last_ts:
            ax.axvline(event_date, color='orange', linestyle='--', alpha=0.7)
            ax.text(event_date, ax.get_ylim()[1]*0.9, name, rotation=90, color='orange', fontsize=8)

ax.set_title('Anomaly Detection with NYC Event Markers')
ax.legend()
ax.set_xlabel("Date")
ax.set_ylabel("Passenger Count")
# Rotate the tick labels through the Axes object rather than pyplot's global
# "current axes" state.
ax.tick_params(axis='x', labelrotation=45)
st.pyplot(fig)
# Figures created via pyplot stay registered until closed; release this one
# so reruns of the script do not accumulate open figures.
plt.close(fig)
59
+
60
+ # Show data table
61
# Collapsible table with the columns relevant to the anomaly flagging.
table_columns = ['passenger_count', 'reconstruction_error', 'anomaly_custom']
with st.expander("📄 View Data Table"):
    st.dataframe(filtered_df[table_columns])

# Offer the filtered frame as a UTF-8 encoded CSV download.
# NOTE(review): the button label says "Anomalies", but every filtered row is
# exported (anomalous or not) — confirm whether that is intended.
csv_bytes = filtered_df.to_csv().encode('utf-8')
st.download_button(
    "Download Anomalies CSV",
    data=csv_bytes,
    file_name="filtered_anomalies.csv",
    mime="text/csv",
)