Daniela-C committed on
Commit
c122809
·
verified ·
1 Parent(s): dba1d94

Update nyc_taxi_app.py

Browse files
Files changed (1) hide show
  1. nyc_taxi_app.py +26 -23
nyc_taxi_app.py CHANGED
@@ -1,27 +1,30 @@
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
- from sklearn.ensemble import IsolationForest
5
- from datasets import load_dataset
6
-
7
- st.title("NYC Taxi Anomaly Detection")
8
-
9
- from datasets import load_dataset
10
-
11
- dataset = load_dataset("your_dataset_id", cache_dir="/tmp/hf_cache")
12
-
13
-
14
- st.line_chart(df["value"], height=300)
15
-
16
- model = IsolationForest(contamination=0.01, random_state=42)
17
- df["anomaly"] = model.fit_predict(df[["value"]])
18
- df["anomaly_score"] = model.decision_function(df[["value"]])
19
-
20
- anomalies = df[df["anomaly"] == -1]
21
 
22
- st.subheader("Detected Anomalies")
23
- fig, ax = plt.subplots(figsize=(10, 4))
24
- df["value"].plot(ax=ax, label="Taxi Volume")
25
- anomalies["value"].plot(ax=ax, style='ro', label="Anomalies")
26
- ax.legend()
27
- st.pyplot(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib_cache"
3
+
4
  import streamlit as st
5
  import pandas as pd
6
  import matplotlib.pyplot as plt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ st.title("📊 Parquet Data Explorer")
9
+
10
+ # Load your local Parquet dataset
11
+ @st.cache_data
12
+ def load_data():
13
+ df = pd.read_parquet("data/your_dataset.parquet")
14
+ return df
15
+
16
+ df = load_data()
17
+
18
+ st.write("Sample of Your Data:")
19
+ st.dataframe(df.head())
20
+
21
+ # Visualize numeric column if it exists
22
+ numeric_cols = df.select_dtypes(include='number').columns
23
+ if len(numeric_cols) > 0:
24
+ selected_col = st.selectbox("Choose a numeric column to plot:", numeric_cols)
25
+ st.write(f"### Histogram for `{selected_col}`")
26
+ fig, ax = plt.subplots()
27
+ df[selected_col].hist(bins=30, ax=ax)
28
+ st.pyplot(fig)
29
+ else:
30
+ st.warning("No numeric columns found in your dataset.")