Spaces:

Daniela-C
/

nyc_taxi_app

Runtime error

Daniela-C committed on Jul 31, 2025

Commit

c122809

verified ·

1 Parent(s): dba1d94

Update nyc_taxi_app.py

Files changed (1) hide show

nyc_taxi_app.py CHANGED Viewed

@@ -1,27 +1,30 @@
 import streamlit as st
 import pandas as pd
 import matplotlib.pyplot as plt
-from sklearn.ensemble import IsolationForest
-from datasets import load_dataset
-st.title("NYC Taxi Anomaly Detection")
-from datasets import load_dataset
-dataset = load_dataset("your_dataset_id", cache_dir="/tmp/hf_cache")
-st.line_chart(df["value"], height=300)
-model = IsolationForest(contamination=0.01, random_state=42)
-df["anomaly"] = model.fit_predict(df[["value"]])
-df["anomaly_score"] = model.decision_function(df[["value"]])
-anomalies = df[df["anomaly"] == -1]
-st.subheader("Detected Anomalies")
-fig, ax = plt.subplots(figsize=(10, 4))
-df["value"].plot(ax=ax, label="Taxi Volume")
-anomalies["value"].plot(ax=ax, style='ro', label="Anomalies")
-ax.legend()
-st.pyplot(fig)

+import os
+os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib_cache"
 import streamlit as st
 import pandas as pd
 import matplotlib.pyplot as plt
+st.title("📊 Parquet Data Explorer")
+# Load your local Parquet dataset
+@st.cache_data
+def load_data():
+    df = pd.read_parquet("data/your_dataset.parquet")
+    return df
+df = load_data()
+st.write("Sample of Your Data:")
+st.dataframe(df.head())
+# Plot a histogram of a user-selected numeric column, if the dataset has any
+numeric_cols = df.select_dtypes(include='number').columns
+if len(numeric_cols) > 0:
+    selected_col = st.selectbox("Choose a numeric column to plot:", numeric_cols)
+    st.write(f"### Histogram for `{selected_col}`")
+    fig, ax = plt.subplots()
+    df[selected_col].hist(bins=30, ax=ax)
+    st.pyplot(fig)
+else:
+    st.warning("No numeric columns found in your dataset.")