File size: 1,333 Bytes
c122809
 
 
fe70f39
 
 
 
c122809
 
e21272c
6a135d9
e21272c
d83aa4a
0cf3b5a
 
c8c8b72
f5dc4b3
 
 
 
0cf3b5a
5325534
 
c122809
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
# Point matplotlib's config/cache directory at a path under /tmp. This is
# assigned before matplotlib is imported further down so the value is already
# in the environment when matplotlib looks it up.
# NOTE(review): presumably the default config location is not writable in the
# deployment environment — confirm.
os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib_cache"

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt

st.title("📊 Parquet Data Explorer")

from datasets import Dataset
from datasets import load_dataset


@st.cache_data
def load_data(
    repo_id: str = "Daniela-C/Yellow_tripdata_2025",
    split: str = "train",
) -> pd.DataFrame:
    """Load one split of a Hugging Face Hub dataset as a pandas DataFrame.

    The result is cached by Streamlit, so widget-triggered reruns of the
    script reuse the DataFrame instead of loading the dataset again.

    Args:
        repo_id: Dataset repository on the Hugging Face Hub.
        split: Name of the split to load. Only "train" is used by this app;
            pass another name explicitly if the repository provides it.

    Returns:
        The requested split converted to a ``pandas.DataFrame``.

    Raises:
        Whatever ``datasets.load_dataset`` raises, e.g. when the repository
        or the requested split does not exist.
    """
    # NOTE(review): if the dataset is private or gated, authenticate first
    # (e.g. `huggingface-cli login`) — confirm against the Hub repository.
    return load_dataset(repo_id, split=split).to_pandas()


# Only the split that is actually displayed is loaded; eagerly requesting
# "validation"/"test" fails when the repository does not define them.
df = load_data()

# Show the first rows so the user can see what was loaded.
st.write("Sample of Your Data:")
st.dataframe(df.head())

# Offer a histogram for any numeric column; warn when there is none.
numeric_cols = df.select_dtypes(include="number").columns
# len() is used deliberately: a pandas Index cannot be evaluated as a bool.
if len(numeric_cols) > 0:
    selected_col = st.selectbox("Choose a numeric column to plot:", numeric_cols)
    st.write(f"### Histogram for `{selected_col}`")
    fig, ax = plt.subplots()
    try:
        df[selected_col].hist(bins=30, ax=ax)
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates until it is closed explicitly.
        # The script is re-executed on each widget interaction, so without
        # this call one figure would leak per rerun.
        plt.close(fig)
else:
    st.warning("No numeric columns found in your dataset.")