import os import psycopg2 from dotenv import load_dotenv def setup_database(): load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env')) url = os.environ.get("SUPABASE_DB_URL") if not url: print("SUPABASE_DB_URL not set in .env") return print(f"Connecting to database...") schema_sql = """ CREATE TABLE IF NOT EXISTS sense_file_registry ( id BIGSERIAL PRIMARY KEY, uid TEXT NOT NULL, connector_type TEXT NOT NULL, file_id TEXT NOT NULL, file_name TEXT NOT NULL, is_folder BOOLEAN DEFAULT FALSE, mime_type TEXT, file_size_bytes BIGINT, parent_folder_id TEXT, full_path TEXT, first_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), metadata JSONB DEFAULT '{}'::jsonb, UNIQUE (uid, connector_type, file_id) ); CREATE TABLE IF NOT EXISTS sense_file_scan_state ( id BIGSERIAL PRIMARY KEY, uid TEXT NOT NULL, connector_type TEXT NOT NULL, file_id TEXT NOT NULL, classification TEXT NOT NULL DEFAULT 'unscanned', scan_type TEXT, last_scanned_at TIMESTAMPTZ, scan_session_id BIGINT, UNIQUE (uid, connector_type, file_id) ); DO $$ BEGIN IF EXISTS(SELECT * FROM information_schema.columns WHERE table_name='sense_file_scan_state' and column_name='scan_status') THEN ALTER TABLE "public"."sense_file_scan_state" RENAME COLUMN "scan_status" TO "classification"; END IF; END $$; CREATE TABLE IF NOT EXISTS sense_scan_sessions ( id BIGSERIAL PRIMARY KEY, uid TEXT NOT NULL, connector_type TEXT NOT NULL, scan_type TEXT NOT NULL, triggered_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), files_scanned INTEGER DEFAULT 0, total_pii_found INTEGER DEFAULT 0, status TEXT DEFAULT 'running' ); -- Compound indexes for performance (Recommended) CREATE INDEX IF NOT EXISTS idx_sense_file_registry_uid_conn ON sense_file_registry(uid, connector_type); CREATE INDEX IF NOT EXISTS idx_sense_file_scan_state_uid_conn ON sense_file_scan_state(uid, connector_type); CREATE INDEX IF NOT EXISTS idx_sense_scan_sessions_uid_conn ON sense_scan_sessions(uid, connector_type); """ try: conn = psycopg2.connect(url) cur = conn.cursor() cur.execute(schema_sql) conn.commit() # Migration statements — safe to run on existing deployments cur.execute("ALTER TABLE sense_file_registry ADD COLUMN IF NOT EXISTS metadata JSONB DEFAULT '{}'::jsonb;") cur.execute("ALTER TABLE sense_file_scan_state DROP COLUMN IF EXISTS pii_count;") cur.execute("ALTER TABLE sense_file_scan_state DROP COLUMN IF EXISTS score;") conn.commit() cur.close() conn.close() print("Database schema created successfully.") except Exception as e: print(f"Error setting up database: {e}") if __name__ == "__main__": setup_database()