Phase-Technologies committed on
Commit
d603f91
·
verified ·
1 Parent(s): b6b2f09

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -7,12 +7,11 @@ import threading
7
  MODEL_NAME = "Xerv-AI/MAXWELL"
8
 
9
  print("Loading model on CPU... this may take a few minutes.")
10
- # We load in 8-bit or float16 because 4-bit (bitsandbytes) is GPU-only
11
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
12
  model = AutoModelForCausalLM.from_pretrained(
13
  MODEL_NAME,
14
  device_map="cpu",
15
- torch_dtype=torch.float32 # CPU requires float32 for stability
16
  )
17
 
18
  # --- 2. INFERENCE LOGIC ---
@@ -29,7 +28,7 @@ def stream_maxwell(message, history):
29
 
30
  gen_kwargs = dict(
31
  **inputs,
32
- max_new_tokens=512, # Reduced for CPU speed
33
  temperature=0.3,
34
  do_sample=True,
35
  streamer=streamer,
@@ -48,20 +47,21 @@ def stream_maxwell(message, history):
48
  display_text = display_text.replace("</reasoning>", "</i></details>\n\n")
49
  yield display_text
50
 
51
- # --- 3. UI DESIGN ---
52
  custom_css = """
53
- footer {visibility: hidden}
54
  .gradio-container {background-color: #121212 !important; color: white !important;}
55
  details { background: #1A1A1A; border-left: 2px solid #3b82f6; padding: 10px; margin: 10px 0; color: #A0A0A0; }
 
56
  """
57
 
58
- demo = gr.ChatInterface(
59
- fn=stream_maxwell,
60
- title="M. (CPU Mode)",
61
- description="The computational throne is currently on backup power (CPU). Expect slower response times.",
62
- css=custom_css,
63
- theme=gr.themes.Default(primary_hue="blue", neutral_hue="zinc")
64
- )
65
 
66
  if __name__ == "__main__":
67
  demo.queue().launch()
 
7
  MODEL_NAME = "Xerv-AI/MAXWELL"
8
 
9
  print("Loading model on CPU... this may take a few minutes.")
 
10
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
11
  model = AutoModelForCausalLM.from_pretrained(
12
  MODEL_NAME,
13
  device_map="cpu",
14
+ torch_dtype=torch.float32
15
  )
16
 
17
  # --- 2. INFERENCE LOGIC ---
 
28
 
29
  gen_kwargs = dict(
30
  **inputs,
31
+ max_new_tokens=512,
32
  temperature=0.3,
33
  do_sample=True,
34
  streamer=streamer,
 
47
  display_text = display_text.replace("</reasoning>", "</i></details>\n\n")
48
  yield display_text
49
 
50
+ # --- 3. UI DESIGN (Fixed for Gradio 4.0+) ---
51
  custom_css = """
52
+ footer {visibility: hidden !important;}
53
  .gradio-container {background-color: #121212 !important; color: white !important;}
54
  details { background: #1A1A1A; border-left: 2px solid #3b82f6; padding: 10px; margin: 10px 0; color: #A0A0A0; }
55
+ summary { cursor: pointer; color: #5c94ff; font-weight: bold; }
56
  """
57
 
58
+ # Wrap ChatInterface in Blocks to apply the CSS
59
+ with gr.Blocks(css=custom_css, theme=gr.themes.Default(primary_hue="blue", neutral_hue="zinc")) as demo:
60
+ gr.ChatInterface(
61
+ fn=stream_maxwell,
62
+ title="M. (CPU Mode)",
63
+ description="The computational throne is currently on backup power (CPU).",
64
+ )
65
 
66
  if __name__ == "__main__":
67
  demo.queue().launch()