Senoox committed on
Commit
65881eb
·
verified ·
1 Parent(s): 86cb39b

feat: enable real AI image/video generation pipelines + ZeroGPU-ready app

Browse files
Files changed (2) hide show
  1. app.py +125 -64
  2. requirements.txt +10 -5
app.py CHANGED
@@ -1,78 +1,139 @@
1
- from datetime import datetime
 
 
2
 
3
  import gradio as gr
4
- from PIL import Image, ImageDraw
 
5
 
6
  try:
7
  import spaces
8
  except Exception:
9
  spaces = None
10
 
11
- TITLE = "Qwen Image Edit Rapid Aio"
12
- MODEL_HINT = "qwen-image-edit-rapid-aio"
13
- MODE = "image"
14
-
15
-
16
- def _render_image(prompt: str) -> Image.Image:
17
- prompt = (prompt or "").strip() or "test"
18
- img = Image.new("RGB", (1024, 1024), color=(20, 24, 32))
19
- d = ImageDraw.Draw(img)
20
- lines = [
21
- f"{TITLE} - smoke test",
22
- f"model: {MODEL_HINT}",
23
- f"mode: {MODE}",
24
- f"time: {datetime.utcnow().isoformat()}Z",
25
- "",
26
- f"prompt: {prompt[:300]}",
27
- ]
28
- y = 40
29
- for ln in lines:
30
- d.text((40, y), ln, fill=(235, 235, 235))
31
- y += 52
32
- return img
33
-
34
-
35
- def _render_video(prompt: str) -> str:
36
- import imageio.v2 as imageio
37
- import numpy as np
38
- out_path = "/tmp/out.mp4"
39
- frames = []
40
- w, h = 640, 360
41
- for i in range(24):
42
- arr = np.zeros((h, w, 3), dtype=np.uint8)
43
- arr[:, :, 0] = (30 + i * 3) % 255
44
- arr[:, :, 1] = (60 + i * 5) % 255
45
- arr[:, :, 2] = (90 + i * 7) % 255
46
- frames.append(arr)
47
- imageio.mimsave(out_path, frames, fps=12, codec="libx264")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  return out_path
49
 
50
 
51
- def _infer(prompt: str):
52
- if MODE == "video":
53
- return _render_video(prompt), f"✅ {TITLE} OK (CPU-safe)."
54
- return _render_image(prompt), f"✅ {TITLE} OK (CPU-safe)."
55
-
56
-
57
- if spaces is not None:
58
- infer = spaces.GPU(duration=60)(_infer)
59
- else:
60
- infer = _infer
61
-
62
-
63
- with gr.Blocks(title=TITLE) as demo:
64
- gr.Markdown(f"""# {TITLE}
65
-
66
- Space opérationnel en mode **{MODE}**.
67
-
68
- - Compatible ZeroGPU et CPU
69
- - Génération de test locale
70
- """)
71
- inp = gr.Textbox(label="Prompt", placeholder="Décris ce que tu veux générer...")
72
- run = gr.Button("Générer")
73
- media = gr.Video(label="Sortie") if MODE == "video" else gr.Image(label="Sortie")
74
- status = gr.Textbox(label="Statut")
75
- run.click(infer, inputs=inp, outputs=[media, status])
 
 
 
 
 
 
76
 
77
  if __name__ == "__main__":
78
  demo.launch()
 
1
+ import os
2
+ import tempfile
3
+ from typing import Optional
4
 
5
  import gradio as gr
6
+ import torch
7
+ from PIL import Image
8
 
9
  try:
10
  import spaces
11
  except Exception:
12
  spaces = None
13
 
14
+ from diffusers import DiffusionPipeline
15
+ from diffusers.utils import export_to_video
16
+
17
# Runtime configuration, all read from environment variables at import time.
# SPACE_ID is lower-cased and is empty when the variable is unset.
SPACE_ID = os.environ.get("SPACE_ID", "").lower()
# Video mode is selected when SPACE_ID contains one of these markers.
IS_VIDEO_SPACE = any(marker in SPACE_ID for marker in ("hunyuanvideo", "wan-2-1"))

# Model repositories; each can be overridden through its environment variable.
IMAGE_MODEL_ID = os.environ.get("IMAGE_MODEL_ID", "black-forest-labs/FLUX.1-schnell")
VIDEO_MODEL_ID = os.environ.get("VIDEO_MODEL_ID", "damo-vilab/text-to-video-ms-1.7b")

# Lazily created pipeline singletons, filled in by the _load_*_pipe helpers.
_image_pipe: Optional[DiffusionPipeline] = None
_video_pipe: Optional[DiffusionPipeline] = None
25
+
26
+
27
def _device_dtype():
    """Pick the device name and torch dtype used when loading pipelines.

    Returns:
        A ``(device, dtype)`` pair. ``("cpu", torch.float32)`` when CUDA is not
        available. With CUDA, the dtype is ``torch.bfloat16`` if device 0
        reports a ``major`` version of 8 or higher, else ``torch.float16``.
    """
    if not torch.cuda.is_available():
        return "cpu", torch.float32

    capability_major = torch.cuda.get_device_properties(0).major
    half_dtype = torch.bfloat16 if capability_major >= 8 else torch.float16
    return "cuda", half_dtype
33
+
34
+
35
def _load_image_pipe() -> DiffusionPipeline:
    """Return the shared image pipeline, creating it on first use.

    On the first call the pipeline is loaded from ``IMAGE_MODEL_ID`` with the
    dtype chosen by ``_device_dtype`` and stored in the module-level
    ``_image_pipe``; later calls return that same object.
    """
    global _image_pipe
    if _image_pipe is not None:
        return _image_pipe

    target, weights_dtype = _device_dtype()
    _image_pipe = DiffusionPipeline.from_pretrained(
        IMAGE_MODEL_ID,
        torch_dtype=weights_dtype,
    )
    if target != "cuda":
        _image_pipe.to("cpu")
    else:
        # With CUDA, rely on model CPU offload instead of moving the whole pipeline.
        _image_pipe.enable_model_cpu_offload()
    return _image_pipe
45
+
46
+
47
def _load_video_pipe() -> DiffusionPipeline:
    """Return the shared video pipeline, creating it on first use.

    On the first call the pipeline is loaded from ``VIDEO_MODEL_ID`` and stored
    in the module-level ``_video_pipe``; later calls return that same object.
    Only the device from ``_device_dtype`` is used here: weights are loaded as
    ``float16`` on CUDA and ``float32`` on CPU.
    """
    global _video_pipe
    if _video_pipe is not None:
        return _video_pipe

    on_gpu = _device_dtype()[0] == "cuda"
    if on_gpu:
        weights_dtype = torch.float16
    else:
        weights_dtype = torch.float32

    _video_pipe = DiffusionPipeline.from_pretrained(
        VIDEO_MODEL_ID,
        torch_dtype=weights_dtype,
    )
    if on_gpu:
        # With CUDA, rely on model CPU offload instead of moving the whole pipeline.
        _video_pipe.enable_model_cpu_offload()
    else:
        _video_pipe.to("cpu")
    return _video_pipe
58
+
59
+
60
def _gpu_decorator(seconds: int):
    """Return the decorator applied to the generation entry points.

    Args:
        seconds: Forwarded as ``duration`` to ``spaces.GPU``.

    Returns:
        ``spaces.GPU(duration=seconds)`` when the ``spaces`` import succeeded;
        otherwise a pass-through decorator that returns the function unchanged.
    """
    if spaces is None:
        # No `spaces` package: decorating must be a no-op.
        return lambda fn: fn
    return spaces.GPU(duration=seconds)
68
+
69
+
70
@_gpu_decorator(120)
def generate_image(prompt: str, steps: int, guidance_scale: float, seed: int):
    """Generate one 1024x1024 image from a text prompt.

    Args:
        prompt: Text prompt; ``None`` or a blank string falls back to a
            built-in default prompt.
        steps: Number of inference steps (coerced to ``int``).
        guidance_scale: Guidance scale (coerced to ``float``).
        seed: Seed for the CPU random generator. ``None`` (e.g. an emptied
            Gradio ``Number`` field) draws a fresh random seed instead of
            raising ``TypeError`` from ``int(None)``.

    Returns:
        The first image of the pipeline output.
    """
    prompt = (prompt or "").strip() or "A cinematic photo of a woman on a beach at sunset"
    pipe = _load_image_pipe()

    # The generator lives on the CPU regardless of where the pipeline runs.
    gen = torch.Generator(device="cpu")
    if seed is None:
        gen.seed()
    else:
        gen.manual_seed(int(seed))

    result = pipe(
        prompt=prompt,
        num_inference_steps=int(steps),
        guidance_scale=float(guidance_scale),
        generator=gen,
        width=1024,
        height=1024,
    )
    return result.images[0]
85
+
86
+
87
@_gpu_decorator(240)
def generate_video(prompt: str, steps: int, fps: int, num_frames: int, seed: int):
    """Generate a short video from a text prompt and return the MP4 path.

    Args:
        prompt: Text prompt; ``None`` or a blank string falls back to a
            built-in default prompt.
        steps: Number of inference steps (coerced to ``int``).
        fps: Frame rate used when encoding the video (coerced to ``int``).
        num_frames: Number of frames to generate (coerced to ``int``).
        seed: Seed for the CPU random generator. ``None`` (e.g. an emptied
            Gradio ``Number`` field) draws a fresh random seed instead of
            raising ``TypeError`` from ``int(None)``.

    Returns:
        Path of the encoded ``.mp4`` file in the system temp directory.
    """
    prompt = (prompt or "").strip() or "A woman walking on a sunny beach, cinematic shot"
    pipe = _load_video_pipe()

    # The generator lives on the CPU regardless of where the pipeline runs.
    gen = torch.Generator(device="cpu")
    if seed is None:
        gen.seed()
    else:
        gen.manual_seed(int(seed))

    result = pipe(
        prompt,
        num_inference_steps=int(steps),
        num_frames=int(num_frames),
        generator=gen,
    )
    frames = result.frames[0]

    # Use a unique file per request: a fixed name would let concurrent or
    # back-to-back requests overwrite a video that is still being served.
    # The handle is closed before encoding so the exporter can reopen the path.
    with tempfile.NamedTemporaryFile(
        prefix="generated_video_", suffix=".mp4", delete=False
    ) as handle:
        out_path = handle.name
    export_to_video(frames, out_path, fps=int(fps))
    return out_path
104
 
105
 
106
def build_ui():
    """Build the Gradio Blocks app for this Space.

    The heading title comes from the ``SPACE_TITLE`` environment variable. When
    it is unset, the title is derived from the last ``/``-separated segment of
    ``SPACE_ID`` (dashes become spaces, title-cased), or "AI Generator" if that
    segment is empty. ``IS_VIDEO_SPACE`` selects the video form wired to
    ``generate_video``; otherwise the image form wired to ``generate_image``
    is built.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    derived_title = SPACE_ID.split("/")[-1].replace("-", " ").title()
    title = os.getenv("SPACE_TITLE", derived_title or "AI Generator")

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        if IS_VIDEO_SPACE:
            gr.Markdown(f"## {title} — AI Video Generation")
            prompt = gr.Textbox(
                label="Prompt",
                value="A woman walking on a sunny beach, cinematic shot",
            )
            with gr.Row():
                steps = gr.Slider(8, 40, value=20, step=1, label="Inference steps")
                num_frames = gr.Slider(8, 32, value=16, step=1, label="Frames")
                fps = gr.Slider(4, 16, value=8, step=1, label="FPS")
                seed = gr.Number(value=42, precision=0, label="Seed")
            out = gr.Video(label="Generated video")
            btn = gr.Button("Generate")
            # Input order must match generate_video's positional parameters.
            btn.click(generate_video, [prompt, steps, fps, num_frames, seed], [out])
        else:
            gr.Markdown(f"## {title} AI Image Generation")
            prompt = gr.Textbox(
                label="Prompt",
                value="A cinematic photo of a woman on a beach at sunset",
            )
            with gr.Row():
                steps = gr.Slider(4, 40, value=20, step=1, label="Inference steps")
                guidance = gr.Slider(1.0, 10.0, value=3.5, step=0.1, label="Guidance scale")
                seed = gr.Number(value=42, precision=0, label="Seed")
            out = gr.Image(type="pil", label="Generated image")
            btn = gr.Button("Generate")
            # Input order must match generate_image's positional parameters.
            btn.click(generate_image, [prompt, steps, guidance, seed], [out])

    return demo
134
+
135
+
136
# Build the Blocks app at import time so `demo` exists as a module attribute.
demo = build_ui()

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt CHANGED
@@ -1,5 +1,10 @@
1
- gradio>=6.9.0
2
- spaces>=0.37.0
3
- pillow>=10
4
- numpy>=1.26
5
- imageio[ffmpeg]>=2.34
 
 
 
 
 
 
1
+ gradio>=5.0.0
2
+ torch>=2.2.0
3
+ diffusers>=0.32.0
4
+ transformers>=4.44.0
5
+ accelerate>=0.34.0
6
+ safetensors>=0.4.5
7
+ sentencepiece>=0.2.0
8
+ protobuf>=4.25.0
9
+ imageio>=2.34.0
10
+ imageio-ffmpeg>=0.5.1