Spaces:

Senoox
/

qwen-image-edit-rapid-aio

Running on Zero

App Files Files Community

Senoox committed on Mar 10

Commit

65881eb

verified ·

1 Parent(s): 86cb39b

feat: enable real AI image/video generation pipelines + ZeroGPU-ready app

Browse files

Files changed (2) hide show

app.py +125 -64
requirements.txt +10 -5

app.py CHANGED Viewed

@@ -1,78 +1,139 @@
-from datetime import datetime
 import gradio as gr
-from PIL import Image, ImageDraw
 try:
     import spaces
 except Exception:
     spaces = None
-TITLE = "Qwen Image Edit Rapid Aio"
-MODEL_HINT = "qwen-image-edit-rapid-aio"
-MODE = "image"
-def _render_image(prompt: str) -> Image.Image:
-    prompt = (prompt or "").strip() or "test"
-    img = Image.new("RGB", (1024, 1024), color=(20, 24, 32))
-    d = ImageDraw.Draw(img)
-    lines = [
-        f"{TITLE} - smoke test",
-        f"model: {MODEL_HINT}",
-        f"mode: {MODE}",
-        f"time: {datetime.utcnow().isoformat()}Z",
-        "",
-        f"prompt: {prompt[:300]}",
-    ]
-    y = 40
-    for ln in lines:
-        d.text((40, y), ln, fill=(235, 235, 235))
-        y += 52
-    return img
-def _render_video(prompt: str) -> str:
-    import imageio.v2 as imageio
-    import numpy as np
-    out_path = "/tmp/out.mp4"
-    frames = []
-    w, h = 640, 360
-    for i in range(24):
-        arr = np.zeros((h, w, 3), dtype=np.uint8)
-        arr[:, :, 0] = (30 + i * 3) % 255
-        arr[:, :, 1] = (60 + i * 5) % 255
-        arr[:, :, 2] = (90 + i * 7) % 255
-        frames.append(arr)
-    imageio.mimsave(out_path, frames, fps=12, codec="libx264")
     return out_path
-def _infer(prompt: str):
-    if MODE == "video":
-        return _render_video(prompt), f"✅ {TITLE} OK (CPU-safe)."
-    return _render_image(prompt), f"✅ {TITLE} OK (CPU-safe)."
-if spaces is not None:
-    infer = spaces.GPU(duration=60)(_infer)
-else:
-    infer = _infer
-with gr.Blocks(title=TITLE) as demo:
-    gr.Markdown(f"""# {TITLE}
-Space opérationnel en mode **{MODE}**.
-- Compatible ZeroGPU et CPU
-- Génération de test locale
-""")
-    inp = gr.Textbox(label="Prompt", placeholder="Décris ce que tu veux générer...")
-    run = gr.Button("Générer")
-    media = gr.Video(label="Sortie") if MODE == "video" else gr.Image(label="Sortie")
-    status = gr.Textbox(label="Statut")
-    run.click(infer, inputs=inp, outputs=[media, status])
 if __name__ == "__main__":
     demo.launch()

+import os
+import tempfile
+from typing import Optional
 import gradio as gr
+import torch
+from PIL import Image
 try:
     import spaces
 except Exception:
     spaces = None
+from diffusers import DiffusionPipeline
+from diffusers.utils import export_to_video
+# Identifier of the running Space, read from the environment and lowercased;
+# empty string when the variable is not set.
+SPACE_ID = os.getenv("SPACE_ID", "").lower()
+# True when the Space identifier contains one of the known video-model name
+# fragments; selects the video UI instead of the image UI.
+IS_VIDEO_SPACE = any(k in SPACE_ID for k in ["hunyuanvideo", "wan-2-1"])
+# Model repository ids; each can be overridden through an environment variable.
+IMAGE_MODEL_ID = os.getenv("IMAGE_MODEL_ID", "black-forest-labs/FLUX.1-schnell")
+VIDEO_MODEL_ID = os.getenv("VIDEO_MODEL_ID", "damo-vilab/text-to-video-ms-1.7b")
+# Module-level pipeline caches, filled on first use by the _load_*_pipe helpers.
+_image_pipe: Optional[DiffusionPipeline] = None
+_video_pipe: Optional[DiffusionPipeline] = None
+def _device_dtype():
+    """Pick the device name and tensor dtype used when loading pipelines.
+
+    Returns:
+        A ``(device, dtype)`` tuple: ``("cpu", torch.float32)`` when CUDA is
+        unavailable; otherwise ``"cuda"`` paired with ``torch.bfloat16`` if
+        device 0 reports a ``major`` property of 8 or more, else
+        ``torch.float16``.
+    """
+    if not torch.cuda.is_available():
+        return "cpu", torch.float32
+    capability_major = torch.cuda.get_device_properties(0).major
+    reduced_dtype = torch.bfloat16 if capability_major >= 8 else torch.float16
+    return "cuda", reduced_dtype
+def _load_image_pipe() -> DiffusionPipeline:
+    """Return the cached image pipeline, building it on the first call.
+
+    The pipeline is loaded from ``IMAGE_MODEL_ID`` with the dtype chosen by
+    ``_device_dtype()``. On CUDA, model CPU offload is enabled; otherwise the
+    pipeline is moved to the CPU. The instance is stored in the module-level
+    ``_image_pipe`` so later calls reuse it.
+    """
+    global _image_pipe
+    if _image_pipe is not None:
+        return _image_pipe
+    target_device, weights_dtype = _device_dtype()
+    _image_pipe = DiffusionPipeline.from_pretrained(IMAGE_MODEL_ID, torch_dtype=weights_dtype)
+    if target_device == "cuda":
+        _image_pipe.enable_model_cpu_offload()
+    else:
+        _image_pipe.to("cpu")
+    return _image_pipe
+def _load_video_pipe() -> DiffusionPipeline:
+    """Return the cached video pipeline, building it on the first call.
+
+    The pipeline is loaded from ``VIDEO_MODEL_ID`` using ``torch.float16`` on
+    CUDA and ``torch.float32`` on CPU (the dtype suggested by
+    ``_device_dtype()`` is ignored here). On CUDA, model CPU offload is
+    enabled; otherwise the pipeline is moved to the CPU. The instance is
+    stored in the module-level ``_video_pipe`` so later calls reuse it.
+    """
+    global _video_pipe
+    if _video_pipe is not None:
+        return _video_pipe
+    on_gpu = _device_dtype()[0] == "cuda"
+    weights_dtype = torch.float16 if on_gpu else torch.float32
+    _video_pipe = DiffusionPipeline.from_pretrained(VIDEO_MODEL_ID, torch_dtype=weights_dtype)
+    if on_gpu:
+        _video_pipe.enable_model_cpu_offload()
+    else:
+        _video_pipe.to("cpu")
+    return _video_pipe
+def _gpu_decorator(seconds: int):
+    """Return a decorator that requests GPU time when ``spaces`` is available.
+
+    Args:
+        seconds: Value passed as ``duration`` to ``spaces.GPU``.
+
+    Returns:
+        ``spaces.GPU(duration=seconds)`` when the ``spaces`` module was
+        imported, otherwise an identity decorator that returns the function
+        unchanged.
+    """
+    if spaces is None:
+        # No ``spaces`` package: decorate with a no-op.
+        return lambda fn: fn
+    return spaces.GPU(duration=seconds)
+@_gpu_decorator(120)
+def generate_image(prompt: str, steps: int, guidance_scale: float, seed: int):
+    """Generate one 1024x1024 image from a text prompt.
+
+    Args:
+        prompt: Text prompt; a built-in default is used when it is empty or
+            whitespace only.
+        steps: Number of inference steps (coerced with ``int``).
+        guidance_scale: Guidance scale (coerced with ``float``).
+        seed: Seed for the CPU random generator (coerced with ``int``).
+
+    Returns:
+        The first image of the pipeline output.
+    """
+    text = (prompt or "").strip()
+    if not text:
+        text = "A cinematic photo of a woman on a beach at sunset"
+    pipeline = _load_image_pipe()
+    rng = torch.Generator(device="cpu").manual_seed(int(seed))
+    call_kwargs = {
+        "prompt": text,
+        "num_inference_steps": int(steps),
+        "guidance_scale": float(guidance_scale),
+        "generator": rng,
+        "width": 1024,
+        "height": 1024,
+    }
+    return pipeline(**call_kwargs).images[0]
+@_gpu_decorator(240)
+def generate_video(prompt: str, steps: int, fps: int, num_frames: int, seed: int):
+    """Generate a short video from a text prompt and return the MP4 path.
+
+    Args:
+        prompt: Text prompt; a built-in default is used when it is empty or
+            whitespace only.
+        steps: Number of inference steps (coerced with ``int``).
+        fps: Frame rate of the exported video (coerced with ``int``).
+        num_frames: Number of frames to generate (coerced with ``int``).
+        seed: Seed for the CPU random generator (coerced with ``int``).
+
+    Returns:
+        Path of the exported ``.mp4`` file inside the system temp directory.
+        Each call writes to its own uniquely named file.
+    """
+    prompt = (prompt or "").strip() or "A woman walking on a sunny beach, cinematic shot"
+    pipe = _load_video_pipe()
+    gen = torch.Generator(device="cpu").manual_seed(int(seed))
+    result = pipe(
+        prompt,
+        num_inference_steps=int(steps),
+        num_frames=int(num_frames),
+        generator=gen,
+    )
+    frames = result.frames[0]
+    # Reserve a unique file per request: a single shared path would let
+    # overlapping or successive requests overwrite each other's output.
+    # delete=False keeps the file on disk after the handle closes so the
+    # exporter can write to it and the caller can read it.
+    with tempfile.NamedTemporaryFile(prefix="generated_video_", suffix=".mp4", delete=False) as tmp:
+        out_path = tmp.name
+    export_to_video(frames, out_path, fps=int(fps))
     return out_path
+def build_ui():
+    """Build and return the Gradio Blocks app for this Space.
+
+    The video UI is built when ``IS_VIDEO_SPACE`` is true, otherwise the image
+    UI. Component creation order inside the ``with`` blocks is significant, so
+    statements here should not be reordered casually.
+
+    Returns:
+        The ``gr.Blocks`` instance with its button wired to the matching
+        generate function.
+    """
+    # Title: SPACE_TITLE env var if set; otherwise the last path segment of
+    # SPACE_ID with dashes turned into spaces and title-cased, falling back to
+    # "AI Generator" when that is empty.
+    title = os.getenv("SPACE_TITLE", SPACE_ID.split("/")[-1].replace("-", " ").title() or "AI Generator")
+    if IS_VIDEO_SPACE:
+        with gr.Blocks(theme=gr.themes.Soft()) as demo:
+            gr.Markdown(f"## {title} — AI Video Generation")
+            prompt = gr.Textbox(label="Prompt", value="A woman walking on a sunny beach, cinematic shot")
+            with gr.Row():
+                steps = gr.Slider(8, 40, value=20, step=1, label="Inference steps")
+                num_frames = gr.Slider(8, 32, value=16, step=1, label="Frames")
+                fps = gr.Slider(4, 16, value=8, step=1, label="FPS")
+                seed = gr.Number(value=42, precision=0, label="Seed")
+            out = gr.Video(label="Generated video")
+            btn = gr.Button("Generate")
+            # Input order follows generate_video's parameters:
+            # (prompt, steps, fps, num_frames, seed).
+            btn.click(generate_video, [prompt, steps, fps, num_frames, seed], [out])
+        return demo
+    # Image UI (default when the Space is not recognised as a video Space).
+    with gr.Blocks(theme=gr.themes.Soft()) as demo:
+        gr.Markdown(f"## {title} — AI Image Generation")
+        prompt = gr.Textbox(label="Prompt", value="A cinematic photo of a woman on a beach at sunset")
+        with gr.Row():
+            steps = gr.Slider(4, 40, value=20, step=1, label="Inference steps")
+            guidance = gr.Slider(1.0, 10.0, value=3.5, step=0.1, label="Guidance scale")
+            seed = gr.Number(value=42, precision=0, label="Seed")
+        out = gr.Image(type="pil", label="Generated image")
+        btn = gr.Button("Generate")
+        # Input order follows generate_image's parameters:
+        # (prompt, steps, guidance_scale, seed).
+        btn.click(generate_image, [prompt, steps, guidance, seed], [out])
+    return demo
+# Build the UI at import time so `demo` is available as a module-level name.
+demo = build_ui()
 if __name__ == "__main__":
     demo.launch()

requirements.txt CHANGED Viewed

@@ -1,5 +1,10 @@
-gradio>=6.9.0
-spaces>=0.37.0
-pillow>=10
-numpy>=1.26
-imageio[ffmpeg]>=2.34

+gradio>=5.0.0
+torch>=2.2.0
+diffusers>=0.32.0
+transformers>=4.44.0
+accelerate>=0.34.0
+safetensors>=0.4.5
+sentencepiece>=0.2.0
+protobuf>=4.25.0
+imageio>=2.34.0
+imageio-ffmpeg>=0.5.1