Commit ·
1c0e2b4
1
Parent(s): f4dcb91
set default model loading to JIT
Browse files- gradio_server.py +6 -5
gradio_server.py
CHANGED
|
@@ -492,7 +492,7 @@ if not Path(server_config_filename).is_file():
|
|
| 492 |
"boost" : 1,
|
| 493 |
"vae_config": 0,
|
| 494 |
"profile" : profile_type.LowRAM_LowVRAM,
|
| 495 |
-
"reload_model":
|
| 496 |
|
| 497 |
with open(server_config_filename, "w", encoding="utf-8") as writer:
|
| 498 |
writer.write(json.dumps(server_config))
|
|
@@ -854,8 +854,8 @@ def load_models(i2v):
|
|
| 854 |
kwargs = { "extraModelsToQuantize": None}
|
| 855 |
if profile == 2 or profile == 4:
|
| 856 |
kwargs["budgets"] = { "transformer" : 100 if preload == 0 else preload, "text_encoder" : 100, "*" : 1000 }
|
| 857 |
-
if profile == 4:
|
| 858 |
-
kwargs["partialPinning"] = True
|
| 859 |
elif profile == 3:
|
| 860 |
kwargs["budgets"] = { "*" : "70%" }
|
| 861 |
offloadobj = offload.profile(pipe, profile_no= profile, compile = compile, quantizeTransformer = quantizeTransformer, loras = "transformer", **kwargs)
|
|
@@ -867,6 +867,7 @@ if check_loras:
|
|
| 867 |
setup_loras(use_image2video, transformer, get_lora_dir(use_image2video), "", None)
|
| 868 |
exit()
|
| 869 |
del transformer
|
|
|
|
| 870 |
gen_in_progress = False
|
| 871 |
|
| 872 |
def get_auto_attention():
|
|
@@ -2247,7 +2248,7 @@ def generate_configuration_tab():
|
|
| 2247 |
("When changing tabs", 1),
|
| 2248 |
("When pressing generate", 2),
|
| 2249 |
],
|
| 2250 |
-
value=server_config.get("reload_model",
|
| 2251 |
label="Reload model"
|
| 2252 |
)
|
| 2253 |
msg = gr.Markdown()
|
|
@@ -2289,7 +2290,7 @@ def on_tab_select(t2v_state, i2v_state, evt: gr.SelectData):
|
|
| 2289 |
new_i2v = evt.index == 1
|
| 2290 |
use_image2video = new_i2v
|
| 2291 |
|
| 2292 |
-
if(server_config.get("reload_model",
|
| 2293 |
global wan_model, offloadobj
|
| 2294 |
if wan_model is not None:
|
| 2295 |
if offloadobj is not None:
|
|
|
|
| 492 |
"boost" : 1,
|
| 493 |
"vae_config": 0,
|
| 494 |
"profile" : profile_type.LowRAM_LowVRAM,
|
| 495 |
+
"reload_model": 2 }
|
| 496 |
|
| 497 |
with open(server_config_filename, "w", encoding="utf-8") as writer:
|
| 498 |
writer.write(json.dumps(server_config))
|
|
|
|
| 854 |
kwargs = { "extraModelsToQuantize": None}
|
| 855 |
if profile == 2 or profile == 4:
|
| 856 |
kwargs["budgets"] = { "transformer" : 100 if preload == 0 else preload, "text_encoder" : 100, "*" : 1000 }
|
| 857 |
+
# if profile == 4:
|
| 858 |
+
# kwargs["partialPinning"] = True
|
| 859 |
elif profile == 3:
|
| 860 |
kwargs["budgets"] = { "*" : "70%" }
|
| 861 |
offloadobj = offload.profile(pipe, profile_no= profile, compile = compile, quantizeTransformer = quantizeTransformer, loras = "transformer", **kwargs)
|
|
|
|
| 867 |
setup_loras(use_image2video, transformer, get_lora_dir(use_image2video), "", None)
|
| 868 |
exit()
|
| 869 |
del transformer
|
| 870 |
+
|
| 871 |
gen_in_progress = False
|
| 872 |
|
| 873 |
def get_auto_attention():
|
|
|
|
| 2248 |
("When changing tabs", 1),
|
| 2249 |
("When pressing generate", 2),
|
| 2250 |
],
|
| 2251 |
+
value=server_config.get("reload_model",2),
|
| 2252 |
label="Reload model"
|
| 2253 |
)
|
| 2254 |
msg = gr.Markdown()
|
|
|
|
| 2290 |
new_i2v = evt.index == 1
|
| 2291 |
use_image2video = new_i2v
|
| 2292 |
|
| 2293 |
+
if(server_config.get("reload_model",2) == 1):
|
| 2294 |
global wan_model, offloadobj
|
| 2295 |
if wan_model is not None:
|
| 2296 |
if offloadobj is not None:
|