Commit 3a727fd

2.1.844
* maintain clip vision device
* update links in troubleshoot
lllyasviel committed Dec 16, 2023
1 parent 776e080 commit 3a727fd
Showing 10 changed files with 113 additions and 77 deletions.
9 changes: 1 addition & 8 deletions extras/ip_adapter.py
@@ -167,14 +167,7 @@ def preprocess(img, ip_adapter_path):
 
     ldm_patched.modules.model_management.load_model_gpu(clip_vision.patcher)
     pixel_values = clip_preprocess(numpy_to_pytorch(img).to(clip_vision.load_device))
-
-    if clip_vision.dtype != torch.float32:
-        precision_scope = torch.autocast
-    else:
-        precision_scope = lambda a, b: contextlib.nullcontext(a)
-
-    with precision_scope(ldm_patched.modules.model_management.get_autocast_device(clip_vision.load_device), torch.float32):
-        outputs = clip_vision.model(pixel_values=pixel_values, intermediate_output=-2)
+    outputs = clip_vision.model(pixel_values=pixel_values, intermediate_output=-2)
 
     ip_adapter = entry['ip_adapter']
     ip_layers = entry['ip_layers']
2 changes: 1 addition & 1 deletion fooocus_version.py
@@ -1 +1 @@
-version = '2.1.843'
+version = '2.1.844'
1 change: 1 addition & 0 deletions ldm_patched/contrib/external.py
@@ -1870,6 +1870,7 @@ def init_custom_nodes():
         "nodes_images.py",
         "nodes_video_model.py",
         "nodes_sag.py",
+        "nodes_perpneg.py",
     ]
 
     for node_file in extras_files:
25 changes: 23 additions & 2 deletions ldm_patched/contrib/external_latent.py
@@ -5,9 +5,7 @@
 
 def reshape_latent_to(target_shape, latent):
     if latent.shape[1:] != target_shape[1:]:
-        latent.movedim(1, -1)
         latent = ldm_patched.modules.utils.common_upscale(latent, target_shape[3], target_shape[2], "bilinear", "center")
-        latent.movedim(-1, 1)
     return ldm_patched.modules.utils.repeat_to_batch_size(latent, target_shape[0])
 
 
@@ -104,9 +102,32 @@ def op(self, samples1, samples2, ratio):
         samples_out["samples"] = st * (m1 * ratio + m2 * (1.0 - ratio))
         return (samples_out,)
 
+class LatentBatch:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}}
+
+    RETURN_TYPES = ("LATENT",)
+    FUNCTION = "batch"
+
+    CATEGORY = "latent/batch"
+
+    def batch(self, samples1, samples2):
+        samples_out = samples1.copy()
+        s1 = samples1["samples"]
+        s2 = samples2["samples"]
+
+        if s1.shape[1:] != s2.shape[1:]:
+            s2 = ldm_patched.modules.utils.common_upscale(s2, s1.shape[3], s1.shape[2], "bilinear", "center")
+        s = torch.cat((s1, s2), dim=0)
+        samples_out["samples"] = s
+        samples_out["batch_index"] = samples1.get("batch_index", [x for x in range(0, s1.shape[0])]) + samples2.get("batch_index", [x for x in range(0, s2.shape[0])])
+        return (samples_out,)
+
 NODE_CLASS_MAPPINGS = {
     "LatentAdd": LatentAdd,
     "LatentSubtract": LatentSubtract,
     "LatentMultiply": LatentMultiply,
     "LatentInterpolate": LatentInterpolate,
+    "LatentBatch": LatentBatch,
 }
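The new LatentBatch node concatenates two latent batches along the batch dimension, upscaling the second to match the first, and merges their batch_index lists. A minimal standalone sketch of that behavior (toy shapes, not part of the commit):

    import torch

    samples1 = {"samples": torch.zeros(2, 4, 64, 64)}
    samples2 = {"samples": torch.zeros(3, 4, 64, 64)}

    out = samples1.copy()
    out["samples"] = torch.cat((samples1["samples"], samples2["samples"]), dim=0)
    out["batch_index"] = list(range(2)) + list(range(3))  # [0, 1, 0, 1, 2]
    print(out["samples"].shape)  # torch.Size([5, 4, 64, 64])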
48 changes: 9 additions & 39 deletions ldm_patched/contrib/external_model_advanced.py
@@ -19,41 +19,19 @@ def calculate_denoised(self, sigma, model_output, model_input):
 
         return c_out * x0 + c_skip * model_input
 
-class ModelSamplingDiscreteDistilled(torch.nn.Module):
+class ModelSamplingDiscreteDistilled(ldm_patched.modules.model_sampling.ModelSamplingDiscrete):
     original_timesteps = 50
 
-    def __init__(self):
-        super().__init__()
-        self.sigma_data = 1.0
-        timesteps = 1000
-        beta_start = 0.00085
-        beta_end = 0.012
-
-        betas = torch.linspace(beta_start**0.5, beta_end**0.5, timesteps, dtype=torch.float32) ** 2
-        alphas = 1.0 - betas
-        alphas_cumprod = torch.cumprod(alphas, dim=0)
-
-        self.skip_steps = timesteps // self.original_timesteps
-
-
-        alphas_cumprod_valid = torch.zeros((self.original_timesteps), dtype=torch.float32)
+    def __init__(self, model_config=None):
+        super().__init__(model_config)
+
+        self.skip_steps = self.num_timesteps // self.original_timesteps
+
+        sigmas_valid = torch.zeros((self.original_timesteps), dtype=torch.float32)
         for x in range(self.original_timesteps):
-            alphas_cumprod_valid[self.original_timesteps - 1 - x] = alphas_cumprod[timesteps - 1 - x * self.skip_steps]
-
-        sigmas = ((1 - alphas_cumprod_valid) / alphas_cumprod_valid) ** 0.5
-        self.set_sigmas(sigmas)
-
-    def set_sigmas(self, sigmas):
-        self.register_buffer('sigmas', sigmas)
-        self.register_buffer('log_sigmas', sigmas.log())
-
-    @property
-    def sigma_min(self):
-        return self.sigmas[0]
-
-    @property
-    def sigma_max(self):
-        return self.sigmas[-1]
+            sigmas_valid[self.original_timesteps - 1 - x] = self.sigmas[self.num_timesteps - 1 - x * self.skip_steps]
+
+        self.set_sigmas(sigmas_valid)
 
     def timestep(self, sigma):
         log_sigma = sigma.log()
@@ -68,14 +46,6 @@ def sigma(self, timestep):
         log_sigma = (1 - w) * self.log_sigmas[low_idx] + w * self.log_sigmas[high_idx]
         return log_sigma.exp().to(timestep.device)
 
-    def percent_to_sigma(self, percent):
-        if percent <= 0.0:
-            return 999999999.9
-        if percent >= 1.0:
-            return 0.0
-        percent = 1.0 - percent
-        return self.sigma(torch.tensor(percent * 999.0)).item()
-
 
 def rescale_zero_terminal_snr_sigmas(sigmas):
     alphas_cumprod = 1 / ((sigmas * sigmas) + 1)
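The refactor above stops rebuilding a beta schedule by hand: the distilled class now subsamples the sigmas precomputed by its new parent ModelSamplingDiscrete, keeping every skip_steps-th value so that exactly original_timesteps (50) entries remain. A toy illustration of the subsampling with a stand-in schedule (values are placeholders):

    import torch

    num_timesteps, original_timesteps = 1000, 50
    sigmas = torch.linspace(0.03, 15.0, num_timesteps)  # stand-in full schedule
    skip_steps = num_timesteps // original_timesteps    # 20

    sigmas_valid = torch.zeros(original_timesteps)
    for x in range(original_timesteps):
        sigmas_valid[original_timesteps - 1 - x] = sigmas[num_timesteps - 1 - x * skip_steps]

    print(sigmas_valid.shape)                    # torch.Size([50])
    print(bool(sigmas_valid[-1] == sigmas[-1]))  # True: still ends at sigma_max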
@@ -124,7 +94,7 @@ def patch(self, model, sampling, zsnr):
         class ModelSamplingAdvanced(sampling_base, sampling_type):
             pass
 
-        model_sampling = ModelSamplingAdvanced()
+        model_sampling = ModelSamplingAdvanced(model.model.model_config)
         if zsnr:
             model_sampling.set_sigmas(rescale_zero_terminal_snr_sigmas(model_sampling.sigmas))
 
@@ -156,7 +126,7 @@ def patch(self, model, sampling, sigma_max, sigma_min):
         class ModelSamplingAdvanced(ldm_patched.modules.model_sampling.ModelSamplingContinuousEDM, sampling_type):
             pass
 
-        model_sampling = ModelSamplingAdvanced()
+        model_sampling = ModelSamplingAdvanced(model.model.model_config)
         model_sampling.set_sigma_range(sigma_min, sigma_max)
         m.add_object_patch("model_sampling", model_sampling)
         return (m, )
57 changes: 57 additions & 0 deletions ldm_patched/contrib/external_perpneg.py
@@ -0,0 +1,57 @@
+# https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py
+
+import torch
+import ldm_patched.modules.model_management
+import ldm_patched.modules.sample
+import ldm_patched.modules.samplers
+import ldm_patched.modules.utils
+
+
+class PerpNeg:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"model": ("MODEL", ),
+                             "empty_conditioning": ("CONDITIONING", ),
+                             "neg_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0}),
+                            }}
+    RETURN_TYPES = ("MODEL",)
+    FUNCTION = "patch"
+
+    CATEGORY = "_for_testing"
+
+    def patch(self, model, empty_conditioning, neg_scale):
+        m = model.clone()
+        nocond = ldm_patched.modules.sample.convert_cond(empty_conditioning)
+
+        def cfg_function(args):
+            model = args["model"]
+            noise_pred_pos = args["cond_denoised"]
+            noise_pred_neg = args["uncond_denoised"]
+            cond_scale = args["cond_scale"]
+            x = args["input"]
+            sigma = args["sigma"]
+            model_options = args["model_options"]
+            nocond_processed = ldm_patched.modules.samplers.encode_model_conds(model.extra_conds, nocond, x, x.device, "negative")
+
+            (noise_pred_nocond, _) = ldm_patched.modules.samplers.calc_cond_uncond_batch(model, nocond_processed, None, x, sigma, model_options)
+
+            pos = noise_pred_pos - noise_pred_nocond
+            neg = noise_pred_neg - noise_pred_nocond
+            perp = ((torch.mul(pos, neg).sum())/(torch.norm(neg)**2)) * neg
+            perp_neg = perp * neg_scale
+            cfg_result = noise_pred_nocond + cond_scale*(pos - perp_neg)
+            cfg_result = x - cfg_result
+            return cfg_result
+
+        m.set_model_sampler_cfg_function(cfg_function)
+
+        return (m, )
+
+
+NODE_CLASS_MAPPINGS = {
+    "PerpNeg": PerpNeg,
+}
+
+NODE_DISPLAY_NAME_MAPPINGS = {
+    "PerpNeg": "Perp-Neg",
+}
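The geometry behind cfg_function: pos and neg are guidance directions relative to the unconditional ("nocond") prediction, and perp is the projection of pos onto neg, so subtracting a scaled perp leaves guidance that is (at neg_scale = 1.0) perpendicular to the negative direction. A toy check of that projection (shapes are illustrative):

    import torch

    pos = torch.randn(1, 4, 8, 8)  # cond prediction minus nocond prediction
    neg = torch.randn(1, 4, 8, 8)  # uncond prediction minus nocond prediction

    # Same projection as in cfg_function above.
    perp = (torch.mul(pos, neg).sum() / torch.norm(neg) ** 2) * neg
    guided = pos - 1.0 * perp      # neg_scale = 1.0
    print(float(torch.mul(guided, neg).sum()))  # ~0.0: perpendicular to neg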
2 changes: 1 addition & 1 deletion ldm_patched/contrib/external_sag.py
@@ -60,7 +60,7 @@ def create_blur_map(x0, attn, sigma=3.0, threshold=1.0):
     attn = attn.reshape(b, -1, hw1, hw2)
     # Global Average Pool
     mask = attn.mean(1, keepdim=False).sum(1, keepdim=False) > threshold
-    ratio = round(math.sqrt(lh * lw / hw1))
+    ratio = math.ceil(math.sqrt(lh * lw / hw1))
     mid_shape = [math.ceil(lh / ratio), math.ceil(lw / ratio)]
 
     # Reshape
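Why math.ceil replaces round in the hunk above: ratio must never underestimate the attention downscale factor, otherwise mid_shape does not multiply back to hw1 and the subsequent reshape fails. A contrived example (numbers are illustrative, not from a real model):

    import math

    lh, lw, hw1 = 5, 5, 4  # 5x5 latent; attention map flattened to 2*2 = 4 tokens
    ratio = math.ceil(math.sqrt(lh * lw / hw1))                 # ceil(2.5) -> 3
    mid_shape = [math.ceil(lh / ratio), math.ceil(lw / ratio)]  # [2, 2]; 2 * 2 == hw1
    # round(math.sqrt(lh * lw / hw1)) == 2 would give [3, 3], and 3 * 3 != hw1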
31 changes: 12 additions & 19 deletions ldm_patched/modules/clip_vision.py
@@ -19,11 +19,13 @@ def __setitem__(self, key, item):
 def clip_preprocess(image, size=224):
     mean = torch.tensor([ 0.48145466,0.4578275,0.40821073], device=image.device, dtype=image.dtype)
     std = torch.tensor([0.26862954,0.26130258,0.27577711], device=image.device, dtype=image.dtype)
-    scale = (size / min(image.shape[1], image.shape[2]))
-    image = torch.nn.functional.interpolate(image.movedim(-1, 1), size=(round(scale * image.shape[1]), round(scale * image.shape[2])), mode="bicubic", antialias=True)
-    h = (image.shape[2] - size)//2
-    w = (image.shape[3] - size)//2
-    image = image[:,:,h:h+size,w:w+size]
+    image = image.movedim(-1, 1)
+    if not (image.shape[2] == size and image.shape[3] == size):
+        scale = (size / min(image.shape[2], image.shape[3]))
+        image = torch.nn.functional.interpolate(image, size=(round(scale * image.shape[2]), round(scale * image.shape[3])), mode="bicubic", antialias=True)
+        h = (image.shape[2] - size)//2
+        w = (image.shape[3] - size)//2
+        image = image[:,:,h:h+size,w:w+size]
     image = torch.clip((255. * image), 0, 255).round() / 255.0
     return (image - mean.view([3,1,1])) / std.view([3,1,1])
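The rewritten clip_preprocess converts to channels-first up front and skips resampling entirely when the input already matches the target size; scale and crop now read the NCHW dims (shape[2], shape[3]) rather than the channels-last ones. A standalone sketch assuming a [B, H, W, C] float input in 0..1:

    import torch

    size = 224
    image = torch.rand(1, 512, 768, 3)  # assumed BHWC input
    image = image.movedim(-1, 1)        # -> [1, 3, 512, 768] (NCHW)
    if not (image.shape[2] == size and image.shape[3] == size):
        scale = size / min(image.shape[2], image.shape[3])
        image = torch.nn.functional.interpolate(
            image,
            size=(round(scale * image.shape[2]), round(scale * image.shape[3])),
            mode="bicubic", antialias=True)  # short side -> 224
        h = (image.shape[2] - size) // 2
        w = (image.shape[3] - size) // 2
        image = image[:, :, h:h + size, w:w + size]  # center crop
    print(image.shape)  # torch.Size([1, 3, 224, 224])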

@@ -34,27 +36,18 @@ def __init__(self, json_config):
 
         self.load_device = ldm_patched.modules.model_management.text_encoder_device()
         offload_device = ldm_patched.modules.model_management.text_encoder_offload_device()
-        self.dtype = torch.float32
-        if ldm_patched.modules.model_management.should_use_fp16(self.load_device, prioritize_performance=False):
-            self.dtype = torch.float16
-
-        self.model = ldm_patched.modules.clip_model.CLIPVisionModelProjection(config, self.dtype, offload_device, ldm_patched.modules.ops.disable_weight_init)
+        self.dtype = ldm_patched.modules.model_management.text_encoder_dtype(self.load_device)
+        self.model = ldm_patched.modules.clip_model.CLIPVisionModelProjection(config, self.dtype, offload_device, ldm_patched.modules.ops.manual_cast)
         self.model.eval()
 
         self.patcher = ldm_patched.modules.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
+
     def load_sd(self, sd):
         return self.model.load_state_dict(sd, strict=False)
 
     def encode_image(self, image):
         ldm_patched.modules.model_management.load_model_gpu(self.patcher)
-        pixel_values = clip_preprocess(image.to(self.load_device))
-
-        if self.dtype != torch.float32:
-            precision_scope = torch.autocast
-        else:
-            precision_scope = lambda a, b: contextlib.nullcontext(a)
-
-        with precision_scope(ldm_patched.modules.model_management.get_autocast_device(self.load_device), torch.float32):
-            out = self.model(pixel_values=pixel_values, intermediate_output=-2)
+        pixel_values = clip_preprocess(image.to(self.load_device)).float()
+        out = self.model(pixel_values=pixel_values, intermediate_output=-2)
 
         outputs = Output()
         outputs["last_hidden_state"] = out[0].to(ldm_patched.modules.model_management.intermediate_device())
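With ops.manual_cast in place of the autocast wrapper (here and in extras/ip_adapter.py above), pixel values can simply be cast to float and passed in; the layers handle dtype conversion themselves. A loose sketch of the general idea — an assumption for illustration, not the actual ldm_patched implementation:

    import torch

    class ManualCastLinear(torch.nn.Linear):
        # Cast parameters to the input's dtype/device at call time, so e.g.
        # fp16 weights can serve fp32 inputs without a global autocast context.
        def forward(self, x):
            weight = self.weight.to(dtype=x.dtype, device=x.device)
            bias = None if self.bias is None else self.bias.to(dtype=x.dtype, device=x.device)
            return torch.nn.functional.linear(x, weight, bias)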
3 changes: 2 additions & 1 deletion ldm_patched/modules/samplers.py
@@ -251,7 +251,8 @@ def sampling_function(model, x, timestep, uncond, cond, cond_scale, model_option
 
         cond_pred, uncond_pred = calc_cond_uncond_batch(model, cond, uncond_, x, timestep, model_options)
         if "sampler_cfg_function" in model_options:
-            args = {"cond": x - cond_pred, "uncond": x - uncond_pred, "cond_scale": cond_scale, "timestep": timestep, "input": x, "sigma": timestep}
+            args = {"cond": x - cond_pred, "uncond": x - uncond_pred, "cond_scale": cond_scale, "timestep": timestep, "input": x, "sigma": timestep,
+                    "cond_denoised": cond_pred, "uncond_denoised": uncond_pred, "model": model, "model_options": model_options}
             cfg_result = x - model_options["sampler_cfg_function"](args)
         else:
             cfg_result = uncond_pred + (cond_pred - uncond_pred) * cond_scale
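The extra keys let sampler patches such as PerpNeg reuse the denoised predictions instead of re-deriving them. A hypothetical sampler_cfg_function consuming them (the dict layout matches the args built above; the function body is illustrative):

    def my_cfg_function(args):
        cond = args["cond"]                      # x - cond_pred (noise-style)
        uncond = args["uncond"]                  # x - uncond_pred
        cond_denoised = args["cond_denoised"]    # newly exposed
        uncond_denoised = args["uncond_denoised"]
        model_options = args["model_options"]    # newly exposed
        return uncond + (cond - uncond) * args["cond_scale"]  # plain CFG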
12 changes: 6 additions & 6 deletions troubleshoot.md
@@ -143,31 +143,31 @@ Besides, the current support for MAC is very experimental, and we encourage user
 
 ### I am using Nvidia with 8GB VRAM, I get CUDA Out Of Memory
 
-It is a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](readme.md#minimal-requirement).
+It is a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement).
 
 ### I am using Nvidia with 6GB VRAM, I get CUDA Out Of Memory
 
-It is very likely a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](readme.md#minimal-requirement).
+It is very likely a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement).
 
 ### I am using Nvidia with 4GB VRAM with Float16 support, like RTX 3050, I get CUDA Out Of Memory
 
-It is a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](readme.md#minimal-requirement).
+It is a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement).
 
 ### I am using Nvidia with 4GB VRAM without Float16 support, like GTX 960, I get CUDA Out Of Memory
 
-Supporting GPU with 4GB VRAM without fp16 is extremely difficult, and you may not be able to use SDXL. However, you may still make an issue and let us know. You may try SD1.5 in Automatic1111 or other software for your device. See also [minimal requirements](readme.md#minimal-requirement).
+Supporting GPU with 4GB VRAM without fp16 is extremely difficult, and you may not be able to use SDXL. However, you may still make an issue and let us know. You may try SD1.5 in Automatic1111 or other software for your device. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement).
 
 ### I am using AMD GPU on Windows, I get CUDA Out Of Memory
 
 Current AMD support is very experimental for Windows. If you see this, then perhaps you cannot use Fooocus on this device on Windows.
 
 However, if you re able to run SDXL on this same device on any other software, please let us know immediately, and we will support it as soon as possible. If no other software can enable your device to run SDXL on Windows, then we also do not have much to help.
 
-Besides, the AMD support on Linux is slightly better because it will use ROCM. You may also try it if you are willing to change OS to linux. See also [minimal requirements](readme.md#minimal-requirement).
+Besides, the AMD support on Linux is slightly better because it will use ROCM. You may also try it if you are willing to change OS to linux. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement).
 
 ### I am using AMD GPU on Linux, I get CUDA Out Of Memory
 
-Current AMD support for Linux is better than that for Windows, but still, very experimental. However, if you re able to run SDXL on this same device on any other software, please let us know immediately, and we will support it as soon as possible. If no other software can enable your device to run SDXL on Windows, then we also do not have much to help. See also [minimal requirements](readme.md#minimal-requirement).
+Current AMD support for Linux is better than that for Windows, but still, very experimental. However, if you re able to run SDXL on this same device on any other software, please let us know immediately, and we will support it as soon as possible. If no other software can enable your device to run SDXL on Windows, then we also do not have much to help. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement).
 
 ### I tried flags like --lowvram or --gpu-only or --bf16 or so on, and things are not getting any better?
 
