How to use an inpainting/outpainting model with the diffusers pipeline

Directions for using gwm_outpainting:

Step 1. Install 🤗 Diffusers

Follow the README to install diffusers: https://github.com/huggingface/diffusers?tab=readme-ov-file#installation
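
For reference, a typical PyPI install looks like the lines below (check the README for the current command); omegaconf, opencv-python, and torchvision are also imported by the script in Step 4:

pip install --upgrade diffusers[torch] transformers
pip install omegaconf opencv-python torchvision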

Step 2. Download the inpainting & outpainting models

We uploaded an outpainting UNet model to: https://civitai.com/models/275269/gwmoutpainting. Because the full model is too large, we only uploaded the UNet part. Inference requires downloading both the diffusers sdxl-inpainting model and our outpainting UNet model.

Attention: the outpainting UNet model directory should contain gwmOutpainting_v10.safetensors (rename it to diffusion_pytorch_model.safetensors) and a config.json (Civitai does not accept JSON files, so use the config.json from the unet folder of diffusers/stable-diffusion-xl-1.0-inpainting-0.1).
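
The UNet directory should end up looking like the sketch below (the directory name gwm-outpainting-unet is just an example; it is whatever you point outpainting_unet_path at in the config):

gwm-outpainting-unet/
├── diffusion_pytorch_model.safetensors   (renamed from gwmOutpainting_v10.safetensors)
└── config.json                           (copied from the unet folder of stable-diffusion-xl-1.0-inpainting-0.1)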

Step 3. Prepare data

In this example, we use an image of size 1360×768 generated with the model gwmbasemodelv1. Any image whose width and height do not exceed the target resolution will work; see the quick check below.
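
An optional sanity check, as a minimal sketch (./xxx.png stands in for your own image path, the same path you will put in val_data below):

import cv2

img = cv2.imread("./xxx.png")  # same path as val_data in infer-config.yaml
assert img is not None, "image not found or unreadable"
h, w, _ = img.shape
print(f"input image: {w}x{h}")  # 1360x768 in this example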

Step 4. Generate images with the diffusers pipeline

Use the from_pretrained method to load the diffusers/stable-diffusion-xl-1.0-inpainting-0.1 model. Then use UNet2DConditionModel.from_pretrained to replace the pipeline's UNet with the gwm-outpainting UNet.

inference.py:

# diffusers inpainting pipeline for outpainting
import argparse
from omegaconf import OmegaConf
opt_parser = argparse.ArgumentParser()
opt_parser.add_argument(
    '--cfg_path', 
    type=str, 
    default="./infer-config.yaml"
    )
opt = opt_parser.parse_args()
cfg = OmegaConf.load(opt.cfg_path)

import os
import cv2
import numpy as np
from diffusers import (
    UNet2DConditionModel,
    DPMSolverMultistepScheduler, 
    StableDiffusionXLInpaintPipeline,
    )
import torch
from torchvision.utils import save_image

device = "cuda" if torch.cuda.is_available() else "cpu"

pipeline = StableDiffusionXLInpaintPipeline.from_pretrained(
    cfg.pretrained_model_name_or_path,
    torch_dtype=torch.float16, 
    ).to(device)
pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
    pipeline.scheduler.config,
    use_karras_sigmas=True,
    algorithm_type='sde-dpmsolver++'
    )
# replace the pipeline's UNet with the gwm-outpainting UNet
pipeline.unet = UNet2DConditionModel.from_pretrained(
    cfg.outpainting_unet_path, 
    subfolder=None,
    torch_dtype=torch.float16, 
    ).to(device)

file_path = cfg.val_data
prompt = cfg.prompt    
for seed in cfg.seeds:
    generator = torch.Generator(device=device).manual_seed(int(seed))
    # run inference
    img = cv2.imread(file_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # stretch pixel values to the [0, 1] float range
    img = cv2.normalize(img, None, 0, 1.0, cv2.NORM_MINMAX, dtype=cv2.CV_32F)
    h, w, _ = img.shape
    #############################################################
    # expand the image: paste the original into the center of a
    # white canvas at the target resolution
    c_W, c_H = int(cfg.target_resolution[0]), int(cfg.target_resolution[1])
    if c_W % 8 != 0 or c_H % 8 != 0:
        raise ValueError(f"`target height` and `target width` have to be divisible by 8 but are {c_H} and {c_W}.")
    delta_h = (c_H - h) // 2
    delta_w = (c_W - w) // 2
    if delta_h < 0 or delta_w < 0:
        raise ValueError("the input image size should be smaller than the target image size.")
    expand_img = np.ones((c_H, c_W, 3), dtype=np.float32)
    expand_img[delta_h:delta_h+h, delta_w:delta_w+w, :] = img
    # mask: 255 where new content is generated, 0 where the original image is kept
    mask = np.ones((c_H, c_W), dtype=np.uint8) * 255
    mask[delta_h:delta_h+h, delta_w:delta_w+w] = 0

    pipeline_args = {
        "prompt": prompt,
        # "negative_prompt": cfg.neg_prompt,
        "image": expand_img,
        "mask_image": mask,
        "height": c_H,
        "width": c_W,
        "strength": cfg.strength,
        "guidance_scale": cfg.guidance_scale,
        "num_images_per_prompt": cfg.num_images_per_prompt,
        "num_inference_steps": cfg.num_inference_steps,
        }
    with torch.autocast(device):
        image = pipeline(
            **pipeline_args,
            generator=generator,
            output_type='pt'
            ).images[0]
        os.makedirs(cfg.output_dir, exist_ok=True)
        # include the seed in the filename so multiple seeds don't overwrite each other
        save_image(image, f'{cfg.output_dir}/{cfg.name}-seed{seed}-result-gwm.png')
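
Run the script with the path to the config file:

python inference.py --cfg_path ./infer-config.yaml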

infer-config.yaml:

# model
pretrained_model_name_or_path: " "  # diffusers-sdxl-inpainting model path
outpainting_unet_path: " "  # gwm-outpainting unet model path
# path
val_data: "./xxx.png"
output_dir: "./"
# setting
name: "sdxl-gwm-outpainting"
prompt: "the breathtaking view of a river. The river meanders through the landscape, surrounded by towering karst mountains that rise majestically against the overcast sky."
strength: 1.0
guidance_scale: 3
seeds: 
  - 23425362
target_resolution:  # [width, height]; each must be divisible by 8
  - 4080
  - 768
num_images_per_prompt: 1
num_inference_steps: 30
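
With these settings, the 1360×768 example image is centered on a 4080×768 canvas: delta_h = (768 - 768) // 2 = 0 and delta_w = (4080 - 1360) // 2 = 1360, so the model outpaints 1360 pixels on the left and on the right, turning the input into a wide panorama.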

Step 5. Get the result

Have fun with this model. If you like this work, don't forget to give us a ❤️. Thank you ❤️❤️❤️
