Trained for over 70 GPU-hours on an L20 (48 GB).
I wanted to train a general-purpose anime model from scratch; this model is the product of the experience I have accumulated along the way.
Fine-tuned from Animagine-xl-3.0-base on a dataset of 10K images.

During training, the model was fine-tuned on a high-quality dataset with quality tags.
Every image is captioned with quality tags.
The quality tags are:
best, hight, amazing
Unfortunately, they did not take effect.
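For reference, each caption is a plain-text .txt file alongside its image. With shuffle_caption enabled and keep_tokens_separator = "|||" (see the training configuration below), the quality tags are placed before the separator so they stay fixed at the front of the caption while the remaining tags are shuffled each step. A hypothetical caption file might look like this (the content tags after the separator are purely illustrative):

best, hight, amazing ||| 1girl, solo, long hair, smile, outdoors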
--
Training model: Animagine-xl-3.0-base
Training resolution: 1024*1024
Recommended output resolution: 1024
Special thanks: compute sponsored by nieta.art / GPU Sponsor: Neta
Discord : https://discord.com/invite/CFpyHceF52
Recommended dataset collection and processing tools: https://github.com/deepghs/waifuc
Possibly the most user-friendly dataset processing UI currently available:
https://github.com/Eugeoter/waifuset
The training parameters largely follow the Animagine XL 3.0 training configuration.
Training configuration
[sdxl_arguments]
cache_text_encoder_outputs = false
no_half_vae = false
min_timestep = 0
max_timestep = 1000
[model_arguments]
pretrained_model_name_or_path = "/root/autodl-tmp/stable-diffusion-webui/models/Stable-diffusion/animagine-xl-3.0-base.safetensors"
vae = "/root/vae/sdxl_vae.safetensors"
[dataset_arguments]
shuffle_caption = true
debug_dataset = false
train_data_dir = "/root/autodl-tmp/train/quany"
dataset_repeats = 1
keep_tokens_separator = "|||"
resolution = "1024, 1024"
caption_dropout_rate = 0
caption_tag_dropout_rate = 0
caption_dropout_every_n_epochs = 0
token_warmup_min = 1
token_warmup_step = 0
enable_bucket = true
min_bucket_reso = 512
max_bucket_reso = 2048
bucket_reso_steps = 64
cache_latents = true
cache_latents_to_disk = true
[training_arguments]
output_dir = "/root/autodl-tmp/stable-diffusion-webui/models/Stable-diffusion/quany"
output_name = "composition"
save_precision = "fp16"
train_batch_size = 8
vae_batch_size = 4
max_train_epochs = 20
save_every_n_epochs = 2
max_token_length = 225
mem_eff_attn = false
xformers = true
sdpa = false
max_data_loader_n_workers = 8
persistent_data_loader_workers = true
gradient_checkpointing = true
gradient_accumulation_steps = 1
mixed_precision = "fp16"
[sample_prompt_arguments]
sample_every_n_steps = 200
sample_sampler = "euler_a"
sample_prompts = "/root/example.txt"
[saving_arguments]
save_model_as = "safetensors"
[optimizer_arguments]
optimizer_type = "AdaFactor"
learning_rate = 7.5e-7
train_text_encoder = true
learning_rate_te1 = 3.75e-7
learning_rate_te2 = 3.75e-7
optimizer_args = [ "scale_parameter=False", "relative_step=False", "warmup_init=False",]
lr_scheduler = "constant_with_warmup"
lr_warmup_steps = 100
max_grad_norm = 0
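Assuming the configuration above is saved as config_file.toml and training is run with kohya-ss/sd-scripts, the launch command would look roughly like the sketch below (the filename and thread count are illustrative):

accelerate launch --num_cpu_threads_per_process 1 sdxl_train.py --config_file="config_file.toml"

The sample_prompts file referenced above (/root/example.txt) holds one prompt per line in the sd-scripts sampling format; a hypothetical line might be:

1girl, solo, best, hight, amazing --n lowres, bad anatomy --w 1024 --h 1024 --s 28 --l 7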