Trained on Anima Preview 2
Assume that any LoRA trained on this preview version won't work well on the final version.
Recommended prompt structure:
Positive prompt (quality tags at the start of prompt):
masterpiece, best quality, very aesthetic, {{tags}}
Slightly updated dataset of 96 images, trained at 1024 x 1024 and 1536 x 1024 resolutions; previews are mostly generated at 1536 x 1024 or 1024 x 1536.
Used diffusion-pipe (fork by @bluvoll).
Config:
# --- Dataset config (presumably the 'dataset-anima.toml' the training config references) ---

# Resolution settings.
# NOTE(review): the description above says training used 1024 x 1024 and
# 1536 x 1024, but 1280x720 is listed here — confirm which was actually used.
resolutions = [[1280, 720], [1536, 1024]]
# Aspect ratio bucketing settings
enable_ar_bucket = true
min_ar = 0.5  # presumably minimum width/height ratio — confirm
max_ar = 2.0  # presumably maximum width/height ratio — confirm
num_ar_buckets = 7
[[directory]] # IMAGES
# Path to the directory containing images and their corresponding caption files.
path = '/mnt/d/training_data/images'
num_repeats = 1
resolutions = [[1280, 720], [1536, 1024]]  # per-directory list; mirrors the global setting above
# --- Training config ('config-anima.toml'), single-GPU run ---

# Config for RTX 6000 Pro 96GB
# Run with NCCL_P2P_DISABLE="1" NCCL_IB_DISABLE="1" NCCL_CUMEM_ENABLE="0" deepspeed --num_gpus=1 train.py --deepspeed --config config-anima.toml
# Change these paths
output_dir = '/mnt/d/anima/training_output'
dataset = 'dataset-anima.toml'  # the dataset config shown above
# training settings
epochs = 50
micro_batch_size_per_gpu = 24
pipeline_stages = 1  # single GPU — no pipeline parallelism
gradient_accumulation_steps = 1
gradient_clipping = 1.0
warmup_steps = 150
train_llm_adapter = true  # also trains the LLM adapter (its LR is llm_adapter_lr under [model])
# eval settings
eval_every_n_epochs = 1
eval_before_first_step = true
eval_micro_batch_size_per_gpu = 1
eval_gradient_accumulation_steps = 1
# misc settings
save_every_n_epochs = 1
checkpoint_every_n_epochs = 1
# checkpoint_every_n_minutes = 60  # time-based alternative, left disabled
activation_checkpointing = true
partition_method = 'parameters'
save_dtype = 'bfloat16'
caching_batch_size = 1
steps_per_print = 1
[model]
type = 'anima'
# Local model paths — adjust to your setup.
transformer_path = '/mnt/c/models/diffusion_models/anima-preview2.safetensors'
vae_path = '/mnt/c/models/vae/qwen_image_vae.safetensors'
qwen_path = '../qwen0.6/Qwen3-0.6B/'  # Qwen3-0.6B model directory
dtype = 'bfloat16'
timestep_sample_method = 'logit_normal'
sigmoid_scale = 1.0
shift = 3.0
# Caption Processing Options
# Embeddings are not cached, so the shuffle/dropout options below can apply
# per step (see the NOTE that follows).
cache_text_embeddings = false
# NOTE: Requires cache_text_embeddings = false to work!
# For cached embeddings, use cache_shuffle_num in your dataset config instead.
shuffle_tags = true
tag_delimiter = ', '
keep_first_n_tags = 5
shuffle_keep_first_n = 5  # NOTE(review): overlaps with keep_first_n_tags — confirm both are needed
tag_dropout_percent = 0.3  # NOTE(review): key says "percent" but 0.3 reads like a fraction — confirm units
protected_tags_file = './protected_tags.txt'  # presumably tags exempt from dropout/shuffle — confirm
nl_shuffle_sentences = false
nl_keep_first_sentence = true
# 'tags' 'nl' 'mixed'
caption_mode = 'mixed'
debug_caption_processing = false
debug_caption_interval = 1000
llm_adapter_lr = 6e-5 # 1e-5 base — only relevant with train_llm_adapter = true
[adapter]
type = 'lora'
rank = 32
dtype = 'bfloat16'

# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights.
[optimizer]
type = 'adamw_optimi'
lr = 3.5e-4 # 2e-5 base — raised well above the base value for this run
betas = [0.9, 0.99]
weight_decay = 0.01
eps = 1e-7 # 1e-8 base