Trained on Anima Preview
Assume that any LoRA trained on the preview version won't work well on the final version.
Recommended prompt structure:
Positive prompt (quality tags at the start of the prompt):
masterpiece, best quality, very aesthetic, {{tags}}
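For example, with illustrative tags filled in (these are not required triggers):
masterpiece, best quality, very aesthetic, 1girl, solo, smile, looking at viewer, outdoors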
Updated the dataset and reduced it to 78 images; adjusted the captions for Anima with a mix of natural language (NL) and tags.
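A mixed caption might look like this (hypothetical example, not taken from the actual dataset):
A girl in a school uniform stands under a cherry tree, smiling at the viewer. 1girl, solo, school uniform, cherry blossoms, smile, looking at viewer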
Used diffusion-pipe (fork by @bluvoll).
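Training runs through diffusion-pipe's usual deepspeed entry point, along these lines (single GPU assumed; check the fork's README for the exact invocation):
deepspeed --num_gpus=1 train.py --deepspeed --config config-anima.toml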
Config:
# dataset-anima.toml
# Resolution settings.
resolutions = [1024]
# Aspect ratio bucketing settings.
enable_ar_bucket = true
min_ar = 0.5
max_ar = 2.0
num_ar_buckets = 7
[[directory]] # IMAGES
# Path to the directory containing images and their corresponding caption files.
path = '/mnt/d/huanvideo/training_data/images'
num_repeats = 1
resolutions = [1024]
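With these settings, images are sorted into 7 aspect-ratio buckets between 1:2 (min_ar = 0.5) and 2:1 (max_ar = 2.0), and resolutions = [1024] targets roughly 1024x1024 total pixels per bucket (the way diffusion-pipe normally interprets the resolution value).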
# config-anima.toml
# Change these paths
output_dir = '/mnt/d/anima/training_output'
dataset = 'dataset-anima.toml'
# training settings
epochs = 50
micro_batch_size_per_gpu = 6
pipeline_stages = 1
gradient_accumulation_steps = 1
gradient_clipping = 1.0
warmup_steps = 100
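# With 78 images, micro_batch_size_per_gpu = 6, and no gradient accumulation on a
# single GPU, one epoch is about 78 / 6 = 13 optimizer steps (AR bucketing can round
# this up slightly), so 50 epochs is ~650 steps and the 100 warmup steps cover
# roughly the first 8 epochs.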
# eval settings
eval_every_n_epochs = 1
eval_before_first_step = true
eval_micro_batch_size_per_gpu = 1
eval_gradient_accumulation_steps = 1
# misc settings
save_every_n_epochs = 1
checkpoint_every_n_minutes = 120
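# In upstream diffusion-pipe, save_every_n_epochs writes the trained LoRA weights,
# while checkpoint_every_n_minutes saves full training state so an interrupted run
# can be resumed (--resume_from_checkpoint).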
activation_checkpointing = true
partition_method = 'parameters'
save_dtype = 'bfloat16'
caching_batch_size = 1
steps_per_print = 1
[model]
type = 'anima'
transformer_path = '/mnt/c/models/diffusion_models/anima-preview.safetensors'
vae_path = '/mnt/c/models/vae/qwen_image_vae.safetensors'
qwen_path = '../qwen0.6/Qwen3-0.6B/'
dtype = 'bfloat16'
timestep_sample_method = 'logit_normal'
sigmoid_scale = 1.0
shift = 3.0
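# Rough intuition: logit_normal draws timesteps as t = sigmoid(sigmoid_scale * z)
# with z ~ N(0, 1), and shift = 3.0 then remaps t to shift*t / (1 + (shift - 1)*t),
# biasing training toward the high-noise end (the standard flow-matching shift;
# the fork may differ in detail).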
# Caption Processing Options
cache_text_embeddings = false
# NOTE: Requires cache_text_embeddings = false to work!
# For cached embeddings, use cache_shuffle_num in your dataset config instead.
shuffle_tags = true
tag_delimiter = ', '
keep_first_n_tags = 5
shuffle_keep_first_n = 5
tag_dropout_percent = 0.10
protected_tags_file = './protected_tags.txt'
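# protected_tags.txt is assumed to be a plain text file with one tag per line;
# listed tags are exempt from dropout (and possibly from shuffling; check the
# fork's docs). Hypothetical contents:
#   1girl
#   solo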
nl_shuffle_sentences = true
nl_keep_first_sentence = false
# Options: 'tags', 'nl', 'mixed'
caption_mode = 'mixed'
debug_caption_processing = true
debug_caption_interval = 100
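# Rough example (hypothetical caption; assuming the keep/shuffle options pin the
# first 5 tags in place): "1girl, solo, blue eyes, smile, outdoors, tree, sky"
# keeps the first 5 tags in order, shuffles the rest, and drops each unprotected
# tag with 10% probability per step.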
[adapter]
type = 'lora'
rank = 32
dtype = 'bfloat16'
# AdamW from the optimi library is a good default since it automatically uses Kahan summation when training bfloat16 weights.
[optimizer]
type = 'adamw_optimi'
lr = 5e-5
betas = [0.9, 0.99]
weight_decay = 0.01
eps = 1e-8