# Backup your wildcards first.
# Run on a folder containing autogenerated wildcards
# It will remove/modify offending prompt terms / prevent unwanted and against TOS images
import os
import re

# Load the spaCy English model once at import time.
# BUGFIX: `import spacy` now lives inside the try block — previously a
# missing spaCy installation raised ImportError at the module-level import,
# crashing before this handler could print its install instructions.
try:
    import spacy

    # 'en_core_web_sm' keeps startup quick; use 'en_core_web_md'/'en_core_web_lg'
    # for better NER accuracy at the cost of load time.
    nlp = spacy.load("en_core_web_sm")
    print("spaCy model 'en_core_web_sm' loaded successfully.")
except Exception as e:
    print(f"Error loading spaCy model: {e}")
    print("Please ensure spaCy is installed and the model is downloaded:")
    print(" pip install spacy")
    print(" python -m spacy download en_core_web_sm")
    # nlp = None signals downstream code to skip person-name detection.
    nlp = None
def is_likely_person_name(text, confidence_threshold=0.7):
    """Return True only when *text* is confidently a person's name.

    Deliberately conservative: requires the spaCy model to be loaded, a
    sane length, no generic prompt vocabulary, and a single PERSON entity
    that covers most of the text and looks like a typical name.

    Args:
        text (str): Candidate tag or word.
        confidence_threshold (float): Minimum fraction of the stripped
            text the PERSON entity must cover (default 0.7).

    Returns:
        bool: True when the text is very likely a bare person name.
    """
    if not nlp:  # spaCy model failed to load — cannot classify
        return False

    stripped = text.strip()
    # Very short strings are ambiguous; very long ones are rarely bare names.
    if len(stripped) < 2 or len(stripped) > 50:
        return False

    # Generic prompt vocabulary that marks the text as a description, not a name.
    non_name_indicators = {
        'and', 'or', 'the', 'with', 'in', 'on', 'at', 'by', 'for', 'of',
        'style', 'look', 'pose', 'art', 'photo', 'image', 'picture',
        'clothing', 'hair', 'face', 'eye', 'hand', 'body', 'skin',
    }
    # BUGFIX: compare whole words, not substrings.  The previous
    # `indicator in text_lower` check wrongly rejected real names such as
    # "Kevin" ('in'), "Martin" ('art') or "Theodore" ('the').
    words_lower = set(re.findall(r"[a-z']+", text.lower()))
    if words_lower & non_name_indicators:
        return False

    doc = nlp(text)
    person_entities = [ent for ent in doc.ents if ent.label_ == "PERSON"]
    if not person_entities:
        return False

    # Accept only a single PERSON entity that dominates the text and looks
    # like a typical name: capitalized, sane length, letters/space/-/. only.
    if len(person_entities) == 1:
        entity = person_entities[0]
        entity_coverage = len(entity.text) / len(stripped)
        is_capitalized = entity.text[0].isupper()
        reasonable_length = 2 <= len(entity.text) <= 30
        no_special_chars = not re.search(r'[^a-zA-Z\s\-\.]', entity.text)
        # BUGFIX: honour confidence_threshold (was hard-coded to 0.7,
        # making the parameter dead; default preserves old behavior).
        if (entity_coverage > confidence_threshold and
                is_capitalized and
                reasonable_length and
                no_special_chars):
            return True
    return False
def _strip_person_names(tag_text, original_tag_value):
    """Remove spaCy-detected person names from a tag.

    Checks the whole tag first; if that is not a name, tests each word
    individually (punctuation stripped before the NER test).

    Returns:
        tuple[str, bool]: (possibly shortened text, whether anything changed).
    """
    if is_likely_person_name(tag_text):
        print(f" DEBUG: Tag '{original_tag_value}' identified as person name and will be removed.")
        return '', True

    words = tag_text.split()
    kept_words = []
    changed = False
    for word in words:
        clean_word = re.sub(r'[^\w\s]', '', word)  # strip punctuation before the NER test
        if clean_word and is_likely_person_name(clean_word):
            print(f" DEBUG: Within tag '{original_tag_value}', word '{word}' identified as person name and removed.")
            changed = True
        else:
            kept_words.append(word)
    if changed:
        tag_text = ' '.join(kept_words)
    return tag_text, changed


def _apply_word_rules(tag_text, original_tag_value, replace_dict, delete_list):
    """Apply single-word replacements then deletions inside a longer tag.

    Multi-word keys are intentionally skipped here — they only ever match
    whole tags.  Matching is case-insensitive on word boundaries.

    Returns:
        tuple[str, bool]: (rewritten text, whether anything changed).
    """
    changed = False
    for old_word, new_word in replace_dict.items():
        if ' ' in old_word.strip():
            continue  # multi-word keys are whole-tag-only
        pattern = r'\b' + re.escape(old_word) + r'\b'
        replaced = re.sub(pattern, new_word, tag_text, flags=re.IGNORECASE)
        if replaced != tag_text:
            print(f" DEBUG: Within tag '{original_tag_value}', word '{old_word}' replaced with '{new_word}'. Tag now: '{replaced}'")
            tag_text = replaced
            changed = True
    for word_to_delete in delete_list:
        if ' ' in word_to_delete.strip():
            continue  # multi-word entries are whole-tag-only
        pattern = r'\b' + re.escape(word_to_delete) + r'\b'
        deleted = re.sub(pattern, '', tag_text, flags=re.IGNORECASE)
        if deleted != tag_text:
            print(f" DEBUG: Within tag '{original_tag_value}', word '{word_to_delete}' deleted. Tag now: '{deleted}'")
            tag_text = deleted
            changed = True
    return tag_text, changed


def _process_tag(tag, replace_dict, delete_list):
    """Run one comma-separated tag through all sanitization phases.

    Phase 1: exact whole-tag replacement/deletion (case-sensitive).
    Phase 2 (only when no exact match): person-name removal, then
    single-word replacement/deletion.  Finally whitespace/comma clean-up.

    Returns:
        str: The cleaned tag; '' means the tag should be dropped.
    """
    original_tag_value = tag.strip()
    current_tag = original_tag_value
    tag_changed = False

    # --- PHASE 1: exact whole-tag matching (highest priority) ---
    if current_tag in replace_dict:
        if current_tag != replace_dict[current_tag]:  # only flag real changes
            print(f" DEBUG: Exact tag '{current_tag}' replaced with '{replace_dict[current_tag]}'")
            current_tag = replace_dict[current_tag]
            tag_changed = True
    elif current_tag in delete_list:
        print(f" DEBUG: Exact tag '{current_tag}' deleted.")
        current_tag = ''  # mark for deletion
        tag_changed = True
    else:
        # --- PHASE 2: in-tag processing (only when no exact match) ---
        if nlp:  # name detection needs the spaCy model
            current_tag, changed = _strip_person_names(current_tag, original_tag_value)
            tag_changed = tag_changed or changed
        current_tag, changed = _apply_word_rules(
            current_tag, original_tag_value, replace_dict, delete_list)
        tag_changed = tag_changed or changed

    # Normalise whitespace and stray commas produced by deletions.
    initial_clean_tag = current_tag
    current_tag = re.sub(r'\s+', ' ', current_tag).strip()
    current_tag = re.sub(r',+', ',', current_tag)
    current_tag = current_tag.replace(', ,', ',')
    if initial_clean_tag != current_tag and initial_clean_tag.strip() != "":
        print(f" DEBUG: Within tag '{original_tag_value}', cleaned up. Final result before adding to list: '{current_tag}'")

    if not current_tag.strip() and tag_changed:
        print(f" DEBUG: Tag '{original_tag_value}' resulted in empty string after processing and will be removed from line.")
        return ''
    return current_tag.strip()


def _process_line(line, replace_dict, delete_list):
    """Sanitize one wildcard line; returns the rebuilt comma-joined line."""
    original_line = line.strip()
    processed_tags = []
    # Split on commas, tolerating surrounding whitespace.
    for tag in re.split(r'\s*,\s*', original_line):
        if not tag:  # empty fragments from e.g. "a,,b"
            continue
        cleaned = _process_tag(tag, replace_dict, delete_list)
        if cleaned:
            processed_tags.append(cleaned)
    new_line = ', '.join(processed_tags)
    # Guard against stray leading/trailing/doubled commas left by deletions.
    new_line = re.sub(r'(^,\s*)|(,\s*$)', '', new_line)
    new_line = re.sub(r',\s*,', ',', new_line)
    return new_line.strip()


def process_wildcard_files_with_nlp(in_folder, replace_dict, delete_list):
    """Recursively sanitize wildcard files under *in_folder* in place.

    For every .txt/.wildcard file, each comma-separated tag goes through
    exact tag replacement/deletion, spaCy person-name removal, and in-tag
    single-word replacement/deletion.  A file is rewritten only when at
    least one of its lines actually changed.

    Args:
        in_folder (str): Path to the folder containing wildcard files.
        replace_dict (dict): Maps tags (or single words) to replacements.
        delete_list (list): Exact tags / single words to delete.
    """
    if nlp is None:
        # Continue without name detection rather than aborting the whole run.
        print("Skipping name detection as spaCy model could not be loaded.")
    if not os.path.isdir(in_folder):
        print(f"Error: Folder '{in_folder}' not found.")
        return

    for root, _, files in os.walk(in_folder):
        for file_name in files:
            # Only wildcard-style files are touched.
            if not file_name.endswith(('.txt', '.wildcard')):
                continue
            file_path = os.path.join(root, file_name)
            print(f"Processing file: {file_path}")
            with open(file_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()

            new_lines = []
            changes_made = False
            for line in lines:
                new_line = _process_line(line, replace_dict, delete_list)
                if new_line != line.strip():
                    changes_made = True
                new_lines.append(new_line + '\n')  # restore newline

            if changes_made:
                print(f"Changes detected in {file_name}. Updating file.")
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.writelines(new_lines)
            else:
                print(f"No changes needed for {file_name}.")
# --- Configuration ---
# Relative Windows path to the wildcard folder to clean.  Backup first —
# files are rewritten IN PLACE.  Assumes the script runs from a directory
# that is a sibling of stable-diffusion-webui; adjust as needed.
in_folder = r'..\stable-diffusion-webui\extensions\sd-dynamic-prompts\wildcards\sarahpeterson'
# Define your replacements and deletions.
# Important:
# - If a key in replace_dict or an item in delete_list is a multi-word phrase,
#   it will ONLY be matched for exact, whole-tag replacement/deletion
#   (and that exact match is case-sensitive).
# - If a key in replace_dict or an item in delete_list is a single word,
#   it will also be matched *within* longer tags (e.g., sentences),
#   case-insensitively on word boundaries.
# Keys are matched two ways: case-sensitively against whole tags, and
# (single-word keys only) case-insensitively on word boundaries inside
# longer tags.  Keep keys lowercase; values should not themselves contain
# other keys or replacements could cascade on repeated runs.
replace_dict = {
# --- Individuals ---
'child': 'person',
'kid': 'individual',
'teenager': 'young adult',
'teen': 'young adult',
'adolescent': 'person',
'minor': 'individual',
'baby': 'person',
'infant': 'person',
'toddler': 'individual',
'newborn': 'person',
'youngster': 'person',
'youth': 'person',
'juvenile': 'individual',
'schoolgirl': 'student',
'schoolboy': 'person',
'pupil': 'person',
'boy': 'male',
'girl': 'female',
'lad': 'male',
'lass': 'female',
'kiddo': 'individual',
'teeny-bopper': 'person',
'babe': 'person',
# --- Multi-word terms (whole-tag matches only) ---
'little boy': 'person',
'little girl': 'person',
'young boy': 'person',
'young girl': 'person',
'baby boy': 'person',
'baby girl': 'person',
'young man': 'person',
'young woman': 'person',
'high school student': 'person',
'middle schooler': 'person',
'grade school student': 'person',
'elementary school student': 'person',
# NOTE(review): 'preschooler' and 'juvenile' also appear in delete_list;
# the replacement rules run first, so replacement wins — confirm intended.
'preschooler': 'individual',
# --- Implied presence in context ---
'nursery rhyme': 'song',
'storybook': 'book',
'cartoon character': 'animated figure',
'playground': 'outdoor area',
'sandbox': 'outdoor play area',
'lunchbox': 'container',
'toy car': 'model vehicle',
'toy truck': 'model vehicle',
'action figure': 'figurine',
'stuffed animal': 'plush item',
'dollhouse': 'miniature home',
'toy blocks': 'building pieces',
'crayon': 'drawing utensil',
'high chair': 'chair',
'baby stroller': 'cart',
'diaper bag': 'bag',
'pacifier': 'item',
'rattle': 'item',
'nazi': 'german'
}
# Define tags/words to be exactly deleted.  Multi-word entries only match
# whole tags (case-sensitive); single words are also removed inside longer
# tags, case-insensitively on word boundaries.
delete_list = [
    # --- Direct descriptors ---
    'childlike',
    'childish',
    'kiddie',
    'kiddy',
    'underage',
    'preteen',
    'minority age',
    'prepubescent',
    'pubescent',
    'juvenile',
    'elementary-aged',
    'young-aged',
    'precocious',
    # --- Education/School terms ---
    'grade-schooler',
    'kindergartener',
    'preschooler',
    'nursery school',
    'daycare',
    'elementary school',
    'schoolyard',
    'lunch period',
    'homework folder',
    # --- Scene/location-based ---
    'playdate',
    'sandbox',
    'crib',
    'playpen',
    'nursery',
    'recess',
    'amusement park ride',
    'storybook time',
    'craft hour',
    'ball pit',
    'bouncy house',
    'baby shower',
    'toddler group',
    'story circle',
    'toy chest',
    'birthday clown',
    'face painting',
    'finger painting',
    'training wheels',
    # --- Objects ---
    'rattle',
    'pacifier',
    'teether',
    'sippy cup',
    'diaper',
    'bottle warmer',
    'bib',
    'onesie',
    'stroller',
    'high chair',
    'play mat',
    'baby gate',
    'mobile',
    'building blocks',
    'stuffed animal',
    'doll',
    'toy train',
    'teddy bear',
    'action figure',
    'toy car',
    'toy truck',
    'lego',
    'crayons',
    'coloring book',
    'flash cards',
    'baby powder',
    # --- Media/Entertainment ---
    'cartoon',
    'kids show',
    'animated movie',
    'puppet show',
    'nursery rhyme',
    'story time',
    'kids movie',
    'bedtime story',
    "children's song",
    'animated series',
    'mascot character',
    'storybook',
    # --- Plural and generalizations ---
    'children',
    'kids',
    'babies',
    'infants',
    'toddlers',
    'youngsters',
    'minors',
    'teens',
    'preschoolers',
    'schoolkids',
    'pupils',
    # --- Colloquialisms / Diminutives ---
    'ankle biters',
    'tykes',
    'nippers',
    'moppets',
    'wee ones',
    'tiny tots',
    'sprouts',
    # BUGFIX: the original entry used a double-quote ('young"uns'); the
    # colloquialism is spelled with an apostrophe.  Both kept so either
    # spelling in generated prompts is caught.
    'young"uns',
    "young'uns",
    'crumb crunchers',
    'bambinos',
    'little ones',
    'small fry',
    'junior',
    'peanut',
    'tot',
    # other
    # BUGFIX: 'beastiality' is a misspelling of 'bestiality' — the correctly
    # spelled term was never filtered.  Both spellings are kept since either
    # may appear in autogenerated prompts.
    'beastiality',
    'bestiality',
]
# --- Run the processing ---
def _main():
    """Script entry point: sanitize every wildcard file under in_folder."""
    process_wildcard_files_with_nlp(in_folder, replace_dict, delete_list)
    print("\nProcessing complete.")


if __name__ == "__main__":
    _main()
# Python script to remove non-CivitAI-permitted terms (censorship / unwanted
# imagery / child-related prompt terms) from autogenerated wildcard files.