# Python code to remove non-Civitai-permitted terms (censorship / unwanted-image / child-prompt remover)

# Backup your wildcards first. 
# Run on a folder containing autogenerated wildcards
# It will remove/modify offending prompt terms / prevent unwanted and against TOS images

import os
import re
import spacy

# Load the spaCy English NLP pipeline once at module import.
# 'en_core_web_sm' is the small model; swap in 'en_core_web_md' or
# 'en_core_web_lg' for better NER accuracy at the cost of startup time.
nlp = None  # stays None when the model is unavailable; name detection is then skipped
try:
    nlp = spacy.load("en_core_web_sm")
except Exception as load_error:
    print(f"Error loading spaCy model: {load_error}")
    print("Please ensure spaCy is installed and the model is downloaded:")
    print("  pip install spacy")
    print("  python -m spacy download en_core_web_sm")
else:
    print("spaCy model 'en_core_web_sm' loaded successfully.")


def is_likely_person_name(text, confidence_threshold=0.7):
    """
    Conservatively decide whether *text* is a person's name.

    Uses the module-level spaCy pipeline (``nlp``); returns False when the
    model is unavailable so the caller degrades gracefully.

    Args:
        text (str): Candidate tag or word.
        confidence_threshold (float): Minimum fraction of *text* that the
            single PERSON entity must cover to count as a name.  (This
            parameter existed before but was unused; the 0.7 was hard-coded.)

    Returns:
        bool: True only when exactly one PERSON entity covers most of the
        text and it looks like a typical capitalised name.
    """
    if not nlp:
        return False

    stripped = text.strip()
    # Skip very short or very long strings — unlikely to be a bare name.
    if len(stripped) < 2 or len(stripped) > 50:
        return False

    # Words that signal the text is a prompt fragment, not a name.
    non_name_indicators = {
        'and', 'or', 'the', 'with', 'in', 'on', 'at', 'by', 'for', 'of',
        'style', 'look', 'pose', 'art', 'photo', 'image', 'picture',
        'clothing', 'hair', 'face', 'eye', 'hand', 'body', 'skin',
    }

    # BUG FIX: the original used substring containment
    # (`indicator in text_lower`), so real names such as "Matthew"
    # (contains "the") or "Randy" (contains "and") were wrongly rejected.
    # Compare whole words instead.
    if non_name_indicators & set(re.findall(r'[a-z]+', text.lower())):
        return False

    # Run spaCy NER over the text.
    doc = nlp(text)
    person_entities = [ent for ent in doc.ents if ent.label_ == "PERSON"]

    # Be conservative: require exactly one PERSON entity.
    if len(person_entities) != 1:
        return False

    entity = person_entities[0]

    # The entity must cover most of the text and look like a typical name
    # (capitalised, reasonable length, letters/spaces/hyphens/dots only).
    entity_coverage = len(entity.text) / len(stripped)
    is_capitalized = entity.text[0].isupper()
    reasonable_length = 2 <= len(entity.text) <= 30
    no_special_chars = not re.search(r'[^a-zA-Z\s\-\.]', entity.text)

    return (entity_coverage > confidence_threshold
            and is_capitalized
            and reasonable_length
            and no_special_chars)


def process_wildcard_files_with_nlp(in_folder, replace_dict, delete_list):
    """
    Recursively walk *in_folder* and sanitise every ``.txt``/``.wildcard``
    file IN PLACE (files are overwritten — back them up first).

    Each line is treated as a comma-separated list of prompt tags, and each
    tag goes through two phases:

    1. Exact-tag match (highest priority): if the whole tag is a key in
       *replace_dict* it is replaced; else if it is in *delete_list* the tag
       is dropped.
    2. In-tag word processing (only when phase 1 did not fire):
       a. spaCy PERSON detection removes likely person names (the whole tag,
          or individual words inside it);
       b. single-word keys of *replace_dict* are substituted on word
          boundaries, case-insensitively;
       c. single-word entries of *delete_list* are removed the same way.

    Multi-word keys/entries therefore only ever match a whole tag exactly.
    A file is rewritten only when at least one of its lines changed.

    Args:
        in_folder (str): Path to the folder containing wildcard files.
        replace_dict (dict): Maps tags/words to their replacements.
        delete_list (list): Exact tags / single words to delete.

    Returns:
        None. Side effects: rewrites matching files and prints a DEBUG trace.
    """
    if nlp is None:
        print("Skipping name detection as spaCy model could not be loaded.")
        # If nlp model didn't load, we can choose to exit or continue without name detection.
        # For now, we'll continue but name detection won't work.
        pass

    if not os.path.isdir(in_folder):
        print(f"Error: Folder '{in_folder}' not found.")
        return

    for root, _, files in os.walk(in_folder):
        for file_name in files:
            file_path = os.path.join(root, file_name)
            # Assuming wildcard files are .txt or .wildcard
            if file_name.endswith(('.txt', '.wildcard')):
                print(f"Processing file: {file_path}")
                # NOTE(review): assumes files are UTF-8; a differently-encoded
                # file will raise UnicodeDecodeError here — confirm inputs.
                with open(file_path, 'r', encoding='utf-8') as f:
                    lines = f.readlines()

                new_lines = []
                changes_made = False

                for line in lines:
                    original_line = line.strip()
                    processed_tags = []

                    # Split the line by commas, handling potential leading/trailing spaces
                    tags = re.split(r'\s*,\s*', original_line)

                    for tag in tags:
                        if not tag:   # Skip empty tags that might result from splitting e.g. "a,,b"
                            continue

                        original_tag_value = tag.strip() # Store original value for comparison
                        current_tag = original_tag_value
                        tag_changed_this_iteration = False # Flag for changes within this specific tag

                        # --- PHASE 1: Exact Tag Matching (Highest Priority) ---
                        # Check for exact replacement of the ENTIRE tag
                        # (case-sensitive, unlike the in-tag word passes below).
                        if current_tag in replace_dict:
                            if current_tag != replace_dict[current_tag]: # Check if a real change
                                print(f"  DEBUG: Exact tag '{current_tag}' replaced with '{replace_dict[current_tag]}'")
                                current_tag = replace_dict[current_tag]
                                tag_changed_this_iteration = True

                        # Check for exact deletion of the ENTIRE tag
                        elif current_tag in delete_list:
                            print(f"  DEBUG: Exact tag '{current_tag}' deleted.")
                            current_tag = ''  # Mark for deletion by setting to empty
                            tag_changed_this_iteration = True
                        else:
                            # --- PHASE 2: In-Tag Word Processing (only if not an exact tag match) ---
                            # Create a temporary string to apply in-tag changes
                            temp_tag_content = current_tag

                            # 2a. Detect and remove people's names using improved spaCy detection
                            if nlp: # Only run if spaCy model was loaded
                                # Check if the entire tag is a person name
                                if is_likely_person_name(temp_tag_content):
                                    print(f"  DEBUG: Tag '{original_tag_value}' identified as person name and will be removed.")
                                    temp_tag_content = ''
                                    tag_changed_this_iteration = True
                                else:
                                    # Check for person names within the tag (word by word)
                                    words = temp_tag_content.split()
                                    filtered_words = []
                                    
                                    for word in words:
                                        # Clean word of punctuation for testing
                                        clean_word = re.sub(r'[^\w\s]', '', word)
                                        
                                        if clean_word and is_likely_person_name(clean_word):
                                            print(f"  DEBUG: Within tag '{original_tag_value}', word '{word}' identified as person name and removed.")
                                            tag_changed_this_iteration = True
                                            # Don't add this word to filtered_words (effectively removing it)
                                        else:
                                            filtered_words.append(word)
                                    
                                    # Reconstruct the tag from filtered words
                                    if len(filtered_words) != len(words):
                                        temp_tag_content = ' '.join(filtered_words)

                            # 2b. Process replacements for single words within the tag
                            # (runs BEFORE deletions — a word present in both
                            # replace_dict and delete_list is replaced, not deleted).
                            for old_word, new_word in replace_dict.items():
                                # Only process if old_word is a single word (no spaces)
                                if ' ' not in old_word.strip():
                                    pattern = r'\b' + re.escape(old_word) + r'\b'
                                    if re.search(pattern, temp_tag_content, flags=re.IGNORECASE): # Added IGNORECASE for better word matching
                                        initial_temp_tag = temp_tag_content # Store for debug print
                                        temp_tag_content = re.sub(pattern, new_word, temp_tag_content, flags=re.IGNORECASE)
                                        if initial_temp_tag != temp_tag_content:
                                            print(f"  DEBUG: Within tag '{original_tag_value}', word '{old_word}' replaced with '{new_word}'. Tag now: '{temp_tag_content}'")
                                            tag_changed_this_iteration = True

                            # 2c. Process deletions for single words within the tag
                            for word_to_delete in delete_list:
                                # Only process if word_to_delete is a single word (no spaces)
                                if ' ' not in word_to_delete.strip():
                                    pattern = r'\b' + re.escape(word_to_delete) + r'\b'
                                    if re.search(pattern, temp_tag_content, flags=re.IGNORECASE): # Added IGNORECASE for better word matching
                                        initial_temp_tag = temp_tag_content # Store for debug print
                                        temp_tag_content = re.sub(pattern, '', temp_tag_content, flags=re.IGNORECASE)
                                        if initial_temp_tag != temp_tag_content:
                                            print(f"  DEBUG: Within tag '{original_tag_value}', word '{word_to_delete}' deleted. Tag now: '{temp_tag_content}'")
                                            tag_changed_this_iteration = True

                            current_tag = temp_tag_content # Update the current_tag with in-tag modifications

                        # Clean up spaces/commas within the processed tag itself
                        # Capture before/after for final clean-up debug
                        initial_clean_tag = current_tag
                        current_tag = re.sub(r'\s+', ' ', current_tag).strip()
                        current_tag = re.sub(r',+', ',', current_tag) # replace multiple commas with one
                        current_tag = current_tag.replace(', ,', ',') # clean up ", ," after removing words
                        if initial_clean_tag != current_tag and initial_clean_tag.strip() != "":
                            print(f"  DEBUG: Within tag '{original_tag_value}', cleaned up. Final result before adding to list: '{current_tag}'")


                        # If the tag was changed or contains valid content after processing, add it
                        if not current_tag.strip() and tag_changed_this_iteration:
                            print(f"  DEBUG: Tag '{original_tag_value}' resulted in empty string after processing and will be removed from line.")
                            # No need to add to processed_tags as it's empty
                        elif current_tag.strip(): # Only add if not empty after processing
                            processed_tags.append(current_tag.strip())
                        # If it was an empty tag initially and no changes happened, it won't be added to processed_tags

                    # Reconstruct the line
                    new_line = ', '.join(processed_tags)
                    # Handle cases where leading/trailing commas might appear due to deletions
                    new_line = re.sub(r'(^,\s*)|(,\s*$)','', new_line) # Remove leading/trailing commas
                    new_line = re.sub(r',\s*,', ',', new_line) # Remove double commas
                    new_line = new_line.strip() # Final strip

                    if new_line != original_line:
                        changes_made = True
                    # NOTE(review): every line is written back with a trailing
                    # '\n', so a file without a final newline will gain one.
                    new_lines.append(new_line + '\n') # Add newline back

                if changes_made:
                    print(f"Changes detected in {file_name}. Updating file.")
                    with open(file_path, 'w', encoding='utf-8') as f:
                        f.writelines(new_lines)
                else:
                    print(f"No changes needed for {file_name}.")

# --- Configuration ---
# NOTE(review): relative Windows-style path — assumes the script is run from a
# directory that is a sibling of the stable-diffusion-webui checkout; verify
# the working directory before running.
in_folder = r'..\stable-diffusion-webui\extensions\sd-dynamic-prompts\wildcards\sarahpeterson'

# Define your replacements and deletions.
# Important:
# - If a key in replace_dict or an item in delete_list is a multi-word phrase,
#   it will ONLY be matched for exact, whole-tag replacement/deletion.
# - If a key in replace_dict or an item in delete_list is a single word,
#   it will also be matched and processed *within* longer tags (e.g. sentences),
#   case-insensitively on word boundaries.
# - When a single word appears BOTH here and in delete_list (e.g. 'juvenile',
#   'sandbox', 'rattle', 'pacifier'), the replacement runs first, so the
#   delete_list entry for that word never fires.
replace_dict = {

    # --- Individuals ---
    'child': 'person',
    'kid': 'individual',
    'teenager': 'young adult',
    'teen': 'young adult',
    'adolescent': 'person',
    'minor': 'individual',
    'baby': 'person',
    'infant': 'person',
    'toddler': 'individual',
    'newborn': 'person',
    'youngster': 'person',
    'youth': 'person',
    'juvenile': 'individual',
    'schoolgirl': 'student',
    'schoolboy': 'person',
    'pupil': 'person',
    # NOTE(review): 'boy'/'girl' are very common in adult-subject prompts too;
    # this rewrites them everywhere — confirm that is intended.
    'boy': 'male',
    'girl': 'female',
    'lad': 'male',
    'lass': 'female',
    'kiddo': 'individual',
    'teeny-bopper': 'person',
    'babe': 'person',

    # --- Multi-word terms (exact whole-tag match only) ---
    'little boy': 'person',
    'little girl': 'person',
    'young boy': 'person',
    'young girl': 'person',
    'baby boy': 'person',
    'baby girl': 'person',
    'young man': 'person',
    'young woman': 'person',
    'high school student': 'person',
    'middle schooler': 'person',
    'grade school student': 'person',
    'elementary school student': 'person',
    'preschooler': 'individual',

    # --- Implied presence in context ---
    'nursery rhyme': 'song',
    'storybook': 'book',
    'cartoon character': 'animated figure',
    'playground': 'outdoor area',
    'sandbox': 'outdoor play area',
    'lunchbox': 'container',
    'toy car': 'model vehicle',
    'toy truck': 'model vehicle',
    'action figure': 'figurine',
    'stuffed animal': 'plush item',
    'dollhouse': 'miniature home',
    'toy blocks': 'building pieces',
    'crayon': 'drawing utensil',
    'high chair': 'chair',
    'baby stroller': 'cart',
    'diaper bag': 'bag',
    'pacifier': 'item',
    'rattle': 'item',
    # Neutralises a TOS-restricted term rather than deleting the tag outright.
    'nazi': 'german'
}


# Tags/words to be deleted.
# - Multi-word entries (e.g. 'nursery school') only match when the WHOLE tag
#   is exactly that phrase.
# - Single-word entries are also removed inside longer tags, on word
#   boundaries, case-insensitively.
# - Single words that are ALSO replace_dict keys (e.g. 'juvenile', 'sandbox',
#   'rattle', 'pacifier') are replaced before deletion is attempted, so their
#   entries here are effectively inert; kept for safety/documentation.
delete_list = [

    # --- Direct descriptors ---
    'childlike',
    'childish',
    'kiddie',
    'kiddy',
    'underage',
    'preteen',
    'minority age',
    'prepubescent',
    'pubescent',
    'juvenile',
    'elementary-aged',
    'young-aged',
    'precocious',

    # --- Education/School terms ---
    'grade-schooler',
    'kindergartener',
    'preschooler',
    'nursery school',
    'daycare',
    'elementary school',
    'schoolyard',
    'lunch period',
    'homework folder',

    # --- Scene/location-based ---
    'playdate',
    'sandbox',
    'crib',
    'playpen',
    'nursery',
    'recess',
    'amusement park ride',
    'storybook time',
    'craft hour',
    'ball pit',
    'bouncy house',
    'baby shower',
    'toddler group',
    'story circle',
    'toy chest',
    'birthday clown',
    'face painting',
    'finger painting',
    'training wheels',

    # --- Objects ---
    'rattle',
    'pacifier',
    'teether',
    'sippy cup',
    'diaper',
    'bottle warmer',
    'bib',
    'onesie',
    'stroller',
    'high chair',
    'play mat',
    'baby gate',
    # NOTE(review): 'mobile' is over-broad — as a single word it also strips
    # 'mobile phone', 'mobile home', etc. Consider 'baby mobile' instead.
    'mobile',
    'building blocks',
    'stuffed animal',
    'doll',
    'toy train',
    'teddy bear',
    'action figure',
    'toy car',
    'toy truck',
    'lego',
    'crayons',
    'coloring book',
    'flash cards',
    'baby powder',

    # --- Media/Entertainment ---
    'cartoon',
    'kids show',
    'animated movie',
    'puppet show',
    'nursery rhyme',
    'story time',
    'kids movie',
    'bedtime story',
    "children's song",
    'animated series',
    'mascot character',
    'storybook',

    # --- Plural and generalizations ---
    'children',
    'kids',
    'babies',
    'infants',
    'toddlers',
    'youngsters',
    'minors',
    'teens',
    'preschoolers',
    'schoolkids',
    'pupils',

    # --- Colloquialisms / Diminutives ---
    'ankle biters',
    'tykes',
    'nippers',
    'moppets',
    'wee ones',
    'tiny tots',
    'sprouts',
    # BUG FIX: was 'young"uns' — a stray double quote where the apostrophe
    # belongs, so the intended term could never match.
    "young'uns",
    'crumb crunchers',
    'bambinos',
    'little ones',
    'small fry',
    'junior',
    'peanut',
    'tot',

    # other
    'beastiality',   # common misspelling seen in prompts — keep
    'bestiality'     # FIX: also cover the correct spelling
]

# --- Script entry point ---
def _main():
    """Sanitise the configured wildcard folder and report completion."""
    process_wildcard_files_with_nlp(in_folder, replace_dict, delete_list)
    print("\nProcessing complete.")


if __name__ == "__main__":
    _main()

# (end of script — web-page comment-section artifacts removed)