".safetensors" Inspector - Google Colab

The first program lets you view the tags stored in any .safetensors file, sort them by frequency, and display the corresponding activation tag.

import pandas as pd
import re

def process_file(file, encoding, tag_count):
    """Print the most frequent tags found in a file's "ss_tag_frequency" entry.

    The file is read as text, the "ss_tag_frequency" value is located with a
    regular expression, and its tag/count pairs are shown as a DataFrame
    sorted by descending frequency.

    Args:
        file: Path of the file to inspect.
        encoding: Text encoding passed to ``open``.
        tag_count: Maximum number of rows to display.

    Returns:
        None. The result (or an explanatory message) is printed.
    """
    with open(file, 'r', encoding=encoding) as f:
        tag_content = f.read()

    # Search for the content between the quotes of the "ss_tag_frequency" tag
    match = re.search(r'"ss_tag_frequency":"({.+?})"', tag_content)
    if match is None:
        print("The 'ss_tag_frequency' tag was not found in the file.")
        return

    # Extract the tag-frequency pairs. The optional backslash before the
    # closing quote accepts both plain (`"tag": 5`) and escaped
    # (`\"tag\": 5`) quoting without leaving the escape character on the tag.
    pairs = re.findall(r'"([^"]+?)\\?": (\d+)', match.group(1))
    if not pairs:
        # An empty DataFrame has no 'Frequency' column, so sorting it would
        # raise KeyError; report the situation instead.
        print("The 'ss_tag_frequency' tag does not contain any tag-frequency pairs.")
        return

    df = pd.DataFrame(
        [(tag, int(frequency)) for tag, frequency in pairs],
        columns=['Tag', 'Frequency'],
    )

    # Sort the tags by frequency in descending order
    df = df.sort_values(by='Frequency', ascending=False)

    # Raise the pandas row limit so the requested rows are printed in full
    pd.set_option('display.max_rows', tag_count)
    print(df.head(tag_count))


# Path of the .safetensors file to inspect; replace it with the correct path to your file
file_name = '/content/drive/MyDrive/Dataset/aeromorphAlo-03.safetensors'
# Text encoding used to read the file; replace 'latin-1' if your file needs another one
encoding = 'latin-1'
# Number of tags to display: replace 50 with the desired value
tag_count = 50 + 1

process_file(file=file_name, encoding=encoding, tag_count=tag_count)

Example:

~~~~~~~

The second program extracts all the metadata/hyperparameters of the .safetensors file and presents them in an executive summary.

def generate_report_from_file(file_name, encoding):
    """Build a plain-text executive summary of the metadata found in a file.

    The file is read as text with ``encoding`` and every reported value is
    looked up with ``get_value``.

    Args:
        file_name: Path of the file to read.
        encoding: Text encoding passed to ``open``.

    Returns:
        The report as one multi-line string (no trailing newline).
    """
    # (label, metadata key) pairs, in report order. A key of None prints the
    # label with no value: "Tag Frequency" is kept as a bare heading because
    # get_value stops at the first double quote and so cannot return a value
    # that itself contains quotes.
    report_fields = (
        ("Model Name", "ss_sd_model_name"),
        ("Clip Skip", "ss_clip_skip"),
        ("Number of Training Images", "ss_num_train_images"),
        ("Tag Frequency", None),
        ("Epochs", "ss_epoch"),
        ("Face Crop Augmentation Range", "ss_face_crop_aug_range"),
        ("Full FP16", "ss_full_fp16"),
        ("Gradient Accumulation Steps", "ss_gradient_accumulation_steps"),
        ("Gradient Checkpointing", "ss_gradient_checkpointing"),
        ("Learning Rate", "ss_learning_rate"),
        ("Low RAM", "ss_lowram"),
        ("Learning Rate Scheduler", "ss_lr_scheduler"),
        ("Learning Rate Warmup Steps", "ss_lr_warmup_steps"),
        ("Max Gradient Norm", "ss_max_grad_norm"),
        ("Max Token Length", "ss_max_token_length"),
        ("Max Training Steps", "ss_max_train_steps"),
        ("Min SNR Gamma", "ss_min_snr_gamma"),
        ("Mixed Precision", "ss_mixed_precision"),
        ("Network Alpha", "ss_network_alpha"),
        ("Network Dimension", "ss_network_dim"),
        ("Network Module", "ss_network_module"),
        ("New SD Model Hash", "ss_new_sd_model_hash"),
        ("Noise Offset", "ss_noise_offset"),
        ("Number of Batches per Epoch", "ss_num_batches_per_epoch"),
        ("Cache Latents", "ss_cache_latents"),
        ("Caption Dropout Every N Epochs", "ss_caption_dropout_every_n_epochs"),
        ("Caption Dropout Rate", "ss_caption_dropout_rate"),
        ("Caption Tag Dropout Rate", "ss_caption_tag_dropout_rate"),
        ("Number of Epochs", "ss_num_epochs"),
        # "reg" in ss_num_reg_images stands for regularization images.
        ("Number of Regularization Images", "ss_num_reg_images"),
        ("Optimizer", "ss_optimizer"),
        ("Output Name", "ss_output_name"),
        ("Prior Loss Weight", "ss_prior_loss_weight"),
        ("SD Model Hash", "ss_sd_model_hash"),
        ("SD Scripts Commit Hash", "ss_sd_scripts_commit_hash"),
        ("Seed", "ss_seed"),
        ("Session ID", "ss_session_id"),
        ("Text Encoder Learning Rate", "ss_text_encoder_lr"),
        ("UNet Learning Rate", "ss_unet_lr"),
        ("Version 2", "ss_v2"),
        ("SSHS Legacy Hash", "sshs_legacy_hash"),
        ("SSHS Model Hash", "sshs_model_hash"),
    )

    with open(file_name, 'r', encoding=encoding) as file:
        content = file.read()

    # Header lines; the whitespace-only second line is kept so the layout
    # matches the previous output.
    lines = ["Executive Summary Report:", "    ", "Important Parameters:"]
    for label, key in report_fields:
        if key is None:
            lines.append(f"- {label}:")
        else:
            lines.append(f"- {label}: {get_value(content, key)}")

    return "\n".join(lines)

def get_value(content, key):
    """Return the text stored under ``key`` in ``content``.

    The lookup is purely textual: it searches for the exact sequence
    ``"<key>":"`` and returns everything up to the next double quote, so a
    value that itself contains a double quote is cut at that quote.

    Args:
        content: Text to search.
        key: Metadata key, without surrounding quotes.

    Returns:
        The value as a string, or '' when the key is not present in that
        form or the value has no closing quote.
    """
    # Match the whole quoted key plus the `":"` separator so that a longer
    # key sharing this prefix (or the key text inside a value) is not hit.
    marker = f'"{key}":"'
    marker_pos = content.find(marker)
    if marker_pos == -1:
        # Without this guard the -1 from find() would be used as an offset
        # and unrelated text from the start of the content returned.
        return ''

    start = marker_pos + len(marker)
    end = content.find('"', start)
    if end == -1:
        return ''
    return content[start:end]


# Usage: generate the report for one specific file and print it
# Replace the path below with the correct path to your file
file_name = '/content/drive/MyDrive/Dataset/aeromorphAlo-03.safetensors'
# Text encoding used to read the file; replace 'latin-1' if your file needs another one
encoding = 'latin-1'
generated_report = generate_report_from_file(file_name=file_name, encoding=encoding)
print(generated_report)

Example:

~~~~~~

The third program analyzes every .safetensors file in a folder (even thousands of them) and collects selected metadata values into a DataFrame so you can inspect them.

import os
import pandas as pd

def analyze_files_in_folder(folder, encoding):
    """Print a DataFrame of the values extracted from every file in a folder.

    Each regular file directly inside ``folder`` is read as text and passed
    to ``get_values``; every returned name/value pair becomes one row with
    the columns 'File', 'Value Name' and 'Value Count'.

    Args:
        folder: Path of the folder to scan (sub-folders are not entered).
        encoding: Text encoding passed to ``open``.

    Returns:
        None. The resulting DataFrame is printed.
    """
    # Create a list to store the data
    data = []

    # Iterate over the entries of the folder
    for file_name in os.listdir(folder):
        file_path = os.path.join(folder, file_name)
        if not os.path.isfile(file_path):
            # os.listdir also returns sub-folders, which cannot be opened
            continue

        # Use a separate name for the handle so the file name is still
        # available for the 'File' column below.
        with open(file_path, 'r', encoding=encoding) as handle:
            content = handle.read()

        # Generate the values for the executive summary
        values = get_values(content)

        # Add one row per extracted value
        data.extend(
            {'File': file_name, 'Value Name': value_name, 'Value Count': value_count}
            for value_name, value_count in values.items()
        )

    # Create a DataFrame from the data
    df = pd.DataFrame(data)

    # Display the DataFrame with the values
    print(df)

def get_values(content):
    """Extract the metadata values of interest from ``content``.

    [This function is the one you'll actually need to modify]

    Args:
        content: Text of the file being analyzed.

    Returns:
        A dict mapping each metadata key to the text ``get_value`` finds
        for it, in the order the keys are listed below.
    """
    # Add more keys as needed; update with the appropriate value names
    wanted_keys = (
        'ss_max_train_steps',
        'ss_clip_skip',
        'ss_num_train_images',
        'ss_num_epochs',
    )
    return {wanted: get_value(content, wanted) for wanted in wanted_keys}

def get_value(content, key):
    """Return the text stored under ``key`` in ``content``.

    The lookup is purely textual: it searches for the exact sequence
    ``"<key>":"`` and returns everything up to the next double quote, so a
    value that itself contains a double quote is cut at that quote.

    Args:
        content: Text to search.
        key: Metadata key, without surrounding quotes.

    Returns:
        The value as a string, or '' when the key is not present in that
        form or the value has no closing quote.
    """
    # Match the whole quoted key plus the `":"` separator so that a longer
    # key sharing this prefix (or the key text inside a value) is not hit.
    marker = f'"{key}":"'
    marker_pos = content.find(marker)
    if marker_pos == -1:
        # Without this guard the -1 from find() would be used as an offset
        # and unrelated text from the start of the content returned.
        return ''

    start = marker_pos + len(marker)
    end = content.find('"', start)
    if end == -1:
        return ''
    return content[start:end]

# Folder whose files will be analyzed; replace it with your own folder
folder = '/content/drive/MyDrive/Dataset/Pruebas'
# Text encoding used to read the files; replace 'latin-1' if your files need another one
encoding = 'latin-1'

# Analyze every file in the folder and print the resulting table
analyze_files_in_folder(folder=folder, encoding=encoding)

Remember to update the folder variable with the folder you want to analyze and the encoding variable with the correct encoding of your files. Also, you can modify the get_values function to extract the necessary values according to your needs.

Example:

Ejecutar en :

".safetensors" Inspector - Google Colab

Open in Gato

Open in Colab

Comments