This analyzer shows how much data has been uploaded to the storage node (ingress traffic from the node's point of view) and prints the total upload count, the total size, and a piece-size histogram.
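Each completed upload appears in the node log as a piecestore line ending in a JSON object whose Size field carries the piece size in bytes. The line below is roughly what the script matches on (timestamp and field values are illustrative, and the piece ID is abbreviated):

2024-01-01T12:00:00.000Z  INFO  piecestore  uploaded  {"Piece ID": "…", "Action": "PUT", "Size": 181504}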
#!/usr/bin/env python3
import json
import math
import sys


def human_readable_size(size_bytes):
    """Format a byte count using binary (1024-based) units."""
    if size_bytes == 0:
        return "0 B"
    if math.isinf(size_bytes):
        return "inf B"  # the open-ended top bucket prints as "inf B"
    size_name = ("B", "KB", "MB", "GB", "TB")
    i = 0
    # Divide down until the value fits its unit, capping at TB.
    while size_bytes >= 1024 and i < len(size_name) - 1:
        size_bytes /= 1024
        i += 1
    return f"{round(size_bytes, 3)} {size_name[i]}"

def print_histogram(sizes):
    # Power-of-two buckets from 1 KB upwards; the last bucket is open-ended.
    bins = [
        (1 * 1024, 2 * 1024),
        (2 * 1024, 4 * 1024),
        (4 * 1024, 8 * 1024),
        (8 * 1024, 16 * 1024),
        (16 * 1024, 32 * 1024),
        (32 * 1024, 64 * 1024),
        (64 * 1024, 128 * 1024),
        (128 * 1024, 256 * 1024),
        (256 * 1024, 512 * 1024),
        (512 * 1024, 1 * 1024 * 1024),
        (1 * 1024 * 1024, 2 * 1024 * 1024),
        (2 * 1024 * 1024, 4 * 1024 * 1024),
        (4 * 1024 * 1024, 8 * 1024 * 1024),
        (8 * 1024 * 1024, 16 * 1024 * 1024),
        (16 * 1024 * 1024, 32 * 1024 * 1024),
        (32 * 1024 * 1024, 64 * 1024 * 1024),
        (64 * 1024 * 1024, 128 * 1024 * 1024),
        (128 * 1024 * 1024, 256 * 1024 * 1024),
        (256 * 1024 * 1024, float('inf'))
    ]
    bin_counts = [0] * len(bins)
    for size in sizes:
        # Count each piece in the first bucket it falls into; sizes below
        # 1 KB fall outside every bucket and are skipped.
        for i, (low, high) in enumerate(bins):
            if low <= size < high:
                bin_counts[i] += 1
                break
    max_count = max(bin_counts)
    max_chars = 50  # width of the longest bar
    print("File size histogram:")
    for i, (low, high) in enumerate(bins):
        bin_label = f"{human_readable_size(low)}..{human_readable_size(high)}"
        # Scale every bar relative to the fullest bucket.
        num_chars = int((bin_counts[i] / max_count) * max_chars) if max_count > 0 else 0
        print(f"{bin_label:<20} | {bin_counts[i]:8} | {'█' * num_chars}")

def filter_uploaded_lines(logfile):
    total_size = 0
    upload_count = 0
    sizes = []
    try:
        with open(logfile, 'r') as file:
            # Stream the log line by line; node logs can grow to gigabytes.
            for line in file:
                if "uploaded" not in line:
                    continue
                upload_count += 1
                try:
                    # The JSON payload follows the word "uploaded" on the line.
                    json_part = line.split('uploaded')[-1].strip()
                    data = json.loads(json_part)
                    size = data.get("Size", 0)
                    total_size += size
                    sizes.append(size)
                except json.JSONDecodeError:
                    print(f"Failed to decode JSON from line: {line}")
                except Exception as e:
                    print(f"An error occurred while processing line: {e}")
        print(f"Total size of uploaded pieces: {human_readable_size(total_size)}")
        print(f"Total number of uploads: {upload_count}")
        print_histogram(sizes)
    except FileNotFoundError:
        print(f"The file {logfile} does not exist.")
    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python3 storj-stats.py <logfile>")
    else:
        filter_uploaded_lines(sys.argv[1])

Run it on the storj logfile:
python3 storj-stats.py storage/storj.log
Example output:
Total size of uploaded pieces: 156.995 GB
Total number of uploads: 1036601
File size histogram:
1.0 KB..2.0 KB       |    20164 | ██
2.0 KB..4.0 KB       |   402924 | █████████████████████████████████████████
4.0 KB..8.0 KB       |    18736 | █
8.0 KB..16.0 KB      |    15639 | █
16.0 KB..32.0 KB     |    10104 | █
32.0 KB..64.0 KB     |    11642 | █
64.0 KB..128.0 KB    |     9830 | █
128.0 KB..256.0 KB   |   483561 | ██████████████████████████████████████████████████
256.0 KB..512.0 KB   |    30104 | ███
512.0 KB..1.0 MB     |     3835 |
1.0 MB..2.0 MB       |    11190 | █
2.0 MB..4.0 MB       |     8296 |
4.0 MB..8.0 MB       |        5 |
8.0 MB..16.0 MB      |        0 |
16.0 MB..32.0 MB     |        0 |
32.0 MB..64.0 MB     |        0 |
64.0 MB..128.0 MB    |        0 |
128.0 MB..256.0 MB   |        0 |
256.0 MB..inf B      |        0 |
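Because the script counts every line containing the word "uploaded", the reported upload total can be cross-checked with a plain grep over the same log; the two numbers should match exactly:

grep -c uploaded storage/storj.log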