This analyzer shows how much data has been uploaded to the storage node (that is, downloaded from the internet: ingress traffic) and prints the total size, the number of uploads, and a histogram of piece sizes.
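The script keys off log lines that contain the word "uploaded" followed by a JSON payload with a "Size" field. Here is a minimal sketch of that parsing step, run against an invented example line (the field names and values below are illustrative only, not taken from a real node):

import json

# Hypothetical log line; only the "uploaded" marker and the trailing
# JSON payload with a "Size" field matter to the analyzer.
line = 'INFO piecestore uploaded {"Piece ID": "abc123", "Action": "PUT", "Size": 181504}'

# Everything after the last occurrence of "uploaded" is parsed as JSON.
payload = json.loads(line.split("uploaded")[-1].strip())
print(payload["Size"])  # 181504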
#!/usr/bin/env python3
import sys
import json


def human_readable_size(size_bytes):
    """Format a byte count with a 1024-based unit suffix."""
    if size_bytes == 0:
        return "0B"
    size_name = ("B", "KB", "MB", "GB", "TB")
    # Pick the unit from the number of decimal digits (rough, but close enough for display).
    i = int(len(str(size_bytes)) - 1) // 3
    p = pow(1024, i)
    s = round(size_bytes / p, 3)
    return f"{s} {size_name[i]}"


def print_histogram(sizes):
    """Print an ASCII histogram of piece sizes bucketed into power-of-two bins."""
    bins = [
        (1 * 1024, 2 * 1024),
        (2 * 1024, 4 * 1024),
        (4 * 1024, 8 * 1024),
        (8 * 1024, 16 * 1024),
        (16 * 1024, 32 * 1024),
        (32 * 1024, 64 * 1024),
        (64 * 1024, 128 * 1024),
        (128 * 1024, 256 * 1024),
        (256 * 1024, 512 * 1024),
        (512 * 1024, 1 * 1024 * 1024),
        (1 * 1024 * 1024, 2 * 1024 * 1024),
        (2 * 1024 * 1024, 4 * 1024 * 1024),
        (4 * 1024 * 1024, 8 * 1024 * 1024),
        (8 * 1024 * 1024, 16 * 1024 * 1024),
        (16 * 1024 * 1024, 32 * 1024 * 1024),
        (32 * 1024 * 1024, 64 * 1024 * 1024),
        (64 * 1024 * 1024, 128 * 1024 * 1024),
        (128 * 1024 * 1024, 256 * 1024 * 1024),
        (256 * 1024 * 1024, float('inf'))
    ]
    bin_counts = [0] * len(bins)
    for size in sizes:
        for i, (low, high) in enumerate(bins):
            if low <= size < high:
                bin_counts[i] += 1
                break
    max_count = max(bin_counts)
    max_chars = 50
    print("File size histogram:")
    for i, (low, high) in enumerate(bins):
        bin_label = f"{human_readable_size(low)}..{human_readable_size(high)}"
        # Scale the bar so the fullest bin gets max_chars characters.
        num_chars = int((bin_counts[i] / max_count) * max_chars) if max_count > 0 else 0
        count_str = f"{bin_counts[i]:8}"
        print(f"{bin_label:<20} | {count_str} | {'█' * num_chars}")


def filter_uploaded_lines(logfile):
    """Sum up and histogram the piece sizes of all uploads found in the log."""
    total_size = 0
    upload_count = 0
    sizes = []
    try:
        with open(logfile, 'r') as file:
            lines = file.readlines()
            for line in lines:
                if "uploaded" in line:
                    upload_count += 1
                    try:
                        # The JSON payload follows the "uploaded" marker in the log line.
                        json_part = line.split('uploaded')[-1].strip()
                        data = json.loads(json_part)
                        size = data.get("Size", 0)
                        total_size += size
                        sizes.append(size)
                    except json.JSONDecodeError:
                        print(f"Failed to decode JSON from line: {line}")
                    except Exception as e:
                        print(f"An error occurred while processing line: {e}")
            print(f"Total size of uploaded pieces: {human_readable_size(total_size)}")
            print(f"Total number of uploads: {upload_count}")
            print_histogram(sizes)
    except FileNotFoundError:
        print(f"The file {logfile} does not exist.")
    except Exception as e:
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <logfile>")
    else:
        logfile = sys.argv[1]
        filter_uploaded_lines(logfile)
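If you want to reuse the helpers from another script (for example, to sanity-check the bucketing on synthetic sizes), the file can be loaded by path, since the hyphen in the name prevents a normal import. A rough sketch, assuming the script above is saved as storj-stats.py in the current directory:

import importlib.util

# Load storj-stats.py as a module; the __main__ guard keeps its CLI from running.
spec = importlib.util.spec_from_file_location("storj_stats", "storj-stats.py")
storj_stats = importlib.util.module_from_spec(spec)
spec.loader.exec_module(storj_stats)

print(storj_stats.human_readable_size(2 * 1024 * 1024))  # 2.0 MB

# Synthetic piece sizes: two small pieces and one in the 128 KB..256 KB bin.
storj_stats.print_histogram([1500, 3000, 200_000])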
Run it on the Storj log file:
python3 storj-stats.py storage/storj.log
Example output:
Total size of uploaded pieces: 156.995 GB
Total number of uploads: 1036601
File size histogram:
1.0 KB..2.0 KB       |    20164 | ██
2.0 KB..4.0 KB       |   402924 | █████████████████████████████████████████
4.0 KB..8.0 KB       |    18736 | █
8.0 KB..16.0 KB      |    15639 | █
16.0 KB..32.0 KB     |    10104 | █
32.0 KB..64.0 KB     |    11642 | █
64.0 KB..128.0 KB    |     9830 | █
128.0 KB..256.0 KB   |   483561 | ██████████████████████████████████████████████████
256.0 KB..512.0 KB   |    30104 | ███
512.0 KB..1.0 MB     |     3835 |
1.0 MB..2.0 MB       |    11190 | █
2.0 MB..4.0 MB       |     8296 |
4.0 MB..8.0 MB       |        5 |
8.0 MB..16.0 MB      |        0 |
16.0 MB..32.0 MB     |        0 |
32.0 MB..64.0 MB     |        0 |
64.0 MB..128.0 MB    |        0 |
128.0 MB..256.0 MB   |        0 |
256.0 MB..inf B      |        0 |