#!/usr/bin/env python3
"""Print ``rm`` commands for duplicate files listed in a sha256sum-style file.

Usage: pass the checksum file as the first argument; when no argument is
given, ``sha256sums.txt`` in the current directory is read.

Nothing is deleted by this script. It only prints shell commands to standard
output so they can be reviewed before being run.
"""

import os
import re
import shlex
import sys
from collections import defaultdict

# One checksum line: optional leading blanks, an optional backslash marking an
# escaped file name, 64 hex digits, then either the canonical separator (a
# space followed by the mode flag, ' ' for text or '*' for binary) or, for
# hand-written files, any run of blanks.  The canonical form is tried first so
# that a file name that really begins with '*' or ' ' is preserved.
_SUM_LINE_RE = re.compile(
    r"[ \t]*(?P<esc>\\?)(?P<hash>[0-9A-Fa-f]{64})(?: [ *]|[ \t]+)(?P<name>.+)"
)

# Escape sequences used in file names on lines that start with a backslash.
_ESCAPE_RE = re.compile(r"\\([\\nr])")
_ESCAPES = {"\\": "\\", "n": "\n", "r": "\r"}


def _unescape(match):
    """Return the character represented by one matched escape sequence."""
    return _ESCAPES[match.group(1)]


def _comment_safe(text):
    """Return *text* with line breaks made visible so it fits on one comment line."""
    return text.replace("\n", "\\n").replace("\r", "\\r")


def _unique_paths(paths):
    """Drop repeated spellings of the same path, keeping the first one seen.

    Paths are compared after ``os.path.normpath`` so that ``./a`` and ``a``
    count as one file; otherwise one spelling could be chosen as the keeper
    while the other is printed in an ``rm`` command for the very same file.
    """
    seen = set()
    unique = []
    for candidate in paths:
        key = os.path.normpath(candidate)
        if key not in seen:
            seen.add(key)
            unique.append(candidate)
    return unique


def parse_sums(path):
    """Parse a sha256sum-style file into a mapping of hash -> list of paths.

    Lines that do not look like ``<64 hex digits><separator><name>`` are
    skipped. Hashes are lower-cased so that the same digest written in
    different cases lands in the same group. File names keep their own leading
    and trailing spaces, and names on backslash-prefixed (escaped) lines are
    decoded.

    Args:
        path: Location of the checksum file.

    Returns:
        A ``defaultdict(list)`` keyed by lower-case hex digest; each value
        lists the file names in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    mapping = defaultdict(list)
    # NOTE: errors="replace" turns undecodable bytes into U+FFFD, so a name
    # that is not valid UTF-8 will not match the real file on disk.
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        for raw in f:
            # Only the line terminator is removed; other trailing whitespace
            # may be part of the file name.
            match = _SUM_LINE_RE.fullmatch(raw.rstrip("\n"))
            if match is None:
                continue
            name = match.group("name")
            if match.group("esc"):
                name = _ESCAPE_RE.sub(_unescape, name)
            mapping[match.group("hash").lower()].append(name)
    return mapping


def choose_keeper(paths):
    """Pick which of several duplicate paths should be kept.

    Preference order:
      1. paths whose basename starts with "cts" (case-insensitive);
      2. paths whose basename is already all lower-case;
      3. any path.
    Within rule 1 or 2 the shortest path wins, ties broken
    lexicographically; rule 3 picks the lexicographically smallest path.

    Args:
        paths: Non-empty list of path strings.

    Returns:
        The chosen path string (one of the elements of *paths*).

    Raises:
        IndexError: If *paths* is empty.
    """

    def shortest_first(p):
        return (len(p), p)

    cts = [p for p in paths if os.path.basename(p).lower().startswith("cts")]
    if cts:
        return min(cts, key=shortest_first)

    lowercase = [
        p for p in paths if os.path.basename(p) == os.path.basename(p).lower()
    ]
    if lowercase:
        return min(lowercase, key=shortest_first)

    return sorted(paths)[0]


def main():
    """Read the checksum file and print reviewable ``rm`` commands.

    For every hash shared by at least two distinct paths, prints a comment
    naming the hash and the kept file, then one ``rm -v --`` line per other
    path, then a blank line. Exits with status 1 and a message on standard
    error if the checksum file cannot be read.
    """
    sums_path = sys.argv[1] if len(sys.argv) > 1 else "sha256sums.txt"

    try:
        hashes = parse_sums(sums_path)
    except FileNotFoundError:
        print(f"ERROR: sums file {sums_path!r} not found", file=sys.stderr)
        sys.exit(1)
    except OSError as exc:
        # e.g. the path is a directory or is not readable.
        print(
            f"ERROR: cannot read sums file {sums_path!r}: {exc}",
            file=sys.stderr,
        )
        sys.exit(1)

    for h, paths in hashes.items():
        candidates = _unique_paths(paths)
        if len(candidates) < 2:
            continue

        keeper = choose_keeper(candidates)
        print(f"# hash {h}")
        # A decoded name may contain a newline; it must not spill out of the
        # comment and become an executable line of the generated script.
        print(f"# keeping: {_comment_safe(keeper)}")
        for p in candidates:
            if p != keeper:
                # Print rm commands for duplicates (review before running).
                print("rm -v -- " + shlex.quote(p))
        print()


if __name__ == "__main__":
    main()