Skip to content

Commit 05fca76

Browse files
committed
backend: helper to cleanup unique RPM files
These unique RPMs are successfully migrated to PULP, duplicates are not.
1 parent 0f656de commit 05fca76

File tree

1 file changed

+114
-0
lines changed

1 file changed

+114
-0
lines changed
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
"""
2+
Remove RPMs that are moved to PULP.
3+
"""
4+
5+
import os
6+
import argparse
7+
from collections import defaultdict
8+
import logging
9+
10+
# Configure the logging system
11+
# Output will go to the console (stdout)
12+
# Format includes the log level (INFO, WARNING, ERROR) and the message
13+
14+
from copr_common.log import setup_script_logger
15+
16+
log = logging.getLogger(__name__)
17+
18+
def _collect_rpm_paths(start_dir):
    """
    Walk ``start_dir`` and group every ``*.rpm`` path by its bare filename.

    :param start_dir: The path to the root directory to start searching.
    :return: Tuple ``(rpm_files, walk_errors)``; ``rpm_files`` maps
        filename -> list of full paths with that filename, ``walk_errors``
        is the list of ``OSError`` instances reported by ``os.walk`` for
        directories it could not list.
    """
    rpm_files = defaultdict(list)
    walk_errors = []

    # Without onerror, os.walk() silently skips directories it cannot list.
    # RPMs hidden in such a directory would not be counted, so their copies
    # elsewhere would look unique; record the errors for the caller instead.
    for root, _, files in os.walk(start_dir, onerror=walk_errors.append):
        for filename in files:
            if filename.endswith('.rpm'):
                rpm_files[filename].append(os.path.join(root, filename))

    return rpm_files, walk_errors


def find_and_remove_unique_rpms(start_dir, dry_run=True):
    """
    Traverses a directory, counts occurrences of each unique RPM filename,
    and removes files that are only present once (unique).

    The user is asked for an interactive confirmation first.  Nothing is
    removed when the answer is not "yes"/"y", when no answer can be read
    (stdin closed), or when any part of the tree could not be traversed
    (the occurrence counts would be incomplete in that case).

    :param start_dir: The path to the root directory to start searching.
    :param dry_run: If True, only logs the actions without deleting files.
    :return: None
    """
    if not os.path.isdir(start_dir):
        log.error("Directory not found at %s", start_dir)
        return

    # --- CONFIRMATION STEP ---
    try:
        confirmation = input(
            "WARNING: This script will scan and potentially delete files in '%s'.\n"
            "Do you want to proceed? (yes/no): " % (start_dir,)
        ).strip().lower()
    except EOFError:
        # No interactive stdin (cron, closed pipe, ...): treat it as a refusal
        # rather than crashing with a traceback.
        log.error("No confirmation could be read from stdin; operation cancelled.")
        return

    if confirmation not in ('yes', 'y'):
        log.info("Operation cancelled by user.")
        return
    # --- END CONFIRMATION STEP ---

    log.info("--- Starting traversal in: %s (Dry Run: %s) ---", start_dir, dry_run)

    # 1. Traverse the directory and collect all RPM paths
    rpm_files, walk_errors = _collect_rpm_paths(start_dir)

    if walk_errors:
        for error in walk_errors:
            log.error("Unable to traverse directory: %s", error)
        log.error(
            "Aborting: %d directories could not be read, so unique RPM files "
            "cannot be identified reliably.",
            len(walk_errors),
        )
        return

    total_files_found = sum(len(paths) for paths in rpm_files.values())
    log.info("Found %d total RPM files.", total_files_found)

    # 2. Identify files that are unique (their filename occurs only once)
    files_to_remove = [paths[0] for paths in rpm_files.values() if len(paths) == 1]

    # 3. Perform removal (or log the planned actions if dry_run)
    if not files_to_remove:
        log.info("No unique RPM files found to remove.")
        return

    log.info("\n--- Processing %d unique files for removal ---", len(files_to_remove))

    for file_path in files_to_remove:
        if dry_run:
            log.info("[DRY RUN] Will remove: %s", file_path)
            continue

        # Best effort: a failure on one file is logged and the rest continue.
        try:
            os.remove(file_path)
        except OSError as e:
            log.error("ERROR removing %s: %s", file_path, e)
        else:
            log.info("REMOVED: %s", file_path)

    if dry_run:
        log.info("\n*** Dry run finished. Rerun with --execute to perform actual deletion. ***")
    else:
        log.info("\n*** Deletion finished. ***")
93+
94+
95+
if __name__ == "__main__":
    # Hand the module logger and the log-file path to the copr_common helper
    # before anything else runs (same order as argument parsing below relies on).
    setup_script_logger(log, "/var/log/copr-backend/change-storage-delete.log")

    # Command line: one positional root directory plus an opt-in switch that
    # turns the default dry run into a real deletion.
    cli = argparse.ArgumentParser(
        description=(
            "Finds and removes RPM files that are unique (occur only once) "
            "across all subdirectories."
        ),
    )
    cli.add_argument("directory", type=str, help="The root directory to traverse.")
    cli.add_argument(
        "-e",
        "--execute",
        action="store_true",
        help="Perform the actual deletion (default is a dry run).",
    )
    options = cli.parse_args()

    # Deletion happens only when --execute was given; otherwise stay in dry run.
    perform_deletion = options.execute
    find_and_remove_unique_rpms(options.directory, dry_run=not perform_deletion)

0 commit comments

Comments
 (0)