#!/usr/bin/env python3
"""
Script to remap existing cover art files to match the new CSV ranking structure.
This avoids having to re-download all the covers.
"""

import csv
import functools
import os
import re
import shutil
from pathlib import Path

# Minimum number of words (artist + album combined) a cover filename must
# share with the CSV entry before it is accepted as a match.
MIN_MATCH_SCORE = 2


def sanitize_filename(text):
    """Return *text* reduced to characters that are safe in a filename.

    Everything except word characters, whitespace, '-' and '.' is dropped,
    runs of whitespace become a single underscore, and the result is capped
    at 100 characters.
    """
    # One pass is enough: the characters that are illegal in filenames
    # (<>:"/\|?*) are all outside the allowed set below.
    text = re.sub(r'[^\w\s\-.]', '', text)
    text = re.sub(r'\s+', '_', text.strip())
    return text[:100]  # Limit length


def normalize_for_matching(text):
    """Normalize an artist/album name so two spellings can be compared.

    Lowercases, removes punctuation, collapses whitespace, drops the joiner
    word "and", and removes a leading "the ".
    """
    text = text.lower()
    # '&' is removed together with all other punctuation because
    # sanitize_filename() removes it from filenames too; keeping it here
    # would make e.g. "R&B" ("r&b") never match its own file ("rb").
    text = re.sub(r'[^\w\s]', '', text)
    # Strip *after* removing punctuation, which can expose new leading or
    # trailing whitespace (e.g. "- Foo").
    text = re.sub(r'\s+', ' ', text).strip()
    # Handle common variations ("X and Y" vs "X & Y" vs "X Y")
    text = text.replace(' and ', ' ')
    # Remove "the" from start
    if text.startswith('the '):
        text = text[4:]
    return text


@functools.lru_cache(maxsize=4096)
def _candidate_names(cover_file):
    """Return every normalized (artist, album) reading of a cover filename.

    Filenames look like ``rank_XXX_Artist_Album.jpg``. Where the artist ends
    and the album begins is ambiguous, so every split point is returned.
    Cached because the same files are re-parsed for every CSV row.
    """
    # Strip only the trailing extension; str.replace() would also remove a
    # ".jpg" occurring in the middle of the name.
    stem = cover_file[:-4] if cover_file.lower().endswith('.jpg') else cover_file
    parts = stem.split('_')
    if len(parts) < 4:
        return ()

    name_parts = parts[2:]  # Skip "rank" and "XXX"
    return tuple(
        (
            normalize_for_matching(' '.join(name_parts[:split_point])),
            normalize_for_matching(' '.join(name_parts[split_point:])),
        )
        for split_point in range(1, len(name_parts))
    )


def _names_match(first, second):
    """Return True if the names are equal or one contains the other.

    An empty string is a substring of everything, so containment is only
    checked when both names are non-empty; otherwise an empty name would act
    as a wildcard.
    """
    if first == second:
        return True
    return bool(first) and bool(second) and (first in second or second in first)


def find_matching_cover(artist, album, existing_covers):
    """Find the existing cover file that best matches this artist/album.

    Args:
        artist: Artist name as it appears in the CSV.
        album: Album name as it appears in the CSV.
        existing_covers: Iterable of cover filenames in the form
            ``rank_XXX_Artist_Album.jpg``.

    Returns:
        The filename with the highest match score, or None when no file
        shares at least MIN_MATCH_SCORE words with the target. Candidates are
        ranked by shared word count, then by how many of the two fields match
        exactly; on a full tie the earliest file wins.
    """
    target_artist = normalize_for_matching(artist)
    target_album = normalize_for_matching(album)
    artist_words = set(target_artist.split())
    album_words = set(target_album.split())

    best_key = None
    best_file = None

    # All files are scored before returning, so an exact match later in the
    # list beats a looser match (e.g. "Led Zeppelin IV") seen earlier.
    for cover_file in existing_covers:
        for file_artist, file_album in _candidate_names(cover_file):
            if not (_names_match(target_artist, file_artist)
                    and _names_match(target_album, file_album)):
                continue

            overlap = (len(artist_words & set(file_artist.split()))
                       + len(album_words & set(file_album.split())))
            if overlap < MIN_MATCH_SCORE:
                continue

            exact = (file_artist == target_artist) + (file_album == target_album)
            key = (overlap, exact)
            if best_key is None or key > best_key:
                best_key = key
                best_file = cover_file

    return best_file


def _copy_remapped_covers(csv_file, covers_dir, new_covers_dir, existing_covers):
    """Copy each matched cover into new_covers_dir under its new rank name.

    Returns a ``(mapped_count, unmapped_albums)`` tuple, where
    unmapped_albums lists "rank. artist - album" for rows without a cover.
    """
    mapped_count = 0
    unmapped_albums = []

    # utf-8-sig transparently drops a BOM so the first header is 'Rank' and
    # not '\ufeffRank'; newline='' is what the csv module expects.
    with open(csv_file, 'r', encoding='utf-8-sig', newline='') as file:
        for row in csv.DictReader(file):
            # DictReader fills fields missing from a short row with None.
            rank = (row.get('Rank') or '').strip()
            artist = (row.get('Artist') or '').strip()
            album = (row.get('Album') or '').strip()

            if not artist or not album:
                continue

            matching_cover = find_matching_cover(artist, album, existing_covers)
            if not matching_cover:
                unmapped_albums.append(f"{rank}. {artist} - {album}")
                print(f"✗ No cover found for: {rank}. {artist} - {album}")
                continue

            # Create new filename with correct ranking
            safe_artist = sanitize_filename(artist)
            safe_album = sanitize_filename(album)
            new_filename = f"rank_{rank.zfill(3)}_{safe_artist}_{safe_album}.jpg"

            shutil.copy2(covers_dir / matching_cover, new_covers_dir / new_filename)
            mapped_count += 1

            if mapped_count % 50 == 0:
                print(f"✓ Mapped {mapped_count} covers so far...")

    return mapped_count, unmapped_albums


def _print_summary(mapped_count, unmapped_albums):
    """Print mapping statistics; requires at least one processed album."""
    unmapped_count = len(unmapped_albums)
    total = mapped_count + unmapped_count

    print("\n🎉 MAPPING RESULTS:")
    print(f"Successfully mapped: {mapped_count}")
    print(f"Could not map: {unmapped_count}")
    print(f"Total albums: {total}")
    print(f"Success rate: {mapped_count / total * 100:.1f}%")

    if unmapped_albums:
        print(f"\n❌ Albums without covers ({unmapped_count}):")
        for album in unmapped_albums[:10]:
            print(f"  {album}")
        if unmapped_count > 10:
            print(f"  ... and {unmapped_count - 10} more")


def main():
    """Rebuild the ``covers`` directory so filenames follow the CSV ranking.

    The current covers are backed up to ``covers_backup``, matched covers are
    copied into ``covers_new`` under their new names, and ``covers_new`` then
    replaces ``covers``.
    """
    covers_dir = Path('covers')
    backup_dir = Path('covers_backup')
    new_covers_dir = Path('covers_new')
    csv_file = 'top_500_albums_2023.csv'

    if not covers_dir.exists():
        print("No covers directory found!")
        return

    # Check the CSV before touching anything on disk, so a missing file does
    # not cost us the previous backup.
    if not os.path.exists(csv_file):
        print(f"Error: {csv_file} not found!")
        return

    # Create backup directory
    if backup_dir.exists():
        print("Backup directory already exists. Removing it...")
        shutil.rmtree(backup_dir)

    print("Creating backup of existing covers...")
    shutil.copytree(covers_dir, backup_dir)
    print(f"Backup created at {backup_dir}")

    # Sorted so that tie-breaking between equally good matches does not
    # depend on the arbitrary order os.listdir() returns.
    existing_covers = sorted(
        name for name in os.listdir(covers_dir) if name.endswith('.jpg')
    )
    print(f"Found {len(existing_covers)} existing cover files")

    if new_covers_dir.exists():
        shutil.rmtree(new_covers_dir)
    new_covers_dir.mkdir()

    print("\nMapping covers to new rankings...")
    mapped_count, unmapped_albums = _copy_remapped_covers(
        csv_file, covers_dir, new_covers_dir, existing_covers
    )

    if mapped_count + len(unmapped_albums) == 0:
        # Nothing usable in the CSV: there is no success rate to compute and
        # replacing the covers with an empty directory would be wrong.
        print(f"No album rows found in {csv_file}; leaving {covers_dir} untouched.")
        shutil.rmtree(new_covers_dir)
        return

    _print_summary(mapped_count, unmapped_albums)

    # Replace old covers directory with new one
    print("\nReplacing covers directory...")
    shutil.rmtree(covers_dir)
    shutil.move(str(new_covers_dir), str(covers_dir))

    print("✅ Cover remapping complete!")
    print(f"Original covers backed up to: {backup_dir}")
    print(f"New covers available in: {covers_dir}")


if __name__ == "__main__":
    main()