- Moved all Python processing scripts to scripts/ directory for better organization - Preserves git history using git mv command - Clean separation between main project files and utility scripts 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
176 lines
No EOL
6.3 KiB
Python
176 lines
No EOL
6.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Script to remap existing cover art files to match the new CSV ranking structure.
This avoids having to re-download all the covers.
|
|
"""
|
|
|
|
import csv
|
|
import os
|
|
import re
|
|
import shutil
|
|
from pathlib import Path
|
|
|
|
def sanitize_filename(text):
    """Return *text* reduced to characters that are safe in a file name.

    Two removal passes run first: one for the explicitly listed problematic
    characters (``< > : " / \\ | ? *``) and one for anything that is not a
    word character, whitespace, ``-``, ``_`` or ``.``.  The remainder is
    trimmed, every whitespace run becomes a single underscore, and the
    result is cut to at most 100 characters.
    """
    # Apply both removal patterns in their original order.
    for unwanted in (r'[<>:"/\\|?*]', r'[^\w\s\-_\.]'):
        text = re.sub(unwanted, '', text)

    # Trim only after the removals, so whitespace exposed by a deleted
    # character at either end does not turn into a stray underscore.
    trimmed = text.strip()
    underscored = re.sub(r'\s+', '_', trimmed)

    return underscored[:100]  # Limit length
|
|
|
|
def normalize_for_matching(text):
    """Normalize an artist or album name so loosely equal names compare equal.

    The text is lower-cased, stripped of every character that is not a word
    character, whitespace or ``&``, and its whitespace runs are collapsed to
    single spaces.  Stand-alone ``and`` / ``&`` between words are dropped and
    a leading ``the`` is removed.

    Args:
        text: Raw name, e.g. ``"The Mamas & the Papas"``.

    Returns:
        The normalized name with no leading or trailing whitespace.
    """
    text = text.lower()

    # Remove common punctuation and normalize
    text = re.sub(r'[^\w\s&]', '', text)

    # Strip *after* removing punctuation: deleting a leading or trailing
    # symbol ("- The Wall", "Help !") can expose edge whitespace that would
    # otherwise defeat the leading-"the" check below and leak into the result.
    text = re.sub(r'\s+', ' ', text).strip()

    # Handle common variations
    text = text.replace(' and ', ' ').replace(' & ', ' ')

    # Remove "the" from start
    if text.startswith('the '):
        text = text[4:]

    return text
|
|
|
|
def find_matching_cover(artist, album, existing_covers, min_score=2):
    """Return the existing cover file name that best matches *artist*/*album*.

    Cover names are expected to follow ``rank_XXX_Artist_Album.jpg``.  The
    artist and album parts are both joined with underscores, so the boundary
    between them is ambiguous; every possible split point is tried and scored.

    A split is a candidate when the normalized artist strings contain one
    another and the normalized album strings contain one another.  Its score
    is the number of words shared by the artists plus the number shared by
    the albums.  The best-scoring candidate across *all* files wins; ties are
    broken in favour of candidates whose artist and/or album are exactly
    equal to the target, then by position in *existing_covers*.

    Args:
        artist: Artist name to look for (un-normalized).
        album: Album title to look for (un-normalized).
        existing_covers: Iterable of cover file names.
        min_score: Minimum number of shared words (artist + album) required
            for a candidate to be accepted.

    Returns:
        The matching file name, or ``None`` when nothing reaches *min_score*.
    """
    target_artist = normalize_for_matching(artist)
    target_album = normalize_for_matching(album)

    # The target word sets never change, so build them once.
    target_artist_words = set(target_artist.split())
    target_album_words = set(target_album.split())

    best_key = None
    best_file = None

    for cover_file in existing_covers:
        # Drop only a trailing extension; a ".jpg" substring elsewhere in the
        # name must not be removed.
        stem = cover_file[:-len('.jpg')] if cover_file.endswith('.jpg') else cover_file

        # Format: rank_XXX_Artist_Album -> need "rank", "XXX" and at least
        # one word each for artist and album.
        parts = stem.split('_')
        if len(parts) < 4:
            continue

        file_parts = parts[2:]  # Skip "rank" and "XXX"

        # Try every place where the artist could end and the album begin.
        for split_point in range(1, len(file_parts)):
            file_artist = normalize_for_matching(' '.join(file_parts[:split_point]))
            file_album = normalize_for_matching(' '.join(file_parts[split_point:]))

            artist_match = target_artist in file_artist or file_artist in target_artist
            album_match = target_album in file_album or file_album in target_album
            if not (artist_match and album_match):
                continue

            overlap = (
                len(target_artist_words & set(file_artist.split()))
                + len(target_album_words & set(file_album.split()))
            )
            if overlap < min_score:
                continue

            # Exact equality separates e.g. "Led Zeppelin" from
            # "Led Zeppelin IV" when both share the same number of words.
            exact = (file_artist == target_artist) + (file_album == target_album)

            key = (overlap, exact)
            # Strictly greater: on a full tie the earliest file is kept.
            if best_key is None or key > best_key:
                best_key = key
                best_file = cover_file

    return best_file
|
|
|
|
def main():
    """Rename existing covers so their file names follow the CSV's ranking.

    Workflow:
      1. Verify that ``covers/`` and ``top_500_albums_2023.csv`` exist.
      2. Copy ``covers/`` to ``covers_backup/`` (replacing any older backup).
      3. For every CSV row that has an artist and an album, look up a
         matching existing cover and copy it into ``covers_new/`` as
         ``rank_<rank>_<artist>_<album>.jpg``.
      4. Print a summary, then replace ``covers/`` with ``covers_new/``.

    The replacement in step 4 is skipped when no cover could be mapped, so a
    mismatched or empty CSV cannot leave ``covers/`` empty.
    """
    covers_dir = Path('covers')
    backup_dir = Path('covers_backup')
    new_covers_dir = Path('covers_new')
    csv_file = 'top_500_albums_2023.csv'

    if not covers_dir.exists():
        print("No covers directory found!")
        return

    # Check the CSV before touching the backup: a run that cannot proceed
    # must not delete and recreate an existing backup.
    if not os.path.exists(csv_file):
        print(f"Error: {csv_file} not found!")
        return

    # Create backup directory
    if backup_dir.exists():
        print("Backup directory already exists. Removing it...")
        shutil.rmtree(backup_dir)

    print("Creating backup of existing covers...")
    shutil.copytree(covers_dir, backup_dir)
    print(f"Backup created at {backup_dir}")

    # Get list of existing cover files.  os.listdir order is arbitrary, so
    # sort to make matching (and its tie-breaking) reproducible.
    existing_covers = sorted(
        f for f in os.listdir(covers_dir) if f.endswith('.jpg')
    )
    print(f"Found {len(existing_covers)} existing cover files")

    if new_covers_dir.exists():
        shutil.rmtree(new_covers_dir)
    new_covers_dir.mkdir()

    mapped_count = 0
    unmapped_albums = []

    print("\nMapping covers to new rankings...")

    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(csv_file, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            # DictReader fills fields missing from a short row with None,
            # so fall back to '' before stripping.
            rank = (row.get('Rank') or '').strip()
            artist = (row.get('Artist') or '').strip()
            album = (row.get('Album') or '').strip()

            if not artist or not album:
                continue

            # Find matching existing cover
            matching_cover = find_matching_cover(artist, album, existing_covers)

            if not matching_cover:
                unmapped_albums.append(f"{rank}. {artist} - {album}")
                print(f"✗ No cover found for: {rank}. {artist} - {album}")
                continue

            # Create new filename with correct ranking
            safe_artist = sanitize_filename(artist)
            safe_album = sanitize_filename(album)
            new_filename = f"rank_{rank.zfill(3)}_{safe_artist}_{safe_album}.jpg"

            # Copy file with new name
            shutil.copy2(covers_dir / matching_cover, new_covers_dir / new_filename)
            mapped_count += 1

            if mapped_count % 50 == 0:
                print(f"✓ Mapped {mapped_count} covers so far...")

    unmapped_count = len(unmapped_albums)
    total = mapped_count + unmapped_count
    # Guard against an empty CSV (or one with no usable rows).
    success_rate = mapped_count / total * 100 if total else 0.0

    print("\n🎉 MAPPING RESULTS:")
    print(f"Successfully mapped: {mapped_count}")
    print(f"Could not map: {unmapped_count}")
    print(f"Total albums: {total}")
    print(f"Success rate: {success_rate:.1f}%")

    if unmapped_albums:
        print(f"\n❌ Albums without covers ({unmapped_count}):")
        for entry in unmapped_albums[:10]:
            print(f" {entry}")
        if unmapped_count > 10:
            print(f" ... and {unmapped_count - 10} more")

    if mapped_count == 0:
        # Replacing covers/ with an empty directory would only lose data.
        print("\nNo covers were mapped; leaving the covers directory unchanged.")
        shutil.rmtree(new_covers_dir)
        return

    # Replace old covers directory with new one
    print("\nReplacing covers directory...")
    shutil.rmtree(covers_dir)
    shutil.move(str(new_covers_dir), str(covers_dir))

    print("✅ Cover remapping complete!")
    print(f"Original covers backed up to: {backup_dir}")
    print(f"New covers available in: {covers_dir}")
|
|
|
|
# Run the remapping only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()