Organize repository by moving all scripts to scripts/ folder
- Moved all Python processing scripts to the scripts/ directory for better organization
- Preserved git history by using the git mv command
- Clean separation between main project files and utility scripts

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
462fdcfa84
commit
872fdfa0ee
15 changed files with 0 additions and 0 deletions
176
remap_covers.py
176
remap_covers.py
|
|
@@ -1,176 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script to remap existing cover art files to match the new CSV ranking structure.
|
||||
This avoids having to re-download all the covers.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
def sanitize_filename(text):
    """Turn arbitrary text into a string that is safe to use in a filename.

    Three substitutions are applied in order: characters reserved on common
    filesystems are dropped, anything that is not a word character,
    whitespace, '-', '_' or '.' is dropped, and each run of whitespace in the
    trimmed result becomes a single underscore. The output is capped at 100
    characters.
    """
    removal_patterns = (
        r'[<>:"/\\|?*]',      # reserved filesystem characters
        r'[^\w\s\-_\.]',      # everything outside the allowed set
    )
    cleaned = text
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, '', cleaned)

    underscored = re.sub(r'\s+', '_', cleaned.strip())
    # Keep names comfortably below filesystem length limits.
    return underscored[:100]
|
||||
|
||||
def normalize_for_matching(text):
    """Normalize an artist or album name so near-identical spellings compare equal.

    The text is lower-cased, punctuation other than '&' is removed, whitespace
    is collapsed and trimmed, the connectives " and " / " & " are dropped, and
    a leading "the " is removed.

    Args:
        text: Raw artist or album name.

    Returns:
        The normalized string; it may be empty if the input held no word
        characters.
    """
    text = text.lower()
    # Drop punctuation but keep '&' so it can be treated like "and" below.
    text = re.sub(r'[^\w\s&]', '', text)
    # Collapse and trim *after* removing punctuation: stripping symbols such
    # as a leading "- " otherwise leaves edge spaces that defeat the "the "
    # prefix check and leak into the result.
    text = re.sub(r'\s+', ' ', text).strip()
    # "Simon and Garfunkel" and "Simon & Garfunkel" should compare equal.
    text = text.replace(' and ', ' ').replace(' & ', ' ')
    # A leading article carries no matching value.
    if text.startswith('the '):
        text = text[4:]
    return text
|
||||
|
||||
def find_matching_cover(artist, album, existing_covers, min_score=2):
    """Return the existing cover filename that best matches an artist/album.

    Cover files are expected to be named ``rank_XXX_Artist_Album.jpg``. Since
    underscores stand in for spaces, the boundary between artist and album is
    ambiguous, so every split point is tried. A split is a candidate when both
    the artist and the album match by substring containment (in either
    direction); candidates are scored by the number of shared words.

    The best-scoring file across *all* covers is returned. Returning the first
    file that merely reaches the threshold would let a cover for
    "Led Zeppelin" win over the cover for "Led Zeppelin IV". Ties keep the
    file seen first in ``existing_covers``.

    Args:
        artist: Artist name as it appears in the CSV.
        album: Album title as it appears in the CSV.
        existing_covers: Iterable of cover filenames (not full paths).
        min_score: Minimum number of shared words required to accept a match.

    Returns:
        The matching filename, or None when no file reaches ``min_score``.
    """
    target_artist = normalize_for_matching(artist)
    target_album = normalize_for_matching(album)
    # Loop-invariant word sets used for scoring.
    target_artist_words = set(target_artist.split())
    target_album_words = set(target_album.split())

    best_match_score = 0
    best_file = None

    for cover_file in existing_covers:
        # Strip only a trailing extension; str.replace would also remove
        # ".jpg" occurring in the middle of a name.
        stem = cover_file[:-4] if cover_file.endswith('.jpg') else cover_file
        parts = stem.split('_')
        # Need at least "rank", the number, one artist word and one album word.
        if len(parts) < 4:
            continue

        name_parts = parts[2:]  # Skip "rank" and "XXX"

        for split_point in range(1, len(name_parts)):
            file_artist = normalize_for_matching(' '.join(name_parts[:split_point]))
            file_album = normalize_for_matching(' '.join(name_parts[split_point:]))

            artist_match = target_artist in file_artist or file_artist in target_artist
            album_match = target_album in file_album or file_album in target_album
            if not (artist_match and album_match):
                continue

            score = (
                len(target_artist_words & set(file_artist.split()))
                + len(target_album_words & set(file_album.split()))
            )
            if score > best_match_score:
                best_match_score = score
                best_file = cover_file

    if best_match_score >= min_score:
        return best_file
    return None
|
||||
|
||||
def _print_mapping_summary(mapped_count, unmapped_albums):
    """Print mapping totals and up to ten of the albums left without a cover."""
    unmapped_count = len(unmapped_albums)
    total = mapped_count + unmapped_count

    print("\n🎉 MAPPING RESULTS:")
    print(f"Successfully mapped: {mapped_count}")
    print(f"Could not map: {unmapped_count}")
    print(f"Total albums: {total}")
    if total:
        print(f"Success rate: {mapped_count/total*100:.1f}%")
    else:
        # An empty CSV would otherwise raise ZeroDivisionError here.
        print("Success rate: n/a (no albums found in CSV)")

    if unmapped_albums:
        print(f"\n❌ Albums without covers ({unmapped_count}):")
        for entry in unmapped_albums[:10]:
            print(f" {entry}")
        if unmapped_count > 10:
            print(f" ... and {unmapped_count - 10} more")


def main():
    """Remap existing cover images onto the rankings in the current CSV.

    Steps:
      1. Verify that ``covers/`` and the CSV exist (before touching the disk).
      2. Back up ``covers/`` to ``covers_backup/`` (replacing an older backup).
      3. For every CSV row with an artist and album, find the best matching
         existing cover and copy it into ``covers_new/`` under a filename
         built from the row's rank, artist and album.
      4. Print a summary, then swap ``covers_new/`` in as ``covers/``. The
         swap is skipped when nothing could be mapped.
    """
    covers_dir = Path('covers')
    backup_dir = Path('covers_backup')
    new_covers_dir = Path('covers_new')
    csv_file = 'top_500_albums_2023.csv'

    if not covers_dir.exists():
        print("No covers directory found!")
        return

    # Check the CSV before the backup step: that step deletes any previous
    # backup, which should not happen on a run that cannot proceed anyway.
    if not os.path.exists(csv_file):
        print(f"Error: {csv_file} not found!")
        return

    # Create backup directory
    if backup_dir.exists():
        print("Backup directory already exists. Removing it...")
        shutil.rmtree(backup_dir)

    print("Creating backup of existing covers...")
    shutil.copytree(covers_dir, backup_dir)
    print(f"Backup created at {backup_dir}")

    # Sorted so that tie-breaking between equally good matches does not
    # depend on the arbitrary order returned by os.listdir.
    existing_covers = sorted(
        name for name in os.listdir(covers_dir) if name.endswith('.jpg')
    )
    print(f"Found {len(existing_covers)} existing cover files")

    # Always start from an empty staging directory.
    if new_covers_dir.exists():
        shutil.rmtree(new_covers_dir)
    new_covers_dir.mkdir()

    mapped_count = 0
    unmapped_albums = []

    print("\nMapping covers to new rankings...")

    # newline='' lets the csv module handle embedded newlines itself.
    with open(csv_file, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            # DictReader fills missing trailing fields with None, so guard
            # before calling .strip().
            rank = (row.get('Rank') or '').strip()
            artist = (row.get('Artist') or '').strip()
            album = (row.get('Album') or '').strip()

            if not artist or not album:
                continue

            matching_cover = find_matching_cover(artist, album, existing_covers)

            if not matching_cover:
                unmapped_albums.append(f"{rank}. {artist} - {album}")
                print(f"✗ No cover found for: {rank}. {artist} - {album}")
                continue

            # Build the new filename from the current ranking.
            safe_artist = sanitize_filename(artist)
            safe_album = sanitize_filename(album)
            new_filename = f"rank_{rank.zfill(3)}_{safe_artist}_{safe_album}.jpg"

            shutil.copy2(covers_dir / matching_cover, new_covers_dir / new_filename)
            mapped_count += 1

            if mapped_count % 50 == 0:
                print(f"✓ Mapped {mapped_count} covers so far...")

    _print_mapping_summary(mapped_count, unmapped_albums)

    if mapped_count == 0:
        # Swapping in an empty directory would wipe the working covers.
        print(f"\nNo covers could be mapped; leaving {covers_dir} untouched.")
        shutil.rmtree(new_covers_dir)
        return

    # Replace old covers directory with new one
    print("\nReplacing covers directory...")
    shutil.rmtree(covers_dir)
    shutil.move(new_covers_dir, covers_dir)

    print("✅ Cover remapping complete!")
    print(f"Original covers backed up to: {backup_dir}")
    print(f"New covers available in: {covers_dir}")
|
||||
|
||||
# Run the remapping only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue