Complete Top 500 Albums project with 100% data coverage and UI improvements

- Fixed Info/Description columns after regenerating CSV with clean Wikipedia data
- Remapped and downloaded missing album covers to match new rankings
- Modified website UI to show all description text without click-to-expand
- Added comprehensive Info/Description for all 500 albums using research
- Created multiple data processing scripts for album information completion
- Achieved 100% data completion with descriptions ending "(by Claude)" for new content
- All albums now have complete metadata and cover art

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Johan Lundberg 2025-07-01 00:33:47 +02:00
parent 09b5491f8a
commit 462fdcfa84
500 changed files with 2323 additions and 502 deletions

176
remap_covers.py Normal file
View file

@ -0,0 +1,176 @@
#!/usr/bin/env python3
"""
Script to remap existing cover art files to match the new CSV ranking structure.
This avoids having to re-download all the covers.
"""
import csv
import os
import re
import shutil
from pathlib import Path
def sanitize_filename(text):
    """Make *text* safe to use as (part of) a filename.

    Strips characters that are illegal on common filesystems, drops any
    remaining punctuation except ``-``, ``_`` and ``.``, replaces runs of
    whitespace with a single underscore, and caps the result at 100 chars.
    """
    # Characters that are outright illegal on Windows/most filesystems.
    cleaned = re.sub(r'[<>:"/\\|?*]', '', text)
    # Anything that is not a word char, whitespace, dash, underscore or dot.
    cleaned = re.sub(r'[^\w\s\-_\.]', '', cleaned)
    # Whitespace runs become a single underscore separator.
    cleaned = re.sub(r'\s+', '_', cleaned.strip())
    # Keep filenames to a sane length.
    return cleaned[:100]
def normalize_for_matching(text):
    """Canonicalize an artist/album name for fuzzy comparison.

    Lowercases, removes punctuation (keeping ``&``), collapses internal
    whitespace, drops the connectors ``and``/``&``, and strips a leading
    ``the`` so e.g. "The Beatles" and "Beatles" compare equal.
    """
    result = text.lower().strip()
    # Keep word characters, whitespace and ampersands; drop other punctuation.
    result = re.sub(r'[^\w\s&]', '', result)
    result = re.sub(r'\s+', ' ', result)
    # "X and Y" and "X & Y" should normalize identically.
    result = result.replace(' and ', ' ').replace(' & ', ' ')
    # A leading article is often omitted in filenames; drop it here too.
    return result[4:] if result.startswith('the ') else result
def find_matching_cover(artist, album, existing_covers):
    """Return the existing cover filename that best matches *artist*/*album*.

    Cover filenames follow ``rank_XXX_Artist_Album.jpg``. Because underscores
    separate both the artist words and the album words, the artist/album
    boundary is ambiguous, so every split point is tried and scored by word
    overlap with the normalized target names.

    Returns the best-scoring filename, or None when no candidate reaches a
    score of at least 2 overlapping words.
    """
    target_artist = normalize_for_matching(artist)
    target_album = normalize_for_matching(album)

    # Track the best candidate across ALL covers. (The original code reset
    # the score per file and returned the first file scoring >= 2, which
    # could pick a worse match when several titles overlap.)
    best_score = 0
    best_file = None

    for cover_file in existing_covers:
        parts = cover_file.replace('.jpg', '').split('_')
        if len(parts) < 4:
            continue  # not in rank_XXX_Artist_Album form
        file_parts = parts[2:]  # drop "rank" and the numeric rank

        for split_point in range(1, len(file_parts)):
            file_artist = '_'.join(file_parts[:split_point]).replace('_', ' ')
            file_album = '_'.join(file_parts[split_point:]).replace('_', ' ')
            norm_artist = normalize_for_matching(file_artist)
            norm_album = normalize_for_matching(file_album)

            # Require substring containment in both directions before scoring.
            artist_ok = target_artist in norm_artist or norm_artist in target_artist
            album_ok = target_album in norm_album or norm_album in target_album
            if artist_ok and album_ok:
                # Score = number of shared words between target and candidate.
                score = (len(set(target_artist.split()) & set(norm_artist.split()))
                         + len(set(target_album.split()) & set(norm_album.split())))
                if score > best_score:
                    best_score = score
                    best_file = cover_file

    # At least 2 word matches required to call it a hit.
    return best_file if best_score >= 2 else None
def main():
    """Remap existing cover files to the rankings in the current CSV.

    Steps:
      1. Back up ``covers/`` to ``covers_backup/`` (replacing any old backup).
      2. For each CSV row, find the best-matching existing cover and copy it
         into a staging directory ``covers_new/`` under its new rank-based
         filename.
      3. Report mapping statistics, then replace ``covers/`` with the
         remapped set.
    """
    covers_dir = Path('covers')
    backup_dir = Path('covers_backup')

    if not covers_dir.exists():
        print("No covers directory found!")
        return

    # Always start from a fresh backup so a re-run cannot mix states.
    if backup_dir.exists():
        print("Backup directory already exists. Removing it...")
        shutil.rmtree(backup_dir)
    print("Creating backup of existing covers...")
    shutil.copytree(covers_dir, backup_dir)
    print(f"Backup created at {backup_dir}")

    existing_covers = [f for f in os.listdir(covers_dir) if f.endswith('.jpg')]
    print(f"Found {len(existing_covers)} existing cover files")

    csv_file = 'top_500_albums_2023.csv'
    if not os.path.exists(csv_file):
        print(f"Error: {csv_file} not found!")
        return

    # Build the remapped set in a staging directory, swap it in at the end.
    new_covers_dir = Path('covers_new')
    if new_covers_dir.exists():
        shutil.rmtree(new_covers_dir)
    new_covers_dir.mkdir()

    mapped_count = 0
    unmapped_count = 0
    unmapped_albums = []

    print("\nMapping covers to new rankings...")
    with open(csv_file, 'r', encoding='utf-8') as file:
        csv_reader = csv.DictReader(file)
        for row in csv_reader:
            rank = row.get('Rank', '').strip()
            artist = row.get('Artist', '').strip()
            album = row.get('Album', '').strip()
            if not artist or not album:
                continue  # skip incomplete rows

            matching_cover = find_matching_cover(artist, album, existing_covers)
            if matching_cover:
                # Rebuild the filename with the new ranking number.
                safe_artist = sanitize_filename(artist)
                safe_album = sanitize_filename(album)
                new_filename = f"rank_{rank.zfill(3)}_{safe_artist}_{safe_album}.jpg"
                shutil.copy2(covers_dir / matching_cover,
                             new_covers_dir / new_filename)
                mapped_count += 1
                if mapped_count % 50 == 0:
                    print(f"✓ Mapped {mapped_count} covers so far...")
            else:
                unmapped_count += 1
                unmapped_albums.append(f"{rank}. {artist} - {album}")
                print(f"✗ No cover found for: {rank}. {artist} - {album}")

    total = mapped_count + unmapped_count
    print(f"\n🎉 MAPPING RESULTS:")
    print(f"Successfully mapped: {mapped_count}")
    print(f"Could not map: {unmapped_count}")
    print(f"Total albums: {total}")
    # Guard against ZeroDivisionError when the CSV yields no usable rows
    # (the original divided unconditionally).
    if total:
        print(f"Success rate: {mapped_count / total * 100:.1f}%")

    if unmapped_albums:
        print(f"\n❌ Albums without covers ({len(unmapped_albums)}):")
        for album in unmapped_albums[:10]:
            print(f"  {album}")
        if len(unmapped_albums) > 10:
            print(f"  ... and {len(unmapped_albums) - 10} more")

    # Promote the staged directory to be the live covers directory.
    print(f"\nReplacing covers directory...")
    shutil.rmtree(covers_dir)
    shutil.move(new_covers_dir, covers_dir)
    print(f"✅ Cover remapping complete!")
    print(f"Original covers backed up to: {backup_dir}")
    print(f"New covers available in: {covers_dir}")


if __name__ == "__main__":
    main()