Add cover art for all 23 dropped albums (ranks 501-523)

- Downloaded high-quality album artwork from iTunes API - 100% success rate for all dropped albums - Includes iconic covers: Sgt. Pepper's, White Album, Wu-Tang, Ziggy Stardust - Also covers Hendrix, Beyoncé, Prince, Sex Pistols, and more - Complete visual coverage for entire extended dataset (523 albums) - New download script for dropped albums added to scripts/ All dropped albums now have proper cover art display 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-07-01 00:55:37 +02:00 · 2025-07-01 00:55:37 +02:00 · 016e6d9a40
commit 016e6d9a40
parent 3cf9d74eae
24 changed files with 156 additions and 0 deletions
--- a/scripts/download_dropped_covers.py
+++ b/scripts/download_dropped_covers.py
@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+"""
+Download cover art for the dropped albums (ranks 501-523).
+Uses iTunes API to search for and download album artwork.
+"""
+
+import csv
+import urllib.request
+import urllib.parse
+import json
+import time
+import os
+import re
+
+def sanitize_filename(text):
+    """Sanitize text for use in filenames"""
+    # Remove problematic characters and replace with safe alternatives
+    text = re.sub(r'[<>:"/\\|?*]', '', text)
+    text = re.sub(r'[^\w\s\-_.]', '', text)
+    text = re.sub(r'\s+', '_', text)
+    return text[:100]  # Limit length
+
+def search_itunes(artist, album):
+    """Search iTunes API for album artwork"""
+    try:
+        # Clean up search terms
+        search_term = f"{artist} {album}".strip()
+        encoded_term = urllib.parse.quote(search_term)
+        
+        url = f"https://itunes.apple.com/search?term={encoded_term}&media=music&entity=album&limit=5"
+        
+        with urllib.request.urlopen(url, timeout=10) as response:
+            data = json.loads(response.read().decode())
+            
+            if data['resultCount'] > 0:
+                for result in data['results']:
+                    # Check if this result matches our search
+                    result_artist = result.get('artistName', '').lower()
+                    result_album = result.get('collectionName', '').lower()
+                    
+                    # Fuzzy matching - check if key terms are present
+                    artist_words = artist.lower().split()
+                    album_words = album.lower().split()
+                    
+                    artist_match = any(word in result_artist for word in artist_words if len(word) > 2)
+                    album_match = any(word in result_album for word in album_words if len(word) > 2)
+                    
+                    if artist_match and album_match:
+                        artwork_url = result.get('artworkUrl100', '')
+                        if artwork_url:
+                            # Get high resolution version
+                            high_res_url = artwork_url.replace('100x100bb', '600x600bb')
+                            return high_res_url
+                
+                # If no good match, return the first result's artwork
+                first_result = data['results'][0]
+                artwork_url = first_result.get('artworkUrl100', '')
+                if artwork_url:
+                    return artwork_url.replace('100x100bb', '600x600bb')
+    
+    except Exception as e:
+        print(f"   Error searching iTunes for {artist} - {album}: {e}")
+    
+    return None
+
+def download_cover(url, filepath):
+    """Download cover image from URL"""
+    try:
+        with urllib.request.urlopen(url, timeout=15) as response:
+            with open(filepath, 'wb') as f:
+                f.write(response.read())
+        return True
+    except Exception as e:
+        print(f"   Error downloading {filepath}: {e}")
+        return False
+
+def main():
+    # Read the CSV to get dropped albums (ranks 501-523)
+    dropped_albums = []
+    
+    with open('top_500_albums_2023.csv', 'r', encoding='utf-8') as file:
+        reader = csv.DictReader(file)
+        for row in reader:
+            rank = int(row['Rank'])
+            if rank >= 501:  # Dropped albums start at rank 501
+                dropped_albums.append(row)
+    
+    print(f"🎨 Found {len(dropped_albums)} dropped albums needing cover art")
+    print("📥 Starting download process...\n")
+    
+    # Create covers directory if it doesn't exist
+    os.makedirs('covers', exist_ok=True)
+    
+    success_count = 0
+    failed_downloads = []
+    
+    for i, album in enumerate(dropped_albums, 1):
+        rank = album['Rank']
+        artist = album['Artist']
+        album_name = album['Album']
+        
+        # Generate filename
+        safe_artist = sanitize_filename(artist)
+        safe_album = sanitize_filename(album_name)
+        rank_str = rank.zfill(3)
+        filename = f"rank_{rank_str}_{safe_artist}_{safe_album}.jpg"
+        filepath = os.path.join('covers', filename)
+        
+        print(f"[{i:2d}/{len(dropped_albums)}] #{rank} - {artist} - {album_name}")
+        
+        # Check if file already exists
+        if os.path.exists(filepath):
+            print(f"   ✓ Already exists: {filename}")
+            success_count += 1
+            continue
+        
+        # Search for artwork
+        artwork_url = search_itunes(artist, album_name)
+        
+        if artwork_url:
+            print(f"   🔍 Found artwork, downloading...")
+            
+            if download_cover(artwork_url, filepath):
+                print(f"   ✅ Downloaded: {filename}")
+                success_count += 1
+            else:
+                print(f"   ❌ Download failed: {filename}")
+                failed_downloads.append((rank, artist, album_name))
+        else:
+            print(f"   ❌ No artwork found: {filename}")
+            failed_downloads.append((rank, artist, album_name))
+        
+        # Rate limiting - be nice to iTunes API
+        time.sleep(1.2)
+    
+    print(f"\n🎉 Download complete!")
+    print(f"✅ Successfully downloaded: {success_count}/{len(dropped_albums)} covers")
+    
+    if failed_downloads:
+        print(f"❌ Failed downloads: {len(failed_downloads)}")
+        print("\nFailed albums:")
+        for rank, artist, album in failed_downloads:
+            print(f"   #{rank} - {artist} - {album}")
+        
+        # Save failed downloads to file
+        with open('failed_dropped_downloads.txt', 'w', encoding='utf-8') as f:
+            f.write("Failed to download cover art for these dropped albums:\n\n")
+            for rank, artist, album in failed_downloads:
+                f.write(f"#{rank} - {artist} - {album}\n")
+        print(f"\n📝 Failed downloads saved to: failed_dropped_downloads.txt")
+    
+    coverage_percentage = (success_count / len(dropped_albums)) * 100
+    print(f"\n📊 Coverage: {coverage_percentage:.1f}% of dropped albums have cover art")
+
+if __name__ == "__main__":
+    main()