Add cover art for all 23 dropped albums (ranks 501-523)

- Downloaded high-quality album artwork from iTunes API
- 100% success rate for all dropped albums
- Includes iconic covers: Sgt. Pepper's, White Album, Wu-Tang, Ziggy Stardust
- Also covers Hendrix, Beyoncé, Prince, Sex Pistols, and more
- Complete visual coverage for entire extended dataset (523 albums)
- New download script for dropped albums added to scripts/

All dropped albums now have proper cover art display

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Johan Lundberg 2025-07-01 00:55:37 +02:00
parent 3cf9d74eae
commit 016e6d9a40
24 changed files with 156 additions and 0 deletions

View file

@ -0,0 +1,156 @@
#!/usr/bin/env python3
"""
Download cover art for the dropped albums (ranks 501-523).
Uses iTunes API to search for and download album artwork.
"""
import csv
import urllib.request
import urllib.parse
import json
import time
import os
import re
def sanitize_filename(text):
    """Return *text* reduced to a form that is safe to embed in a filename.

    The substitutions run in order: characters reserved on common
    filesystems are dropped, then every remaining character that is not a
    word character, whitespace, hyphen, underscore or dot is dropped, and
    finally each run of whitespace becomes a single underscore. The result
    is truncated to at most 100 characters.
    """
    substitutions = (
        (r'[<>:"/\\|?*]', ''),    # reserved path characters
        (r'[^\w\s\-_.]', ''),     # any other punctuation or symbol
        (r'\s+', '_'),            # whitespace run -> one underscore
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned[:100]
def search_itunes(artist, album):
"""Search iTunes API for album artwork"""
try:
# Clean up search terms
search_term = f"{artist} {album}".strip()
encoded_term = urllib.parse.quote(search_term)
url = f"https://itunes.apple.com/search?term={encoded_term}&media=music&entity=album&limit=5"
with urllib.request.urlopen(url, timeout=10) as response:
data = json.loads(response.read().decode())
if data['resultCount'] > 0:
for result in data['results']:
# Check if this result matches our search
result_artist = result.get('artistName', '').lower()
result_album = result.get('collectionName', '').lower()
# Fuzzy matching - check if key terms are present
artist_words = artist.lower().split()
album_words = album.lower().split()
artist_match = any(word in result_artist for word in artist_words if len(word) > 2)
album_match = any(word in result_album for word in album_words if len(word) > 2)
if artist_match and album_match:
artwork_url = result.get('artworkUrl100', '')
if artwork_url:
# Get high resolution version
high_res_url = artwork_url.replace('100x100bb', '600x600bb')
return high_res_url
# If no good match, return the first result's artwork
first_result = data['results'][0]
artwork_url = first_result.get('artworkUrl100', '')
if artwork_url:
return artwork_url.replace('100x100bb', '600x600bb')
except Exception as e:
print(f" Error searching iTunes for {artist} - {album}: {e}")
return None
def download_cover(url, filepath):
    """Download the image at *url* and store it at *filepath*.

    The response is read completely before anything is written, and the data
    is first written to a sibling ``.part`` file that is then moved into
    place. A failed or interrupted download therefore never leaves a
    truncated file at *filepath* that a later run would mistake for an
    existing cover. An empty response body is treated as a failure.

    Args:
        url: URL of the image to fetch.
        filepath: Destination path (str or path-like) of the image file.

    Returns:
        True if the file was written, False otherwise (the error is printed,
        not raised).
    """
    partial_path = f"{os.fspath(filepath)}.part"
    try:
        with urllib.request.urlopen(url, timeout=15) as response:
            image_bytes = response.read()
        if not image_bytes:
            raise ValueError("empty response body")
        with open(partial_path, 'wb') as f:
            f.write(image_bytes)
        # Move into place only once the whole image is safely on disk.
        os.replace(partial_path, filepath)
        return True
    except Exception as e:
        # Deliberately broad: downloading is best-effort and the caller only
        # needs to know whether a usable file now exists.
        print(f" Error downloading {filepath}: {e}")
        if os.path.exists(partial_path):
            os.remove(partial_path)
        return False
def _load_dropped_albums(csv_path, first_dropped_rank=501):
    """Return the CSV rows whose 'Rank' is at or beyond *first_dropped_rank*."""
    dropped = []
    # newline='' lets the csv module handle line endings itself, which it
    # needs in order to parse quoted fields containing newlines correctly.
    with open(csv_path, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            try:
                rank = int(row['Rank'])
            except (TypeError, ValueError):
                # Blank or non-numeric rank: skip the row instead of aborting
                # the whole run.
                print(f"⚠️ Skipping row with invalid rank: {row['Rank']!r}")
                continue
            if rank >= first_dropped_rank:
                dropped.append(row)
    return dropped


def _cover_path(album):
    """Return (filename, filepath) of the cover image for a CSV row."""
    safe_artist = sanitize_filename(album['Artist'])
    safe_album = sanitize_filename(album['Album'])
    rank_str = album['Rank'].strip().zfill(3)
    filename = f"rank_{rank_str}_{safe_artist}_{safe_album}.jpg"
    return filename, os.path.join('covers', filename)


def _report_failures(failed_downloads):
    """Print the failed albums and save the list to a text file."""
    print(f"❌ Failed downloads: {len(failed_downloads)}")
    print("\nFailed albums:")
    for rank, artist, album in failed_downloads:
        print(f" #{rank} - {artist} - {album}")
    # Save failed downloads to file
    with open('failed_dropped_downloads.txt', 'w', encoding='utf-8') as f:
        f.write("Failed to download cover art for these dropped albums:\n\n")
        f.writelines(
            f"#{rank} - {artist} - {album}\n"
            for rank, artist, album in failed_downloads
        )
    print("\n📝 Failed downloads saved to: failed_dropped_downloads.txt")


def main():
    """Download cover art for every dropped album listed in the CSV.

    Reads 'top_500_albums_2023.csv' from the current directory, selects the
    rows ranked 501 and above, and stores one JPEG per album under 'covers/'.
    Albums whose cover file already exists are counted as successes and are
    not fetched again. Failures are printed and written to
    'failed_dropped_downloads.txt'.
    """
    dropped_albums = _load_dropped_albums('top_500_albums_2023.csv')
    print(f"🎨 Found {len(dropped_albums)} dropped albums needing cover art")
    if not dropped_albums:
        # Nothing to download; returning here also avoids a division by zero
        # in the coverage summary below.
        return
    print("📥 Starting download process...\n")

    # Create covers directory if it doesn't exist
    os.makedirs('covers', exist_ok=True)

    total = len(dropped_albums)
    success_count = 0
    failed_downloads = []
    for i, album in enumerate(dropped_albums, 1):
        rank = album['Rank'].strip()
        artist = album['Artist']
        album_name = album['Album']
        filename, filepath = _cover_path(album)
        print(f"[{i:2d}/{total}] #{rank} - {artist} - {album_name}")

        # Check if file already exists
        if os.path.exists(filepath):
            print(f" ✓ Already exists: {filename}")
            success_count += 1
            continue

        # Search for artwork
        artwork_url = search_itunes(artist, album_name)
        if not artwork_url:
            print(f" ❌ No artwork found: {filename}")
            failed_downloads.append((rank, artist, album_name))
        else:
            print(" 🔍 Found artwork, downloading...")
            if download_cover(artwork_url, filepath):
                print(f" ✅ Downloaded: {filename}")
                success_count += 1
            else:
                print(f" ❌ Download failed: {filename}")
                failed_downloads.append((rank, artist, album_name))

        # Rate limiting - be nice to iTunes API
        time.sleep(1.2)

    print("\n🎉 Download complete!")
    print(f"✅ Successfully downloaded: {success_count}/{total} covers")
    if failed_downloads:
        _report_failures(failed_downloads)

    coverage_percentage = (success_count / total) * 100
    print(f"\n📊 Coverage: {coverage_percentage:.1f}% of dropped albums have cover art")


if __name__ == "__main__":
    main()