Add cover art for all 23 dropped albums (ranks 501-523)
- Downloaded high-quality album artwork from the iTunes API
- 100% success rate for all dropped albums
- Includes iconic covers: Sgt. Pepper's, White Album, Wu-Tang, Ziggy Stardust
- Also covers Hendrix, Beyoncé, Prince, Sex Pistols, and more
- Complete visual coverage for the entire extended dataset (523 albums)
- New download script for dropped albums added to scripts/

All dropped albums now have proper cover art display.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
3cf9d74eae
commit
016e6d9a40
24 changed files with 156 additions and 0 deletions
156
scripts/download_dropped_covers.py
Normal file
156
scripts/download_dropped_covers.py
Normal file
|
|
@@ -0,0 +1,156 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Download cover art for the dropped albums (ranks 501-523).
|
||||
Uses iTunes API to search for and download album artwork.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
import json
|
||||
import time
|
||||
import os
|
||||
import re
|
||||
|
||||
def sanitize_filename(text):
    """Return *text* reduced to a name that is safe to use in a filename.

    Three passes run in order: characters reserved on common filesystems are
    dropped; anything that is not a word character, whitespace, hyphen,
    underscore or dot is dropped; each run of whitespace becomes a single
    underscore. The result is capped at 100 characters.
    """
    # (pattern, replacement) pairs, applied top to bottom.
    substitutions = (
        (r'[<>:"/\\|?*]', ''),
        (r'[^\w\s\-_.]', ''),
        (r'\s+', '_'),
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned[:100]
|
||||
|
||||
def _significant_words(text):
    """Return the lowercase words of *text* used for fuzzy matching.

    Words of one or two characters are ignored as noise, unless that would
    leave nothing to match on (e.g. "U2", "IV"); in that case every word is
    kept so short names can still be matched.
    """
    words = text.lower().split()
    return [word for word in words if len(word) > 2] or words


def search_itunes(artist, album, opener=None):
    """Search the iTunes Search API for an album and return an artwork URL.

    Up to five album results are requested. The first result whose artist
    name contains one of the artist's significant words AND whose collection
    name contains one of the album's significant words is preferred. If no
    result matches, the first result's artwork is used as a fallback.

    Args:
        artist: Artist name used in the search term and for matching.
        album: Album title used in the search term and for matching.
        opener: Optional callable with the same calling convention as
            ``urllib.request.urlopen`` (``opener(url, timeout=...)``) that
            returns a context manager with ``read()``. Defaults to
            ``urllib.request.urlopen``.

    Returns:
        The artwork URL with ``100x100bb`` rewritten to ``600x600bb``, or
        ``None`` when nothing was found or the lookup failed. Failures are
        printed, never raised.
    """
    if opener is None:
        opener = urllib.request.urlopen

    try:
        search_term = f"{artist} {album}".strip()
        encoded_term = urllib.parse.quote(search_term)
        url = f"https://itunes.apple.com/search?term={encoded_term}&media=music&entity=album&limit=5"

        with opener(url, timeout=10) as response:
            data = json.loads(response.read().decode())

        results = data.get('results') or []
        if not results:
            return None

        # These depend only on the query, so compute them once.
        artist_words = _significant_words(artist)
        album_words = _significant_words(album)

        for result in results:
            result_artist = result.get('artistName', '').lower()
            result_album = result.get('collectionName', '').lower()

            # Fuzzy matching - a single shared key word is enough on each side.
            artist_match = any(word in result_artist for word in artist_words)
            album_match = any(word in result_album for word in album_words)

            if artist_match and album_match:
                artwork_url = result.get('artworkUrl100', '')
                if artwork_url:
                    # Swap the thumbnail size token for the high resolution one.
                    return artwork_url.replace('100x100bb', '600x600bb')

        # If no good match, fall back to the first result's artwork.
        artwork_url = results[0].get('artworkUrl100', '')
        if artwork_url:
            return artwork_url.replace('100x100bb', '600x600bb')

    except Exception as e:
        # Deliberately broad: one failed lookup (network, HTTP, bad JSON)
        # must not abort a batch run; the caller treats None as "not found".
        print(f" Error searching iTunes for {artist} - {album}: {e}")

    return None
|
||||
|
||||
def download_cover(url, filepath):
    """Download the image at *url* and store it at *filepath*.

    The response body is read completely before anything is written, and is
    written to a ``<filepath>.part`` file that is then moved into place. A
    failed or empty download therefore never leaves a file at *filepath*
    that a later run could mistake for a finished cover.

    Args:
        url: Location of the image to fetch.
        filepath: Destination path for the downloaded image.

    Returns:
        True when a non-empty body was saved to *filepath*, False otherwise.
        Errors are printed, never raised.
    """
    partial_path = f"{filepath}.part"
    try:
        with urllib.request.urlopen(url, timeout=15) as response:
            data = response.read()

        if not data:
            # A zero-byte file is not a usable cover image.
            raise ValueError("empty response body")

        with open(partial_path, 'wb') as f:
            f.write(data)
        # Move into place only once the whole file is on disk.
        os.replace(partial_path, filepath)
        return True
    except Exception as e:
        print(f" Error downloading {filepath}: {e}")
        # Best-effort cleanup of a half-written temporary file.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        return False
|
||||
|
||||
def main():
    """Download cover art for every album ranked 501 or higher.

    Reads ``top_500_albums_2023.csv`` from the current working directory
    (columns used: ``Rank``, ``Artist``, ``Album``), looks up artwork for each
    dropped album with ``search_itunes`` and saves it under ``covers/`` with
    ``download_cover``. Covers already on disk are counted as successes and
    skipped. Albums that could not be fetched are listed on stdout and written
    to ``failed_dropped_downloads.txt``. A coverage summary is printed last.
    """
    # Read the CSV to get dropped albums (ranks 501-523)
    with open('top_500_albums_2023.csv', 'r', encoding='utf-8') as file:
        dropped_albums = [
            row for row in csv.DictReader(file)
            if int(row['Rank']) >= 501  # Dropped albums start at rank 501
        ]

    total = len(dropped_albums)
    print(f"🎨 Found {len(dropped_albums)} dropped albums needing cover art")
    print("📥 Starting download process...\n")

    # Create covers directory if it doesn't exist
    os.makedirs('covers', exist_ok=True)

    success_count = 0
    failed_downloads = []

    for i, album in enumerate(dropped_albums, 1):
        rank = album['Rank']
        artist = album['Artist']
        album_name = album['Album']

        # Generate filename
        safe_artist = sanitize_filename(artist)
        safe_album = sanitize_filename(album_name)
        rank_str = rank.zfill(3)
        filename = f"rank_{rank_str}_{safe_artist}_{safe_album}.jpg"
        filepath = os.path.join('covers', filename)

        print(f"[{i:2d}/{len(dropped_albums)}] #{rank} - {artist} - {album_name}")

        # Existing files need no request, so they also skip the delay below.
        if os.path.exists(filepath):
            print(f" ✓ Already exists: {filename}")
            success_count += 1
            continue

        # Search for artwork
        artwork_url = search_itunes(artist, album_name)

        if artwork_url:
            print(f" 🔍 Found artwork, downloading...")

            if download_cover(artwork_url, filepath):
                print(f" ✅ Downloaded: {filename}")
                success_count += 1
            else:
                print(f" ❌ Download failed: {filename}")
                failed_downloads.append((rank, artist, album_name))
        else:
            print(f" ❌ No artwork found: {filename}")
            failed_downloads.append((rank, artist, album_name))

        # Rate limiting - be nice to iTunes API
        time.sleep(1.2)

    print(f"\n🎉 Download complete!")
    print(f"✅ Successfully downloaded: {success_count}/{len(dropped_albums)} covers")

    if failed_downloads:
        print(f"❌ Failed downloads: {len(failed_downloads)}")
        print("\nFailed albums:")
        # Distinct loop names so the download loop's variables are not shadowed.
        for failed_rank, failed_artist, failed_album in failed_downloads:
            print(f" #{failed_rank} - {failed_artist} - {failed_album}")

        # Save failed downloads to file
        with open('failed_dropped_downloads.txt', 'w', encoding='utf-8') as f:
            f.write("Failed to download cover art for these dropped albums:\n\n")
            for failed_rank, failed_artist, failed_album in failed_downloads:
                f.write(f"#{failed_rank} - {failed_artist} - {failed_album}\n")
        print(f"\n📝 Failed downloads saved to: failed_dropped_downloads.txt")

    if total:
        coverage_percentage = (success_count / total) * 100
        print(f"\n📊 Coverage: {coverage_percentage:.1f}% of dropped albums have cover art")
    else:
        # No rows ranked >= 501: a percentage is undefined, so do not divide by zero.
        print("\n📊 Coverage: n/a (no dropped albums found in the CSV)")
|
||||
|
||||
# Run the download only when this file is executed as a script, so that
# importing the module does not trigger it.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue