#!/usr/bin/env python3
"""
Download album covers for ALL 500 albums using iTunes Search API
"""

import urllib.request
import urllib.parse
import json
import csv
import os
import re
import time

# Rank assigned to rows whose 'Rank' cell is missing or not a number, so
# they sort after every properly ranked album instead of crashing the sort.
_UNRANKED = 999

# How often (in processed albums) a progress summary is printed.
_PROGRESS_INTERVAL = 25


def sanitize_filename(text):
    """Return *text* reduced to characters that are safe in a filename.

    Everything except word characters, whitespace, '-', '_' and '.' is
    dropped (this already covers the reserved characters <>:"/\\|?*),
    runs of whitespace become a single underscore, and the result is
    truncated to 100 characters.
    """
    text = re.sub(r'[^\w\s\-_\.]', '', text)
    text = re.sub(r'\s+', '_', text.strip())
    return text[:100]  # Limit length


def _artwork_url(result):
    """Return the 600x600 artwork URL of an iTunes result ('' if it has none)."""
    return (result.get('artworkUrl100') or '').replace('100x100', '600x600')


def _names_overlap(wanted, candidate):
    """Return True when both names are non-empty and one contains the other."""
    if not wanted or not candidate:
        # '' is a substring of every string; never treat it as a match.
        return False
    return wanted in candidate or candidate in wanted


def search_itunes(artist, album):
    """Search the iTunes API for album artwork.

    Args:
        artist: Artist name as it appears in the CSV.
        album: Album title as it appears in the CSV.

    Returns:
        The URL of a 600x600 cover image, or None when the request fails
        or no result carries artwork. A result whose artist and album
        names overlap with the requested ones is preferred; otherwise the
        first result that has artwork is used.
    """
    # Clean up search terms
    search_term = f"{artist} {album}".strip()
    search_term = re.sub(r'\([^)]*\)', '', search_term)  # Remove parentheses content
    search_term = re.sub(r'\s+', ' ', search_term).strip()

    # URL encode the search term
    encoded_term = urllib.parse.quote(search_term)
    url = (
        f"https://itunes.apple.com/search?term={encoded_term}"
        "&media=music&entity=album&limit=5"
    )

    wanted_artist = artist.lower()
    wanted_album = album.lower()

    try:
        with urllib.request.urlopen(url, timeout=15) as response:
            data = json.loads(response.read().decode())

        # Only results that actually carry artwork are useful to the caller.
        results = [r for r in data.get('results', []) if _artwork_url(r)]

        # Try to find the best match
        for result in results:
            result_artist = result.get('artistName', '').lower()
            result_album = result.get('collectionName', '').lower()
            if (_names_overlap(wanted_artist, result_artist)
                    and _names_overlap(wanted_album, result_album)):
                return _artwork_url(result)

        # If no close match, fall back to the first result with artwork
        if results:
            return _artwork_url(results[0])
    except Exception as e:
        # Deliberately broad: this is a best-effort lookup and one failed
        # request or malformed response must not abort the whole batch.
        print(f"Error searching for {artist} - {album}: {e}")
        return None

    return None


def _rank_sort_key(row):
    """Return the numeric rank of a CSV row, or _UNRANKED if it is unusable."""
    try:
        return int(row.get('Rank') or _UNRANKED)
    except (TypeError, ValueError):
        return _UNRANKED


def _download_file(url, filepath, timeout=30):
    """Download *url* to *filepath* without ever leaving a partial file there.

    The data is written to '<filepath>.part' and moved into place only
    after the transfer completed, so an interrupted download is not
    mistaken for a finished cover on the next run.
    """
    partial_path = filepath + '.part'
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            payload = response.read()
        with open(partial_path, 'wb') as out:
            out.write(payload)
        os.replace(partial_path, filepath)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)


def download_all_covers(csv_file='top_500_albums_2023.csv', covers_dir='covers',
                        delay=1.2):
    """Download covers for every album listed in the CSV file.

    Args:
        csv_file: CSV with 'Rank', 'Artist' and 'Album' columns.
        covers_dir: Directory the JPEG covers are written to (created if
            missing).
        delay: Seconds to wait after each search/download attempt, to be
            respectful to the iTunes API.

    Covers that already exist in *covers_dir* are skipped. Albums for
    which no artwork could be found or downloaded are listed in
    'failed_downloads.txt' in the current directory.
    """
    # Create covers directory
    os.makedirs(covers_dir, exist_ok=True)

    # Read the CSV file
    if not os.path.exists(csv_file):
        print(f"Error: {csv_file} not found!")
        return

    with open(csv_file, 'r', encoding='utf-8') as file:
        rows = list(csv.DictReader(file))
    rows.sort(key=_rank_sort_key)

    # Keep only rows that name both an artist and an album. DictReader
    # yields None for cells missing from a short row, hence the `or ''`.
    albums = []
    for row in rows:
        rank = (row.get('Rank') or '').strip()
        artist = (row.get('Artist') or '').strip()
        album = (row.get('Album') or '').strip()
        if artist and album:
            albums.append((rank, artist, album))
    total = len(albums)

    albums_processed = 0
    albums_found = 0
    albums_skipped = 0
    failed_albums = []

    print(f"Downloading covers for ALL {total} albums...")
    print("This will take a while to be respectful to the iTunes API...\n")

    for rank, artist, album in albums:
        albums_processed += 1

        # Create filename
        safe_artist = sanitize_filename(artist)
        safe_album = sanitize_filename(album)
        filename = f"rank_{rank.zfill(3)}_{safe_artist}_{safe_album}.jpg"
        filepath = os.path.join(covers_dir, filename)

        # Skip if already downloaded
        if os.path.exists(filepath):
            albums_skipped += 1
            if albums_processed % _PROGRESS_INTERVAL == 0:
                print(f"✓ Already exists: {rank}. {artist} - {album}")
            continue

        print(f"Searching [{albums_processed}/{total}]: {rank}. {artist} - {album}")

        # Search for artwork
        artwork_url = search_itunes(artist, album)

        if artwork_url:
            try:
                print(f"  Downloading from: {artwork_url}")
                _download_file(artwork_url, filepath)
            except Exception as e:
                # Best-effort: record the failure and carry on with the rest.
                print(f"  ✗ Download failed: {e}")
                failed_albums.append(f"{rank}. {artist} - {album}")
            else:
                print(f"  ✓ Downloaded: {filename}")
                albums_found += 1
        else:
            print("  ✗ No artwork found")
            failed_albums.append(f"{rank}. {artist} - {album}")

        # Be nice to the API - longer delay for bulk download
        time.sleep(delay)

        # Progress update every _PROGRESS_INTERVAL albums
        if albums_processed % _PROGRESS_INTERVAL == 0:
            attempted = albums_processed - albums_skipped
            print("\n--- Progress Update ---")
            print(f"Processed: {albums_processed}/{total} "
                  f"({albums_processed / total * 100:.1f}%)")
            print(f"Found: {albums_found}")
            print(f"Skipped (already exist): {albums_skipped}")
            print(f"Failed: {len(failed_albums)}")
            print(f"Success rate: {albums_found / attempted * 100:.1f}%")
            print("----------------------\n")

    # An empty CSV processes zero albums; report 0% instead of dividing by 0.
    if albums_processed:
        overall_rate = (albums_found + albums_skipped) / albums_processed * 100
    else:
        overall_rate = 0.0

    print("\n🎉 FINAL RESULTS:")
    print(f"Albums processed: {albums_processed}")
    print(f"Already existed: {albums_skipped}")
    print(f"New downloads: {albums_found}")
    print(f"Failed to find: {len(failed_albums)}")
    print(f"Total covers available: {albums_found + albums_skipped}")
    print(f"Overall success rate: {overall_rate:.1f}%")

    if failed_albums:
        print(f"\n❌ Failed albums ({len(failed_albums)}):")
        for entry in failed_albums[:10]:  # Show first 10
            print(f"  {entry}")
        if len(failed_albums) > 10:
            print(f"  ... and {len(failed_albums) - 10} more")

        # Save failed albums to file (UTF-8 so non-ASCII names never fail)
        with open('failed_downloads.txt', 'w', encoding='utf-8') as f:
            f.writelines(f"{entry}\n" for entry in failed_albums)
        print("\nFull list saved to failed_downloads.txt")


if __name__ == "__main__":
    print("🎵 Top 500 Albums - Complete Cover Art Downloader")
    print("=" * 50)

    confirm = input(
        "This will download covers for all 500 albums.\n"
        "This may take 10-15 minutes. Continue? (y/n): "
    )
    if confirm.strip().lower() in ('y', 'yes'):
        download_all_covers()
    else:
        print("Download cancelled.")

    print("\nDone!")