#!/usr/bin/env python3
"""
Download album cover images for Top 500 Albums using iTunes Search API
Simple version using only built-in Python modules
"""

import csv
import json
import os
import re
import time
import urllib.parse
import urllib.request

# Directory (relative to the working directory) that receives the images.
COVERS_DIR = 'covers'
# Input CSV; the code reads its 'Rank', 'Artist' and 'Album' columns.
CSV_FILE = 'top_500_albums_2023.csv'
# Timeout, in seconds, applied to every HTTP request.
REQUEST_TIMEOUT = 10
# Pause, in seconds, after each album lookup so the API is not hammered.
API_DELAY_SECONDS = 1


def sanitize_filename(text):
    """Remove or replace characters that aren't valid in filenames.

    Characters that are reserved on common filesystems are dropped, then
    anything that is not a word character, whitespace, '-', '_' or '.' is
    dropped, and finally runs of whitespace become a single underscore.

    Args:
        text: Arbitrary string (artist name, album title, ...).

    Returns:
        The cleaned string, truncated to at most 100 characters.
    """
    text = re.sub(r'[<>:"/\\|?*]', '', text)
    text = re.sub(r'[^\w\s\-_\.]', '', text)
    text = re.sub(r'\s+', '_', text.strip())
    return text[:100]  # Limit length


def _names_overlap(wanted, found):
    """Return True when two non-empty names contain one another (case-insensitive)."""
    wanted = wanted.lower()
    found = found.lower()
    # An empty string is a substring of everything, so without this guard a
    # result that lacks a name would "match" every query.
    if not wanted or not found:
        return False
    return wanted in found or found in wanted


def _artwork_url(result):
    """Return the 600x600 artwork URL of one search result, or None if it has none."""
    url = result.get('artworkUrl100') or ''
    # The size is part of the URL; rewrite it to request the larger image.
    return url.replace('100x100', '600x600') or None


def _pick_artwork(results, artist, album):
    """Choose the best artwork URL from a list of iTunes search results."""
    for result in results:
        if (_names_overlap(artist, result.get('artistName') or '')
                and _names_overlap(album, result.get('collectionName') or '')):
            url = _artwork_url(result)
            if url:
                return url
    # No usable name match: fall back to the first result, as a best effort.
    return _artwork_url(results[0])


def search_itunes(artist, album):
    """Search iTunes API for album artwork.

    Args:
        artist: Artist name.
        album: Album title.

    Returns:
        URL of a 600x600 cover image, or None when the search fails or
        yields no artwork. Errors are printed, never raised.
    """
    # Clean up search terms: drop parenthesised text and collapse whitespace.
    search_term = f"{artist} {album}".strip()
    search_term = re.sub(r'\([^)]*\)', '', search_term)
    search_term = re.sub(r'\s+', ' ', search_term).strip()
    if not search_term:
        return None

    encoded_term = urllib.parse.quote(search_term)
    url = f"https://itunes.apple.com/search?term={encoded_term}&media=music&entity=album&limit=5"

    try:
        with urllib.request.urlopen(url, timeout=REQUEST_TIMEOUT) as response:
            data = json.loads(response.read().decode())
    except Exception as e:
        # Deliberately broad: one failed lookup must not abort the whole run.
        print(f"Error searching for {artist} - {album}: {e}")
        return None

    results = data.get('results') if isinstance(data, dict) else None
    if not results:
        return None
    return _pick_artwork(results, artist, album)


def _download_file(url, filepath):
    """Download *url* to *filepath*, never leaving a partial file behind.

    The data is written to a sibling '.part' file and moved into place only
    once complete, so an interrupted download is not later mistaken for an
    already-downloaded cover.

    Raises:
        OSError: On network or filesystem failure.
        ValueError: If the server returns an empty body.
    """
    partial_path = filepath + '.part'
    try:
        with urllib.request.urlopen(url, timeout=REQUEST_TIMEOUT) as response:
            payload = response.read()
        if not payload:
            raise ValueError("empty response body")
        with open(partial_path, 'wb') as out:
            out.write(payload)
        os.replace(partial_path, filepath)
    finally:
        # Only still present when something above failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)


def _field(row, name):
    """Return the stripped value of a CSV column, or '' if missing."""
    # DictReader fills columns missing from short rows with None.
    return (row.get(name) or '').strip()


def _rank_key(row):
    """Sort key for a CSV row: its numeric rank, unranked rows last."""
    try:
        return int(_field(row, 'Rank'))
    except ValueError:
        return float('inf')


def _read_rows(csv_file):
    """Read every row of the CSV into a list of dicts."""
    # utf-8-sig also accepts plain UTF-8 but strips a leading BOM, which
    # would otherwise become part of the first column name.
    with open(csv_file, 'r', encoding='utf-8-sig', newline='') as file:
        return list(csv.DictReader(file))


def _download_covers(rows, covers_dir, limit=None):
    """Download covers for *rows*, skipping rows without artist or album.

    Args:
        rows: Iterable of CSV row dicts.
        covers_dir: Existing directory that receives the images.
        limit: Stop after this many valid rows; None means no cap.

    Returns:
        Tuple (albums_processed, albums_found).
    """
    albums_processed = 0
    albums_found = 0

    for row in rows:
        if limit is not None and albums_processed >= limit:
            break

        rank = _field(row, 'Rank')
        artist = _field(row, 'Artist')
        album = _field(row, 'Album')
        if not artist or not album:
            continue

        albums_processed += 1

        # Rank is sanitized too, so no CSV value can add path separators.
        safe_rank = sanitize_filename(rank).zfill(3)
        safe_artist = sanitize_filename(artist)
        safe_album = sanitize_filename(album)
        filename = f"rank_{safe_rank}_{safe_artist}_{safe_album}.jpg"
        filepath = os.path.join(covers_dir, filename)

        # Skip if already downloaded
        if os.path.exists(filepath):
            print(f"✓ Already exists: {rank}. {artist} - {album}")
            albums_found += 1
            continue

        print(f"Searching: {rank}. {artist} - {album}")
        artwork_url = search_itunes(artist, album)

        if artwork_url:
            try:
                print(f" Downloading from: {artwork_url}")
                _download_file(artwork_url, filepath)
            except Exception as e:
                # Best effort: report the failure and carry on.
                print(f" ✗ Download failed: {e}")
            else:
                print(f" ✓ Downloaded: {filename}")
                albums_found += 1
        else:
            print(" ✗ No artwork found")

        # Be nice to the API
        time.sleep(API_DELAY_SECONDS)

    return albums_processed, albums_found


def _print_summary(title, albums_processed, albums_found):
    """Print the result counters and the success rate."""
    print(f"\n{title}")
    print(f"Albums processed: {albums_processed}")
    print(f"Artwork found: {albums_found}")
    # Avoid dividing by zero when no valid row was processed.
    rate = albums_found / albums_processed * 100 if albums_processed else 0.0
    print(f"Success rate: {rate:.1f}%")


def download_sample_covers(limit=10):
    """Download a sample of album covers to test the system.

    Takes the first *limit* rows, in file order, that have both an artist
    and an album.

    Args:
        limit: Maximum number of albums to process.
    """
    os.makedirs(COVERS_DIR, exist_ok=True)

    if not os.path.exists(CSV_FILE):
        print(f"Error: {CSV_FILE} not found!")
        return

    print(f"Downloading sample of {limit} album covers...")

    rows = _read_rows(CSV_FILE)
    albums_processed, albums_found = _download_covers(rows, COVERS_DIR, limit)
    _print_summary("Sample Results:", albums_processed, albums_found)


def download_top_albums(limit=50):
    """Download covers for top N albums.

    Rows are sorted by numeric rank and the first *limit* rows are taken;
    rows among them without an artist or album are skipped.

    Args:
        limit: Number of top-ranked rows to consider.
    """
    os.makedirs(COVERS_DIR, exist_ok=True)

    if not os.path.exists(CSV_FILE):
        print(f"Error: {CSV_FILE} not found!")
        return

    print(f"Downloading covers for top {limit} albums...")

    # Sort by rank to get top albums first
    rows = sorted(_read_rows(CSV_FILE), key=_rank_key)
    albums_processed, albums_found = _download_covers(rows[:limit], COVERS_DIR)
    _print_summary(f"Top {limit} Results:", albums_processed, albums_found)


def main():
    """Prompt for a download mode and run it."""
    print("Top 500 Albums Cover Art Downloader (Simple Version)")
    print("==================================================")

    choice = input("Choose option:\n1. Download sample (10 albums)\n2. Download top 50 albums\n3. Download top 100 albums\nEnter choice (1-3): ").strip()

    if choice == '1':
        download_sample_covers(10)
    elif choice == '2':
        download_top_albums(50)
    elif choice == '3':
        download_top_albums(100)
    else:
        print("Invalid choice!")

    print("\nDone!")


if __name__ == "__main__":
    main()