#!/usr/bin/env python3
"""
Download album cover images for Top 500 Albums using iTunes Search API
"""

import csv
import json
import os
import re
import time
from urllib.parse import quote
from urllib.request import urlretrieve

try:
    import requests
except ImportError:
    # Only the iTunes lookup needs `requests`; the missing-covers report is
    # purely local, so keep the module importable without it.
    requests = None

ITUNES_SEARCH_URL = "https://itunes.apple.com/search"
CSV_FILE = 'top_500_albums_2023.csv'
COVERS_DIR = 'covers'
LOG_FILE = 'download_log.json'
MISSING_REPORT_FILE = 'missing_covers.csv'

# Compiled once: these are applied to every row of the CSV.
_UNSAFE_FILENAME_CHARS = re.compile(r'[^\w\s\-_\.]')
_WHITESPACE = re.compile(r'\s+')
_PARENTHESIZED = re.compile(r'\([^)]*\)')


def sanitize_filename(text):
    """Return *text* reduced to characters that are safe in a filename.

    Everything except word characters, whitespace, ``-``, ``_`` and ``.`` is
    dropped (this already covers ``<>:"/\\|?*``), runs of whitespace become a
    single underscore, and the result is capped at 100 characters.
    """
    text = _UNSAFE_FILENAME_CHARS.sub('', text)
    text = _WHITESPACE.sub('_', text.strip())
    return text[:100]  # Limit length


def _format_rank(rank):
    """Return *rank* zero-padded to three digits for use in a filename.

    The CSV yields ranks as strings, so they must be converted before the
    ``03d`` format can be applied. Non-numeric ranks fall back to their
    sanitized text instead of raising.
    """
    try:
        return f"{int(rank):03d}"
    except (TypeError, ValueError):
        return sanitize_filename(str(rank or '')) or 'unranked'


def _cover_filename(rank, artist, album):
    """Build the cover image filename shared by the downloader and the report."""
    safe_artist = sanitize_filename(artist)
    safe_album = sanitize_filename(album)
    return f"rank_{_format_rank(rank)}_{safe_artist}_{safe_album}.jpg"


def _read_albums(csv_path):
    """Yield ``(rank, artist, album)`` for every usable row of the CSV.

    Rows without an artist or an album are skipped. Missing cells (which
    ``csv.DictReader`` reports as ``None``) are treated as empty strings.
    """
    with open(csv_path, 'r', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            rank = (row.get('Rank') or '').strip()
            artist = (row.get('Artist') or '').strip()
            album = (row.get('Album') or '').strip()
            if artist and album:
                yield rank, artist, album


def _names_overlap(expected, candidate):
    """Return True if one name contains the other, ignoring case.

    Both names must be non-empty: the empty string is a substring of every
    string and would otherwise match anything.
    """
    expected = (expected or '').lower()
    candidate = (candidate or '').lower()
    return bool(expected and candidate) and (
        expected in candidate or candidate in expected
    )


def _artwork_url(result):
    """Return the 600x600 artwork URL of an iTunes result, or None if absent."""
    url = (result.get('artworkUrl100') or '').replace('100x100', '600x600')
    return url or None


def search_itunes(artist, album):
    """Search iTunes API for album artwork.

    Args:
        artist: Artist name as listed in the CSV.
        album: Album title as listed in the CSV.

    Returns:
        The URL of a 600x600 cover image, or None when the request fails or
        no result carries artwork. The first result whose artist and album
        both overlap with the query wins; otherwise the first result is used.

    Raises:
        RuntimeError: if the ``requests`` package is not installed.
    """
    if requests is None:
        raise RuntimeError("The 'requests' package is required to search iTunes")

    # Clean up search terms
    search_term = f"{artist} {album}".strip()
    search_term = _PARENTHESIZED.sub('', search_term)  # Remove parentheses content
    search_term = _WHITESPACE.sub(' ', search_term).strip()

    params = {
        'term': search_term,
        'media': 'music',
        'entity': 'album',
        'limit': 5,
    }

    try:
        response = requests.get(ITUNES_SEARCH_URL, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error searching for {artist} - {album}: {e}")
        return None

    results = data.get('results') if isinstance(data, dict) else None
    if not results:
        return None

    # Try to find the best match
    for result in results:
        if (_names_overlap(artist, result.get('artistName'))
                and _names_overlap(album, result.get('collectionName'))):
            artwork_url = _artwork_url(result)
            if artwork_url:
                return artwork_url

    # If no exact match, return the first result
    return _artwork_url(results[0])


def _load_log(log_path):
    """Load the ``{album_key: succeeded}`` log, or return {} if unusable."""
    if not os.path.exists(log_path):
        return {}
    try:
        with open(log_path, 'r', encoding='utf-8') as f:
            log = json.load(f)
    except (OSError, ValueError) as e:
        print(f"Warning: ignoring unreadable {log_path}: {e}")
        return {}
    return log if isinstance(log, dict) else {}


def _save_log(log_path, log):
    """Write the progress log to disk."""
    with open(log_path, 'w', encoding='utf-8') as f:
        json.dump(log, f, indent=2)


def _download_file(url, destination):
    """Download *url* to *destination* without ever leaving a partial file.

    The data is fetched into a ``.part`` file and moved into place only after
    the transfer completes, so an interrupted download is not mistaken for a
    finished cover on the next run.
    """
    partial = destination + '.part'
    try:
        urlretrieve(url, partial)
        os.replace(partial, destination)
    finally:
        if os.path.exists(partial):
            os.remove(partial)


def download_album_covers():
    """Main function to download all album covers.

    Reads the album CSV, looks each album up on iTunes and stores the artwork
    in the covers directory. Outcomes are recorded in the JSON log after every
    album so that a later run skips albums that already failed.
    """
    if requests is None:
        print("Error: the 'requests' package is not installed!")
        return

    # Create covers directory
    os.makedirs(COVERS_DIR, exist_ok=True)

    if not os.path.exists(CSV_FILE):
        print(f"Error: {CSV_FILE} not found!")
        return

    albums_processed = 0
    albums_found = 0
    albums_downloaded = 0

    # Keep track of what we've processed
    processed_albums = _load_log(LOG_FILE)

    for rank, artist, album in _read_albums(CSV_FILE):
        albums_processed += 1

        filename = _cover_filename(rank, artist, album)
        filepath = os.path.join(COVERS_DIR, filename)

        # Skip if already downloaded
        if os.path.exists(filepath):
            print(f"✓ Already exists: {rank}. {artist} - {album}")
            albums_downloaded += 1
            continue

        # Skip if we've already tried and failed
        album_key = f"{artist}_{album}"
        if not processed_albums.get(album_key, True):
            print(f"⚠ Previously failed: {rank}. {artist} - {album}")
            continue

        print(f"Searching: {rank}. {artist} - {album}")

        # Search for artwork
        artwork_url = search_itunes(artist, album)

        if artwork_url:
            try:
                print(f"  Downloading from: {artwork_url}")
                _download_file(artwork_url, filepath)
            except (OSError, ValueError) as e:
                # OSError covers URLError/HTTPError; ValueError a malformed URL.
                print(f"  ✗ Download failed: {e}")
                processed_albums[album_key] = False
            else:
                print(f"  ✓ Downloaded: {filename}")
                albums_found += 1
                albums_downloaded += 1
                processed_albums[album_key] = True
        else:
            print(f"  ✗ No artwork found")
            processed_albums[album_key] = False

        # Save progress
        _save_log(LOG_FILE, processed_albums)

        # Be nice to the API
        time.sleep(0.5)

        # Progress update
        if albums_processed % 25 == 0:
            print(f"\nProgress: {albums_processed}/500 processed, {albums_found} found, {albums_downloaded} downloaded\n")

    print(f"\nFinal Results:")
    print(f"Albums processed: {albums_processed}")
    print(f"Artwork found: {albums_found}")
    print(f"Total downloaded: {albums_downloaded}")
    # An empty CSV must not crash the summary with a division by zero.
    success_rate = albums_found / albums_processed * 100 if albums_processed else 0.0
    print(f"Success rate: {success_rate:.1f}%")


def create_missing_report():
    """Create a report of albums without covers.

    Writes ``missing_covers.csv`` listing every album from the CSV whose cover
    file is absent from the covers directory. Nothing is written when no cover
    is missing.
    """
    if not os.path.exists(CSV_FILE):
        print(f"Error: {CSV_FILE} not found!")
        return

    missing_albums = []
    for rank, artist, album in _read_albums(CSV_FILE):
        filename = _cover_filename(rank, artist, album)
        if not os.path.exists(os.path.join(COVERS_DIR, filename)):
            missing_albums.append({
                'rank': rank,
                'artist': artist,
                'album': album,
                'filename': filename,
            })

    print(f"\nMissing covers: {len(missing_albums)}")

    if missing_albums:
        with open(MISSING_REPORT_FILE, 'w', newline='', encoding='utf-8') as file:
            writer = csv.DictWriter(file, fieldnames=['rank', 'artist', 'album', 'filename'])
            writer.writeheader()
            writer.writerows(missing_albums)
        print("Created missing_covers.csv with list of albums without artwork")


def main():
    """Prompt for an action and run the downloader and/or the report."""
    print("Top 500 Albums Cover Art Downloader")
    print("===================================")

    choice = input("Choose option:\n1. Download covers\n2. Create missing report\n3. Both\nEnter choice (1-3): ").strip()

    if choice in ['1', '3']:
        download_album_covers()

    if choice in ['2', '3']:
        create_missing_report()

    print("\nDone!")


if __name__ == "__main__":
    main()