#!/usr/bin/env python3
"""
Download cover art for the dropped albums (ranks 501-523).
Uses iTunes API to search for and download album artwork.
"""

import csv
import json
import os
import re
import time
import urllib.parse
import urllib.request
from typing import Dict, List, Optional

# Input/output locations, relative to the current working directory.
CSV_PATH = 'top_500_albums_2023.csv'
COVERS_DIR = 'covers'
FAILED_LOG_PATH = 'failed_dropped_downloads.txt'

# Rows whose rank is at or above this value are treated as "dropped" albums.
FIRST_DROPPED_RANK = 501

# Pause between albums so the iTunes API is not hammered.
REQUEST_DELAY_SECONDS = 1.2

# Upper bound on each sanitized filename component.
MAX_FILENAME_PART_LENGTH = 100

# Anything that is not a word character, whitespace, '-', '_' or '.' is
# dropped.  This is a superset of the characters that are illegal in
# filenames on common platforms (<>:"/\|?*), so one pass is enough.
_UNSAFE_CHARS = re.compile(r'[^\w\s\-_.]')
_WHITESPACE_RUN = re.compile(r'\s+')


def sanitize_filename(text: str) -> str:
    """Sanitize text for use in filenames.

    Removes every character other than word characters, whitespace, ``-``,
    ``_`` and ``.``; collapses each run of whitespace into one underscore;
    and truncates the result to ``MAX_FILENAME_PART_LENGTH`` characters.
    """
    text = _UNSAFE_CHARS.sub('', text)
    text = _WHITESPACE_RUN.sub('_', text)
    return text[:MAX_FILENAME_PART_LENGTH]


def _match_words(text: str) -> List[str]:
    """Return the lower-cased words of *text* used for fuzzy matching.

    Words of one or two characters are ignored because they match almost
    anything as substrings.  When *every* word is that short (e.g. "U2" or
    "IV") all words are kept, otherwise such names could never match.
    """
    words = text.lower().split()
    return [word for word in words if len(word) > 2] or words


def _high_res(artwork_url: str) -> str:
    """Rewrite a 100x100 artwork URL to request the 600x600 rendition."""
    return artwork_url.replace('100x100bb', '600x600bb')


def _pick_artwork_url(results: List[dict], artist: str,
                      album: str) -> Optional[str]:
    """Choose the best artwork URL from a list of iTunes search results.

    A result matches when at least one artist word appears in its
    ``artistName`` and at least one album word appears in its
    ``collectionName``.  If no matching result carries artwork, the first
    result's artwork is used as a fallback.  Returns ``None`` when no usable
    artwork URL exists.
    """
    artist_words = _match_words(artist)
    album_words = _match_words(album)

    for result in results:
        # ``or ''`` guards against explicit nulls in the response.
        result_artist = (result.get('artistName') or '').lower()
        result_album = (result.get('collectionName') or '').lower()

        artist_match = any(word in result_artist for word in artist_words)
        album_match = any(word in result_album for word in album_words)
        if artist_match and album_match:
            artwork_url = result.get('artworkUrl100') or ''
            if artwork_url:
                return _high_res(artwork_url)

    # If no good match, fall back to the first result's artwork.
    if results:
        artwork_url = results[0].get('artworkUrl100') or ''
        if artwork_url:
            return _high_res(artwork_url)
    return None


def search_itunes(artist: str, album: str) -> Optional[str]:
    """Search iTunes API for album artwork.

    Args:
        artist: Artist name to search for.
        album: Album title to search for.

    Returns:
        URL of the 600x600 artwork for the best-matching album, or ``None``
        when nothing usable was found or the request failed.  Errors are
        printed rather than raised so one bad lookup does not abort the run.
    """
    search_term = f"{artist} {album}".strip()
    encoded_term = urllib.parse.quote(search_term)
    url = f"https://itunes.apple.com/search?term={encoded_term}&media=music&entity=album&limit=5"

    # Deliberately broad: this is a best-effort lookup, and network, HTTP,
    # decoding and unexpected-payload errors should all be reported and
    # skipped in the same way.
    try:
        with urllib.request.urlopen(url, timeout=10) as response:
            data = json.loads(response.read().decode())
        results = data.get('results') or []
        return _pick_artwork_url(results, artist, album)
    except Exception as e:
        print(f" Error searching iTunes for {artist} - {album}: {e}")
        return None


def download_cover(url: str, filepath: str) -> bool:
    """Download cover image from URL.

    The response body is read completely before anything is written, and it
    is written to ``<filepath>.part`` and then moved into place.  A failed
    or empty download therefore never leaves a file at *filepath* that a
    later run would mistake for an existing cover.

    Args:
        url: Location of the image to fetch.
        filepath: Destination path of the image file.

    Returns:
        ``True`` if the image was saved, ``False`` otherwise (the error is
        printed, not raised).
    """
    temp_path = f"{filepath}.part"
    try:
        with urllib.request.urlopen(url, timeout=15) as response:
            image_data = response.read()
        if not image_data:
            print(f" Error downloading {filepath}: empty response")
            return False
        with open(temp_path, 'wb') as f:
            f.write(image_data)
        os.replace(temp_path, filepath)
        return True
    except Exception as e:
        print(f" Error downloading {filepath}: {e}")
        # Best-effort cleanup of a partially written temporary file.
        try:
            os.remove(temp_path)
        except OSError:
            pass
        return False


def _load_dropped_albums(csv_path: str,
                         min_rank: int = FIRST_DROPPED_RANK) -> List[Dict[str, str]]:
    """Read *csv_path* and return the rows whose ``Rank`` is >= *min_rank*.

    Rows whose rank cell is blank, missing or not an integer are skipped.
    """
    dropped = []
    # newline='' is what the csv module expects for correct handling of
    # newlines embedded in quoted fields.
    with open(csv_path, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            try:
                rank = int(row['Rank'])
            except (TypeError, ValueError):
                continue
            if rank >= min_rank:
                dropped.append(row)
    return dropped


def _cover_filename(rank: str, artist: str, album_name: str) -> str:
    """Build the cover image filename for one album."""
    safe_artist = sanitize_filename(artist)
    safe_album = sanitize_filename(album_name)
    rank_str = rank.zfill(3)
    return f"rank_{rank_str}_{safe_artist}_{safe_album}.jpg"


def main():
    """Download cover art for every dropped album listed in the CSV.

    Covers already present in the covers directory are counted as successes
    and not fetched again.  Albums whose artwork could not be found or
    downloaded are listed on stdout and written to ``FAILED_LOG_PATH``.
    """
    # Read the CSV to get dropped albums (ranks 501-523)
    dropped_albums = _load_dropped_albums(CSV_PATH)
    total = len(dropped_albums)

    print(f"🎨 Found {total} dropped albums needing cover art")
    print("📥 Starting download process...\n")

    # Create covers directory if it doesn't exist
    os.makedirs(COVERS_DIR, exist_ok=True)

    success_count = 0
    failed_downloads = []

    for i, album in enumerate(dropped_albums, 1):
        rank = album['Rank'].strip()
        artist = album['Artist']
        album_name = album['Album']

        filename = _cover_filename(rank, artist, album_name)
        filepath = os.path.join(COVERS_DIR, filename)

        print(f"[{i:2d}/{total}] #{rank} - {artist} - {album_name}")

        # Check if file already exists
        if os.path.exists(filepath):
            print(f" ✓ Already exists: {filename}")
            success_count += 1
            continue

        # Search for artwork
        artwork_url = search_itunes(artist, album_name)
        if artwork_url:
            print(" 🔍 Found artwork, downloading...")
            if download_cover(artwork_url, filepath):
                print(f" ✅ Downloaded: {filename}")
                success_count += 1
            else:
                print(f" ❌ Download failed: {filename}")
                failed_downloads.append((rank, artist, album_name))
        else:
            print(f" ❌ No artwork found: {filename}")
            failed_downloads.append((rank, artist, album_name))

        # Rate limiting - be nice to iTunes API
        time.sleep(REQUEST_DELAY_SECONDS)

    print("\n🎉 Download complete!")
    print(f"✅ Successfully downloaded: {success_count}/{total} covers")

    if failed_downloads:
        print(f"❌ Failed downloads: {len(failed_downloads)}")
        print("\nFailed albums:")
        for rank, artist, album_name in failed_downloads:
            print(f" #{rank} - {artist} - {album_name}")

        # Save failed downloads to file
        with open(FAILED_LOG_PATH, 'w', encoding='utf-8') as f:
            f.write("Failed to download cover art for these dropped albums:\n\n")
            for rank, artist, album_name in failed_downloads:
                f.write(f"#{rank} - {artist} - {album_name}\n")
        print(f"\n📝 Failed downloads saved to: {FAILED_LOG_PATH}")

    # Guard against division by zero when the CSV has no dropped albums.
    if total:
        coverage_percentage = (success_count / total) * 100
        print(f"\n📊 Coverage: {coverage_percentage:.1f}% of dropped albums have cover art")


if __name__ == "__main__":
    main()