#!/usr/bin/env python3
"""
Download album covers for ALL 500 albums using iTunes Search API
"""

import urllib.request
import urllib.parse
import json
import csv
import os
import re
import time

def sanitize_filename(text):
    """Turn arbitrary text into a string that is safe to use in a filename.

    Every character other than word characters, whitespace, '-', '_' and '.'
    is dropped; the remaining whitespace runs are collapsed to single
    underscores (leading/trailing whitespace is discarded) and the result is
    capped at 100 characters.
    """
    # A single whitelist pass is enough: the reserved characters
    # < > : " / \ | ? * are all outside the allowed set.
    kept = re.sub(r'[^\w\s\-_\.]', '', text)
    # split() with no argument drops leading/trailing whitespace and splits on
    # whitespace runs, so joining with '_' collapses each run to one underscore.
    underscored = '_'.join(kept.split())
    return underscored[:100]

def search_itunes(artist, album):
    """Search the iTunes Search API for an album and return an artwork URL.

    The query is built from "<artist> <album>" with parenthesised text removed
    and whitespace collapsed. Up to five album results are requested.

    Args:
        artist: Artist name to look for.
        album: Album title to look for.

    Returns:
        The artwork URL of the best candidate with '100x100' rewritten to
        '600x600', or None when the request fails or no result carries an
        artwork URL. Preference goes to the first result whose artist and
        album names contain (or are contained in) the requested names,
        case-insensitively; otherwise the first result with artwork is used.
    """
    # Parenthesised text is dropped from the query before searching.
    search_term = re.sub(r'\([^)]*\)', '', f"{artist} {album}".strip())
    search_term = re.sub(r'\s+', ' ', search_term).strip()

    encoded_term = urllib.parse.quote(search_term)
    url = f"https://itunes.apple.com/search?term={encoded_term}&media=music&entity=album&limit=5"

    try:
        with urllib.request.urlopen(url, timeout=15) as response:
            data = json.loads(response.read().decode())
        results = data.get('results') or []
    except Exception as e:
        # Deliberately broad: one failed lookup must not abort a bulk run.
        print(f"Error searching for {artist} - {album}: {e}")
        return None

    def artwork_of(result):
        # 'or' also covers an explicit null value in the JSON payload.
        return (result.get('artworkUrl100') or '').replace('100x100', '600x600')

    def similar(wanted, found):
        # An empty 'found' is a substring of everything, so it must be
        # rejected explicitly or nameless results would always "match".
        return bool(found) and (wanted in found or found in wanted)

    # Only results that actually carry artwork can be returned.
    candidates = [r for r in results if isinstance(r, dict) and artwork_of(r)]

    wanted_artist = artist.lower()
    wanted_album = album.lower()
    for result in candidates:
        result_artist = (result.get('artistName') or '').lower()
        result_album = (result.get('collectionName') or '').lower()
        if similar(wanted_artist, result_artist) and similar(wanted_album, result_album):
            return artwork_of(result)

    # No name match: fall back to the first result that has artwork.
    if candidates:
        return artwork_of(candidates[0])

    return None

def _clean_field(row, column):
    """Return the stripped text of ``column`` in ``row``; '' when the cell is absent or None."""
    return (row.get(column) or '').strip()


def _rank_sort_key(row):
    """Return the row's rank as an int, falling back to 999 when it is missing or not numeric."""
    try:
        return int(_clean_field(row, 'Rank'))
    except ValueError:
        return 999


def _download_file(url, destination, timeout=30):
    """Fetch ``url`` into ``destination``, never leaving a partial file under the final name."""
    partial = destination + '.part'
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            payload = response.read()
        with open(partial, 'wb') as out:
            out.write(payload)
        # Publish the file only once it is complete, so an interrupted
        # download is not mistaken for an existing cover on the next run.
        os.replace(partial, destination)
    finally:
        # After a successful replace the partial file no longer exists.
        if os.path.exists(partial):
            os.remove(partial)


def download_all_covers(csv_file='top_500_albums_2023.csv', covers_dir='covers', delay=1.2):
    """Download cover art for every album listed in a CSV file.

    The CSV is read through ``csv.DictReader`` and its 'Rank', 'Artist' and
    'Album' columns are used. Rows are processed in ascending rank order; rows
    lacking an artist or an album are ignored. Each cover is saved as
    ``rank_<rank>_<artist>_<album>.jpg`` inside ``covers_dir``; covers that
    already exist are skipped. Progress and a final summary are printed, and
    albums that could not be downloaded are written to
    ``failed_downloads.txt`` in the current directory.

    Args:
        csv_file: Path of the CSV file to read.
        covers_dir: Directory that receives the images (created if missing).
        delay: Seconds to sleep after each lookup, to throttle API requests.

    Returns:
        None. If ``csv_file`` does not exist an error is printed and the
        function returns without downloading anything.
    """
    os.makedirs(covers_dir, exist_ok=True)

    if not os.path.exists(csv_file):
        print(f"Error: {csv_file} not found!")
        return

    # 'utf-8-sig' also reads plain UTF-8 but strips a BOM, which would
    # otherwise end up inside the first header name.
    with open(csv_file, 'r', encoding='utf-8-sig', newline='') as file:
        rows = list(csv.DictReader(file))

    albums = []
    for row in sorted(rows, key=_rank_sort_key):
        rank = _clean_field(row, 'Rank')
        artist = _clean_field(row, 'Artist')
        album = _clean_field(row, 'Album')
        if artist and album:
            albums.append((rank, artist, album))
    total = len(albums)

    albums_found = 0
    albums_skipped = 0
    failed_albums = []

    print(f"Downloading covers for ALL {total} albums...")
    print("This will take a while to be respectful to the iTunes API...\n")

    for albums_processed, (rank, artist, album) in enumerate(albums, start=1):
        safe_artist = sanitize_filename(artist)
        safe_album = sanitize_filename(album)
        filename = f"rank_{rank.zfill(3)}_{safe_artist}_{safe_album}.jpg"
        filepath = os.path.join(covers_dir, filename)

        # Skip if already downloaded
        if os.path.exists(filepath):
            albums_skipped += 1
            if albums_processed % 25 == 0:
                print(f"✓ Already exists: {rank}. {artist} - {album}")
            continue

        print(f"Searching [{albums_processed}/{total}]: {rank}. {artist} - {album}")

        artwork_url = search_itunes(artist, album)

        if artwork_url:
            try:
                print(f"  Downloading from: {artwork_url}")
                _download_file(artwork_url, filepath)
            except Exception as e:
                # Deliberately broad: one bad download must not stop the batch.
                print(f"  ✗ Download failed: {e}")
                failed_albums.append(f"{rank}. {artist} - {album}")
            else:
                print(f"  ✓ Downloaded: {filename}")
                albums_found += 1
        else:
            print(f"  ✗ No artwork found")
            failed_albums.append(f"{rank}. {artist} - {album}")

        # Be nice to the API - longer delay for bulk download
        if delay > 0:
            time.sleep(delay)

        # Progress update every 25 albums
        if albums_processed % 25 == 0:
            attempted = albums_processed - albums_skipped
            success_rate = albums_found / attempted * 100 if attempted else 0.0
            print(f"\n--- Progress Update ---")
            print(f"Processed: {albums_processed}/{total} ({albums_processed/total*100:.1f}%)")
            print(f"Found: {albums_found}")
            print(f"Skipped (already exist): {albums_skipped}")
            print(f"Failed: {len(failed_albums)}")
            print(f"Success rate: {success_rate:.1f}%")
            print("----------------------\n")

    # Guard against a CSV without any usable rows (would divide by zero).
    overall_rate = (albums_found + albums_skipped) / total * 100 if total else 0.0

    print(f"\n🎉 FINAL RESULTS:")
    print(f"Albums processed: {total}")
    print(f"Already existed: {albums_skipped}")
    print(f"New downloads: {albums_found}")
    print(f"Failed to find: {len(failed_albums)}")
    print(f"Total covers available: {albums_found + albums_skipped}")
    print(f"Overall success rate: {overall_rate:.1f}%")

    if failed_albums:
        print(f"\n❌ Failed albums ({len(failed_albums)}):")
        for entry in failed_albums[:10]:  # Show first 10
            print(f"  {entry}")
        if len(failed_albums) > 10:
            print(f"  ... and {len(failed_albums) - 10} more")

        # Explicit UTF-8 so non-ASCII artist/album names can always be written.
        with open('failed_downloads.txt', 'w', encoding='utf-8') as f:
            f.writelines(f"{entry}\n" for entry in failed_albums)
        print(f"\nFull list saved to failed_downloads.txt")

if __name__ == "__main__":
    # Banner, then an explicit confirmation because the full run is slow.
    print("🎵 Top 500 Albums - Complete Cover Art Downloader")
    print("=" * 50)

    prompt = "This will download covers for all 500 albums.\nThis may take 10-15 minutes. Continue? (y/n): "
    answer = input(prompt).lower()

    # Only an explicit "y"/"yes" (any letter case) starts the download.
    if answer not in ('y', 'yes'):
        print("Download cancelled.")
    else:
        download_all_covers()

    print("\nDone!")