Organize repository by moving all scripts to scripts/ folder

- Moved all Python processing scripts to scripts/ directory for better organization
- Preserves git history using git mv command
- Clean separation between main project files and utility scripts

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Johan Lundberg 2025-07-01 00:36:08 +02:00
parent 462fdcfa84
commit 872fdfa0ee
15 changed files with 0 additions and 0 deletions

View file

@@ -0,0 +1,176 @@
#!/usr/bin/env python3
"""
Download album covers for ALL 500 albums using iTunes Search API
"""
import urllib.request
import urllib.parse
import json
import csv
import os
import re
import time
def sanitize_filename(text):
    """Make *text* safe to use as a filename.

    Strips characters that are illegal on common filesystems, drops any
    remaining punctuation outside word characters/dash/underscore/dot,
    collapses whitespace runs to single underscores, and caps the result
    at 100 characters.
    """
    # Characters forbidden on Windows/POSIX filesystems.
    safe = re.sub(r'[<>:"/\\|?*]', '', text)
    # Anything else that is not a word char, whitespace, dash, underscore, dot.
    safe = re.sub(r'[^\w\s\-_\.]', '', safe)
    # Trim the edges, then turn interior whitespace runs into underscores.
    safe = re.sub(r'\s+', '_', safe.strip())
    # Guard against pathologically long names.
    return safe[:100]
def search_itunes(artist, album):
    """Query the iTunes Search API for an album's cover artwork.

    Returns a 600x600 artwork URL string (may be empty if the result has
    no ``artworkUrl100``), or ``None`` when the request fails or the
    search yields no results.
    """
    # Build the query: drop parenthesised qualifiers, squeeze whitespace.
    query = re.sub(r'\([^)]*\)', '', f"{artist} {album}".strip())
    query = re.sub(r'\s+', ' ', query).strip()
    endpoint = (
        "https://itunes.apple.com/search?term="
        f"{urllib.parse.quote(query)}&media=music&entity=album&limit=5"
    )
    try:
        with urllib.request.urlopen(endpoint, timeout=15) as resp:
            payload = json.loads(resp.read().decode())
        if payload['resultCount'] > 0:
            wanted_artist = artist.lower()
            wanted_album = album.lower()
            for hit in payload['results']:
                got_artist = hit.get('artistName', '').lower()
                got_album = hit.get('collectionName', '').lower()
                # Loose match: either string may contain the other.
                artist_ok = wanted_artist in got_artist or got_artist in wanted_artist
                album_ok = wanted_album in got_album or got_album in wanted_album
                if artist_ok and album_ok:
                    # Upgrade the thumbnail URL to the 600x600 variant.
                    return hit.get('artworkUrl100', '').replace('100x100', '600x600')
            # No close match — fall back to the top search hit.
            top = payload['results'][0]
            return top.get('artworkUrl100', '').replace('100x100', '600x600')
    except Exception as e:
        print(f"Error searching for {artist} - {album}: {e}")
        return None
    return None
def download_all_covers():
    """Download cover art for every album listed in the top-500 CSV.

    Reads ``top_500_albums_2023.csv``, queries iTunes for each entry via
    ``search_itunes``, and saves 600x600 JPEGs into ``covers/``. Albums
    whose cover file already exists are skipped; any album that cannot be
    found or downloaded is recorded in ``failed_downloads.txt``.
    """
    covers_dir = 'covers'
    os.makedirs(covers_dir, exist_ok=True)

    csv_file = 'top_500_albums_2023.csv'
    if not os.path.exists(csv_file):
        print(f"Error: {csv_file} not found!")
        return

    albums_processed = 0
    albums_found = 0
    albums_skipped = 0
    failed_albums = []

    print("Downloading covers for ALL 500 albums...")
    print("This will take a while to be respectful to the iTunes API...\n")

    def _rank_key(row):
        # Sort by numeric rank; missing or non-numeric ranks sort last
        # instead of raising ValueError and aborting the whole run.
        try:
            return int(row.get('Rank', 999))
        except (TypeError, ValueError):
            return 999

    with open(csv_file, 'r', encoding='utf-8') as file:
        rows = sorted(csv.DictReader(file), key=_rank_key)

    for row in rows:
        rank = row.get('Rank', '').strip()
        artist = row.get('Artist', '').strip()
        album = row.get('Album', '').strip()
        if not artist or not album:
            continue
        albums_processed += 1

        # Build a filesystem-safe, rank-prefixed filename.
        safe_artist = sanitize_filename(artist)
        safe_album = sanitize_filename(album)
        filename = f"rank_{rank.zfill(3)}_{safe_artist}_{safe_album}.jpg"
        filepath = os.path.join(covers_dir, filename)

        # Skip covers downloaded on a previous run.
        if os.path.exists(filepath):
            albums_skipped += 1
            if albums_processed % 25 == 0:
                print(f"✓ Already exists: {rank}. {artist} - {album}")
            continue

        print(f"Searching [{albums_processed}/500]: {rank}. {artist} - {album}")
        artwork_url = search_itunes(artist, album)
        if artwork_url:
            try:
                print(f"  Downloading from: {artwork_url}")
                urllib.request.urlretrieve(artwork_url, filepath)
                # Bug fix: the success message previously lost the filename.
                print(f"  ✓ Downloaded: {filename}")
                albums_found += 1
            except Exception as e:
                print(f"  ✗ Download failed: {e}")
                failed_albums.append(f"{rank}. {artist} - {album}")
        else:
            print(f"  ✗ No artwork found")
            failed_albums.append(f"{rank}. {artist} - {album}")

        # Be nice to the API - longer delay for bulk download.
        time.sleep(1.2)

        # Progress update every 25 albums.
        if albums_processed % 25 == 0:
            # Guard against division by zero when every album so far was
            # skipped (previously raised ZeroDivisionError).
            attempted = albums_processed - albums_skipped
            success_rate = (albums_found / attempted * 100) if attempted else 0.0
            print(f"\n--- Progress Update ---")
            print(f"Processed: {albums_processed}/500 ({albums_processed/500*100:.1f}%)")
            print(f"Found: {albums_found}")
            print(f"Skipped (already exist): {albums_skipped}")
            print(f"Failed: {len(failed_albums)}")
            print(f"Success rate: {success_rate:.1f}%")
            print("----------------------\n")

    # Guard against an empty/unusable CSV (previously ZeroDivisionError).
    overall_rate = (
        (albums_found + albums_skipped) / albums_processed * 100
        if albums_processed else 0.0
    )
    print(f"\n🎉 FINAL RESULTS:")
    print(f"Albums processed: {albums_processed}")
    print(f"Already existed: {albums_skipped}")
    print(f"New downloads: {albums_found}")
    print(f"Failed to find: {len(failed_albums)}")
    print(f"Total covers available: {albums_found + albums_skipped}")
    print(f"Overall success rate: {overall_rate:.1f}%")

    if failed_albums:
        print(f"\n❌ Failed albums ({len(failed_albums)}):")
        for album in failed_albums[:10]:  # Show first 10
            print(f"  {album}")
        if len(failed_albums) > 10:
            print(f"  ... and {len(failed_albums) - 10} more")
        # Save the full failure list for a later retry.
        with open('failed_downloads.txt', 'w') as f:
            for album in failed_albums:
                f.write(f"{album}\n")
        print(f"\nFull list saved to failed_downloads.txt")
if __name__ == "__main__":
print("🎵 Top 500 Albums - Complete Cover Art Downloader")
print("=" * 50)
confirm = input("This will download covers for all 500 albums.\nThis may take 10-15 minutes. Continue? (y/n): ")
if confirm.lower() in ['y', 'yes']:
download_all_covers()
else:
print("Download cancelled.")
print("\nDone!")