Organize repository by moving all scripts to scripts/ folder
- Moved all Python processing scripts to scripts/ directory for better organization - Preserves git history using git mv command - Clean separation between main project files and utility scripts 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
462fdcfa84
commit
872fdfa0ee
15 changed files with 0 additions and 0 deletions
209
scripts/download_album_covers.py
Normal file
209
scripts/download_album_covers.py
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Download album cover images for Top 500 Albums using iTunes Search API
|
||||
"""
|
||||
|
||||
import requests
|
||||
import csv
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import json
|
||||
from urllib.parse import quote
|
||||
from urllib.request import urlretrieve
|
||||
|
||||
def sanitize_filename(text):
    """Turn *text* into a filesystem-safe filename fragment.

    Strips characters that are invalid on common filesystems, drops any
    remaining punctuation, collapses runs of whitespace into single
    underscores, and caps the result at 100 characters.
    """
    # Characters forbidden on Windows/most filesystems.
    stripped = re.sub(r'[<>:"/\\|?*]', '', text)
    # Anything that is not word char / whitespace / - _ . goes too.
    cleaned = re.sub(r'[^\w\s\-_\.]', '', stripped)
    # Collapse whitespace runs into single underscores.
    underscored = re.sub(r'\s+', '_', cleaned.strip())
    # Keep filenames to a sane length.
    return underscored[:100]
|
||||
|
||||
def search_itunes(artist, album):
    """Query the iTunes Search API for an album's artwork URL.

    Builds a search term from *artist* and *album*, asks for up to five
    album results, and returns a 600x600 artwork URL string for the best
    match (preferring results whose artist and album names overlap the
    query in either direction, otherwise the top result). Returns None
    when nothing is found or the request fails.
    """
    # Normalise the query: drop parenthesised qualifiers, squeeze spaces.
    query = f"{artist} {album}".strip()
    query = re.sub(r'\([^)]*\)', '', query)  # Remove parentheses content
    query = re.sub(r'\s+', ' ', query).strip()

    params = {
        'term': query,
        'media': 'music',
        'entity': 'album',
        'limit': 5,
    }

    try:
        response = requests.get("https://itunes.apple.com/search",
                                params=params, timeout=10)
        response.raise_for_status()
        data = response.json()

        if data['resultCount'] > 0:
            artist_lc = artist.lower()
            album_lc = album.lower()

            # Prefer a hit whose names overlap the query either way round.
            for hit in data['results']:
                hit_artist = hit.get('artistName', '').lower()
                hit_album = hit.get('collectionName', '').lower()
                artist_ok = artist_lc in hit_artist or hit_artist in artist_lc
                album_ok = album_lc in hit_album or hit_album in album_lc
                if artist_ok and album_ok:
                    return hit.get('artworkUrl100', '').replace('100x100', '600x600')

            # No overlapping match: fall back to the first result.
            top = data['results'][0]
            return top.get('artworkUrl100', '').replace('100x100', '600x600')

    except Exception as e:
        print(f"Error searching for {artist} - {album}: {e}")
        return None

    return None
|
||||
|
||||
def download_album_covers():
    """Download cover art for every album in top_500_albums_2023.csv.

    For each CSV row (columns Rank/Artist/Album), searches the iTunes API
    for artwork and saves it as a JPEG under covers/. Lookup results are
    persisted to download_log.json so previously failed albums are not
    retried on later runs. Prints a summary when finished.
    """
    # Create covers directory
    covers_dir = 'covers'
    if not os.path.exists(covers_dir):
        os.makedirs(covers_dir)

    # Read the CSV file
    csv_file = 'top_500_albums_2023.csv'
    if not os.path.exists(csv_file):
        print(f"Error: {csv_file} not found!")
        return

    albums_processed = 0
    albums_found = 0
    albums_downloaded = 0

    # Keep track of what we've processed across runs.
    log_file = 'download_log.json'
    processed_albums = {}

    # Load existing log if it exists
    if os.path.exists(log_file):
        with open(log_file, 'r') as f:
            processed_albums = json.load(f)

    with open(csv_file, 'r', encoding='utf-8') as file:
        csv_reader = csv.DictReader(file)

        for row in csv_reader:
            rank = row.get('Rank', '').strip()
            artist = row.get('Artist', '').strip()
            album = row.get('Album', '').strip()

            if not artist or not album:
                continue

            albums_processed += 1

            # Build the target filename.
            # BUG FIX: rank is a CSV *string*, so f"{rank:03d}" raised
            # "Unknown format code 'd' for object of type 'str'" on the
            # first row. Convert to int for zero-padding, falling back to
            # the raw text if the rank isn't numeric.
            safe_artist = sanitize_filename(artist)
            safe_album = sanitize_filename(album)
            try:
                rank_part = f"{int(rank):03d}"
            except ValueError:
                rank_part = rank
            filename = f"rank_{rank_part}_{safe_artist}_{safe_album}.jpg"
            filepath = os.path.join(covers_dir, filename)

            # Skip if already downloaded
            if os.path.exists(filepath):
                print(f"✓ Already exists: {rank}. {artist} - {album}")
                albums_downloaded += 1
                continue

            # Skip if we've already tried and failed
            album_key = f"{artist}_{album}"
            if album_key in processed_albums and not processed_albums[album_key]:
                print(f"⚠ Previously failed: {rank}. {artist} - {album}")
                continue

            print(f"Searching: {rank}. {artist} - {album}")

            # Search for artwork
            artwork_url = search_itunes(artist, album)

            if artwork_url:
                try:
                    print(f" Downloading from: {artwork_url}")
                    urlretrieve(artwork_url, filepath)
                    # BUG FIX: the success message printed a literal
                    # "(unknown)" instead of the saved filename.
                    print(f" ✓ Downloaded: {filename}")
                    albums_found += 1
                    albums_downloaded += 1
                    processed_albums[album_key] = True
                except Exception as e:
                    print(f" ✗ Download failed: {e}")
                    processed_albums[album_key] = False
            else:
                print(f" ✗ No artwork found")
                processed_albums[album_key] = False

            # Save progress after every attempt so a crash loses nothing.
            with open(log_file, 'w') as f:
                json.dump(processed_albums, f, indent=2)

            # Be nice to the API
            time.sleep(0.5)

            # Progress update
            if albums_processed % 25 == 0:
                print(f"\nProgress: {albums_processed}/500 processed, {albums_found} found, {albums_downloaded} downloaded\n")

    print(f"\nFinal Results:")
    print(f"Albums processed: {albums_processed}")
    print(f"Artwork found: {albums_found}")
    print(f"Total downloaded: {albums_downloaded}")
    # Guard against ZeroDivisionError when the CSV had no usable rows.
    if albums_processed:
        print(f"Success rate: {albums_found/albums_processed*100:.1f}%")
|
||||
|
||||
def create_missing_report():
    """Write missing_covers.csv listing albums that lack a cover image.

    Re-derives each expected filename from top_500_albums_2023.csv (same
    naming scheme as download_album_covers) and records every row whose
    JPEG is absent from covers/. No file is written when nothing is missing.
    """
    covers_dir = 'covers'
    missing_albums = []

    with open('top_500_albums_2023.csv', 'r', encoding='utf-8') as file:
        csv_reader = csv.DictReader(file)

        for row in csv_reader:
            rank = row.get('Rank', '').strip()
            artist = row.get('Artist', '').strip()
            album = row.get('Album', '').strip()

            safe_artist = sanitize_filename(artist)
            safe_album = sanitize_filename(album)
            # BUG FIX: rank is a CSV *string*, so f"{rank:03d}" raised
            # ValueError. Convert to int for zero-padding, falling back to
            # the raw text if the rank isn't numeric (matches the scheme
            # used when downloading).
            try:
                rank_part = f"{int(rank):03d}"
            except ValueError:
                rank_part = rank
            filename = f"rank_{rank_part}_{safe_artist}_{safe_album}.jpg"
            filepath = os.path.join(covers_dir, filename)

            if not os.path.exists(filepath):
                missing_albums.append({
                    'rank': rank,
                    'artist': artist,
                    'album': album,
                    'filename': filename,
                })

    print(f"\nMissing covers: {len(missing_albums)}")

    if missing_albums:
        with open('missing_covers.csv', 'w', newline='', encoding='utf-8') as file:
            writer = csv.DictWriter(file, fieldnames=['rank', 'artist', 'album', 'filename'])
            writer.writeheader()
            writer.writerows(missing_albums)
        print("Created missing_covers.csv with list of albums without artwork")
|
||||
|
||||
if __name__ == "__main__":
    print("Top 500 Albums Cover Art Downloader")
    print("===================================")

    # Interactive menu: download covers, build the missing report, or both.
    choice = input("Choose option:\n1. Download covers\n2. Create missing report\n3. Both\nEnter choice (1-3): ")

    wants_download = choice in ('1', '3')
    wants_report = choice in ('2', '3')

    if wants_download:
        download_album_covers()

    if wants_report:
        create_missing_report()

    print("\nDone!")
|
||||
Loading…
Add table
Add a link
Reference in a new issue