- Moved all Python processing scripts to scripts/ directory for better organization - Preserves git history using git mv command - Clean separation between main project files and utility scripts 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
221 lines
No EOL
7.6 KiB
Python
221 lines
No EOL
7.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Download album cover images for Top 500 Albums using iTunes Search API
|
|
Simple version using only built-in Python modules
|
|
"""
|
|
|
|
import urllib.request
|
|
import urllib.parse
|
|
import json
|
|
import csv
|
|
import os
|
|
import re
|
|
import time
|
|
|
|
def sanitize_filename(text):
    """Turn arbitrary text into a string that is safe to use in a filename.

    Characters reserved by common filesystems and any remaining character
    that is not a word character, whitespace, hyphen, underscore or dot are
    dropped. Surrounding whitespace is trimmed, each whitespace run becomes
    a single underscore, and the result is capped at 100 characters.
    """
    removal_patterns = (
        r'[<>:"/\\|?*]',    # characters reserved by common filesystems
        r'[^\w\s\-_\.]',    # anything else that is not word/space/-/_/.
    )

    cleaned = text
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, '', cleaned)

    underscored = re.sub(r'\s+', '_', cleaned.strip())
    return underscored[:100]  # keep filenames reasonably short
|
|
|
|
def search_itunes(artist, album):
    """Look up album artwork through the iTunes Search API.

    Args:
        artist: Artist name as listed in the source data.
        album: Album title as listed in the source data.

    Returns:
        The artwork URL of the best candidate, with the ``100x100`` size
        token rewritten to ``600x600``, or ``None`` when the search term is
        empty, the request fails, or no result carries artwork. The best
        candidate is the first result whose artist and album names overlap
        the requested ones; otherwise the first result that has artwork.
    """
    # Parenthesised qualifiers such as "(Deluxe Edition)" are dropped from
    # the query so they do not narrow the search.
    search_term = re.sub(r'\([^)]*\)', '', f"{artist} {album}")
    search_term = re.sub(r'\s+', ' ', search_term).strip()
    if not search_term:
        # Nothing to search for; skip the pointless network round trip.
        return None

    query = urllib.parse.urlencode({
        'term': search_term,
        'media': 'music',
        'entity': 'album',
        'limit': 5,
    })
    url = f"https://itunes.apple.com/search?{query}"

    try:
        with urllib.request.urlopen(url, timeout=10) as response:
            data = json.loads(response.read().decode())
    except Exception as e:
        # Best effort: any network or decoding failure means "no artwork".
        print(f"Error searching for {artist} - {album}: {e}")
        return None

    results = data.get('results') if isinstance(data, dict) else None
    # Only results that actually carry artwork are useful to the caller.
    candidates = [
        result for result in (results or [])
        if isinstance(result, dict) and result.get('artworkUrl100')
    ]
    if not candidates:
        return None

    def overlaps(wanted, found):
        # Both sides must be non-empty: '' is a substring of every string,
        # which would otherwise make a nameless result match anything.
        return bool(wanted and found) and (wanted in found or found in wanted)

    wanted_artist = artist.lower()
    wanted_album = album.lower()

    best = candidates[0]  # fallback when no name-based match exists
    for result in candidates:
        if overlaps(wanted_artist, result.get('artistName', '').lower()) and \
                overlaps(wanted_album, result.get('collectionName', '').lower()):
            best = result
            break

    return best['artworkUrl100'].replace('100x100', '600x600')
|
|
|
|
def _album_fields(row):
    """Return ``(rank, artist, album)`` from a CSV row as stripped strings.

    ``csv.DictReader`` fills fields missing from a short row with ``None``,
    so absent values are normalised to ``''`` before stripping.
    """
    return tuple((row.get(key) or '').strip() for key in ('Rank', 'Artist', 'Album'))


def _rank_order(row):
    """Sort key: the row's numeric rank; unparseable ranks sort as 999."""
    try:
        return int(_album_fields(row)[0])
    except ValueError:
        return 999


def _fetch_cover(rank, artist, album, covers_dir):
    """Make sure the cover for one album is on disk.

    Uses ``sanitize_filename`` to build the target name and ``search_itunes``
    to locate the artwork. Returns ``True`` when the cover already existed or
    was downloaded, ``False`` otherwise.
    """
    filename = (
        f"rank_{rank.zfill(3)}_{sanitize_filename(artist)}"
        f"_{sanitize_filename(album)}.jpg"
    )
    filepath = os.path.join(covers_dir, filename)

    # Already downloaded on an earlier run: no API call, no delay.
    if os.path.exists(filepath):
        print(f"✓ Already exists: {rank}. {artist} - {album}")
        return True

    print(f"Searching: {rank}. {artist} - {album}")
    artwork_url = search_itunes(artist, album)

    found = False
    if artwork_url:
        try:
            print(f" Downloading from: {artwork_url}")
            urllib.request.urlretrieve(artwork_url, filepath)
        except Exception as e:
            print(f" ✗ Download failed: {e}")
            # A failed transfer can leave a partial file behind; remove it so
            # a later run does not mistake it for a finished download.
            try:
                os.remove(filepath)
            except OSError:
                pass  # nothing was written, or it cannot be removed
        else:
            print(f" ✓ Downloaded: {filename}")
            found = True
    else:
        print(" ✗ No artwork found")

    # Be nice to the API
    time.sleep(1)
    return found


def _print_summary(heading, albums_processed, albums_found):
    """Print the closing statistics, tolerating a run that processed nothing."""
    print(f"\n{heading}")
    print(f"Albums processed: {albums_processed}")
    print(f"Artwork found: {albums_found}")
    if albums_processed:
        print(f"Success rate: {albums_found/albums_processed*100:.1f}%")
    else:
        # Avoid dividing by zero when the CSV had no usable rows.
        print("Success rate: n/a (no albums processed)")


def download_sample_covers(limit=10):
    """Download a sample of album covers to test the system.

    Reads ``top_500_albums_2023.csv`` from the current directory in file
    order and stores covers under ``covers/``. Rows without an artist or
    album are skipped and do not count towards ``limit``.

    Args:
        limit: Maximum number of usable rows to process.

    Returns:
        None. Progress and a summary are printed.
    """
    covers_dir = 'covers'
    os.makedirs(covers_dir, exist_ok=True)

    csv_file = 'top_500_albums_2023.csv'
    if not os.path.exists(csv_file):
        print(f"Error: {csv_file} not found!")
        return

    albums_processed = 0
    albums_found = 0

    print(f"Downloading sample of {limit} album covers...")

    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(csv_file, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            if albums_processed >= limit:
                break

            rank, artist, album = _album_fields(row)
            if not artist or not album:
                continue

            albums_processed += 1
            if _fetch_cover(rank, artist, album, covers_dir):
                albums_found += 1

    _print_summary("Sample Results:", albums_processed, albums_found)


def download_top_albums(limit=50):
    """Download covers for the top N albums by rank.

    Reads ``top_500_albums_2023.csv`` from the current directory, sorts the
    rows by numeric ``Rank`` (rows whose rank cannot be parsed sort as 999)
    and processes the first ``limit`` rows, storing covers under ``covers/``.
    Rows without an artist or album are skipped.

    Args:
        limit: Number of top-ranked rows to consider.

    Returns:
        None. Progress and a summary are printed.
    """
    covers_dir = 'covers'
    os.makedirs(covers_dir, exist_ok=True)

    csv_file = 'top_500_albums_2023.csv'
    if not os.path.exists(csv_file):
        print(f"Error: {csv_file} not found!")
        return

    albums_processed = 0
    albums_found = 0

    print(f"Downloading covers for top {limit} albums...")

    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(csv_file, 'r', encoding='utf-8', newline='') as file:
        rows = list(csv.DictReader(file))

    # Sort by rank to get top albums first
    rows.sort(key=_rank_order)

    for row in rows[:limit]:
        rank, artist, album = _album_fields(row)
        if not artist or not album:
            continue

        albums_processed += 1
        if _fetch_cover(rank, artist, album, covers_dir):
            albums_found += 1

    _print_summary(f"Top {limit} Results:", albums_processed, albums_found)
|
|
|
|
if __name__ == "__main__":
    # Interactive entry point: show the banner, ask which batch to fetch,
    # then run the matching downloader.
    print("Top 500 Albums Cover Art Downloader (Simple Version)")
    print("==================================================")

    choice = input("Choose option:\n1. Download sample (10 albums)\n2. Download top 50 albums\n3. Download top 100 albums\nEnter choice (1-3): ")

    # Menu entry -> (downloader, number of albums)
    menu_actions = {
        '1': (download_sample_covers, 10),
        '2': (download_top_albums, 50),
        '3': (download_top_albums, 100),
    }

    selection = menu_actions.get(choice)
    if selection is None:
        print("Invalid choice!")
    else:
        downloader, album_count = selection
        downloader(album_count)

    print("\nDone!")