Organize repository by moving all scripts to scripts/ folder
- Moved all Python processing scripts to scripts/ directory for better organization - Preserves git history using git mv command - Clean separation between main project files and utility scripts 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
462fdcfa84
commit
872fdfa0ee
15 changed files with 0 additions and 0 deletions
209
scripts/download_album_covers.py
Normal file
209
scripts/download_album_covers.py
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Download album cover images for Top 500 Albums using iTunes Search API
|
||||
"""
|
||||
|
||||
import requests
|
||||
import csv
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import json
|
||||
from urllib.parse import quote
|
||||
from urllib.request import urlretrieve
|
||||
|
||||
def sanitize_filename(text):
    """Turn *text* into a filesystem-safe filename fragment.

    Strips characters that are invalid on common filesystems, drops any
    remaining punctuation, collapses runs of whitespace into single
    underscores, and caps the result at 100 characters.
    """
    # Characters forbidden on Windows/most filesystems.
    stripped = re.sub(r'[<>:"/\\|?*]', '', text)
    # Anything that is not word char / whitespace / - _ . goes too.
    cleaned = re.sub(r'[^\w\s\-_\.]', '', stripped)
    # Collapse whitespace runs into single underscores.
    underscored = re.sub(r'\s+', '_', cleaned.strip())
    # Keep filenames to a sane length.
    return underscored[:100]
|
||||
|
||||
def search_itunes(artist, album):
    """Query the iTunes Search API for an album's artwork URL.

    Builds a search term from *artist* and *album*, asks for up to five
    album results, and returns a 600x600 artwork URL string for the best
    match (preferring results whose artist and album names overlap the
    query in either direction, otherwise the top result). Returns None
    when nothing is found or the request fails.
    """
    # Normalise the query: drop parenthesised qualifiers, squeeze spaces.
    query = f"{artist} {album}".strip()
    query = re.sub(r'\([^)]*\)', '', query)  # Remove parentheses content
    query = re.sub(r'\s+', ' ', query).strip()

    params = {
        'term': query,
        'media': 'music',
        'entity': 'album',
        'limit': 5,
    }

    try:
        response = requests.get("https://itunes.apple.com/search",
                                params=params, timeout=10)
        response.raise_for_status()
        data = response.json()

        if data['resultCount'] > 0:
            artist_lc = artist.lower()
            album_lc = album.lower()

            # Prefer a hit whose names overlap the query either way round.
            for hit in data['results']:
                hit_artist = hit.get('artistName', '').lower()
                hit_album = hit.get('collectionName', '').lower()
                artist_ok = artist_lc in hit_artist or hit_artist in artist_lc
                album_ok = album_lc in hit_album or hit_album in album_lc
                if artist_ok and album_ok:
                    return hit.get('artworkUrl100', '').replace('100x100', '600x600')

            # No overlapping match: fall back to the first result.
            top = data['results'][0]
            return top.get('artworkUrl100', '').replace('100x100', '600x600')

    except Exception as e:
        print(f"Error searching for {artist} - {album}: {e}")
        return None

    return None
|
||||
|
||||
def download_album_covers():
    """Download cover art for every album in top_500_albums_2023.csv.

    For each CSV row (columns Rank/Artist/Album), searches the iTunes API
    for artwork and saves it as a JPEG under covers/. Lookup results are
    persisted to download_log.json so previously failed albums are not
    retried on later runs. Prints a summary when finished.
    """
    # Create covers directory
    covers_dir = 'covers'
    if not os.path.exists(covers_dir):
        os.makedirs(covers_dir)

    # Read the CSV file
    csv_file = 'top_500_albums_2023.csv'
    if not os.path.exists(csv_file):
        print(f"Error: {csv_file} not found!")
        return

    albums_processed = 0
    albums_found = 0
    albums_downloaded = 0

    # Keep track of what we've processed across runs.
    log_file = 'download_log.json'
    processed_albums = {}

    # Load existing log if it exists
    if os.path.exists(log_file):
        with open(log_file, 'r') as f:
            processed_albums = json.load(f)

    with open(csv_file, 'r', encoding='utf-8') as file:
        csv_reader = csv.DictReader(file)

        for row in csv_reader:
            rank = row.get('Rank', '').strip()
            artist = row.get('Artist', '').strip()
            album = row.get('Album', '').strip()

            if not artist or not album:
                continue

            albums_processed += 1

            # Build the target filename.
            # BUG FIX: rank is a CSV *string*, so f"{rank:03d}" raised
            # "Unknown format code 'd' for object of type 'str'" on the
            # first row. Convert to int for zero-padding, falling back to
            # the raw text if the rank isn't numeric.
            safe_artist = sanitize_filename(artist)
            safe_album = sanitize_filename(album)
            try:
                rank_part = f"{int(rank):03d}"
            except ValueError:
                rank_part = rank
            filename = f"rank_{rank_part}_{safe_artist}_{safe_album}.jpg"
            filepath = os.path.join(covers_dir, filename)

            # Skip if already downloaded
            if os.path.exists(filepath):
                print(f"✓ Already exists: {rank}. {artist} - {album}")
                albums_downloaded += 1
                continue

            # Skip if we've already tried and failed
            album_key = f"{artist}_{album}"
            if album_key in processed_albums and not processed_albums[album_key]:
                print(f"⚠ Previously failed: {rank}. {artist} - {album}")
                continue

            print(f"Searching: {rank}. {artist} - {album}")

            # Search for artwork
            artwork_url = search_itunes(artist, album)

            if artwork_url:
                try:
                    print(f" Downloading from: {artwork_url}")
                    urlretrieve(artwork_url, filepath)
                    # BUG FIX: the success message printed a literal
                    # "(unknown)" instead of the saved filename.
                    print(f" ✓ Downloaded: {filename}")
                    albums_found += 1
                    albums_downloaded += 1
                    processed_albums[album_key] = True
                except Exception as e:
                    print(f" ✗ Download failed: {e}")
                    processed_albums[album_key] = False
            else:
                print(f" ✗ No artwork found")
                processed_albums[album_key] = False

            # Save progress after every attempt so a crash loses nothing.
            with open(log_file, 'w') as f:
                json.dump(processed_albums, f, indent=2)

            # Be nice to the API
            time.sleep(0.5)

            # Progress update
            if albums_processed % 25 == 0:
                print(f"\nProgress: {albums_processed}/500 processed, {albums_found} found, {albums_downloaded} downloaded\n")

    print(f"\nFinal Results:")
    print(f"Albums processed: {albums_processed}")
    print(f"Artwork found: {albums_found}")
    print(f"Total downloaded: {albums_downloaded}")
    # Guard against ZeroDivisionError when the CSV had no usable rows.
    if albums_processed:
        print(f"Success rate: {albums_found/albums_processed*100:.1f}%")
|
||||
|
||||
def create_missing_report():
    """Write missing_covers.csv listing albums that lack a cover image.

    Re-derives each expected filename from top_500_albums_2023.csv (same
    naming scheme as download_album_covers) and records every row whose
    JPEG is absent from covers/. No file is written when nothing is missing.
    """
    covers_dir = 'covers'
    missing_albums = []

    with open('top_500_albums_2023.csv', 'r', encoding='utf-8') as file:
        csv_reader = csv.DictReader(file)

        for row in csv_reader:
            rank = row.get('Rank', '').strip()
            artist = row.get('Artist', '').strip()
            album = row.get('Album', '').strip()

            safe_artist = sanitize_filename(artist)
            safe_album = sanitize_filename(album)
            # BUG FIX: rank is a CSV *string*, so f"{rank:03d}" raised
            # ValueError. Convert to int for zero-padding, falling back to
            # the raw text if the rank isn't numeric (matches the scheme
            # used when downloading).
            try:
                rank_part = f"{int(rank):03d}"
            except ValueError:
                rank_part = rank
            filename = f"rank_{rank_part}_{safe_artist}_{safe_album}.jpg"
            filepath = os.path.join(covers_dir, filename)

            if not os.path.exists(filepath):
                missing_albums.append({
                    'rank': rank,
                    'artist': artist,
                    'album': album,
                    'filename': filename,
                })

    print(f"\nMissing covers: {len(missing_albums)}")

    if missing_albums:
        with open('missing_covers.csv', 'w', newline='', encoding='utf-8') as file:
            writer = csv.DictWriter(file, fieldnames=['rank', 'artist', 'album', 'filename'])
            writer.writeheader()
            writer.writerows(missing_albums)
        print("Created missing_covers.csv with list of albums without artwork")
|
||||
|
||||
if __name__ == "__main__":
    print("Top 500 Albums Cover Art Downloader")
    print("===================================")

    # Interactive menu: download covers, build the missing report, or both.
    choice = input("Choose option:\n1. Download covers\n2. Create missing report\n3. Both\nEnter choice (1-3): ")

    wants_download = choice in ('1', '3')
    wants_report = choice in ('2', '3')

    if wants_download:
        download_album_covers()

    if wants_report:
        create_missing_report()

    print("\nDone!")
|
||||
Loading…
Add table
Add a link
Reference in a new issue