Complete Top 500 Albums project with 100% data coverage and UI improvements

- Fixed Info/Description columns after regenerating CSV with clean Wikipedia data
- Remapped and downloaded missing album covers to match new rankings
- Modified website UI to show all description text without click-to-expand
- Added comprehensive Info/Description for all 500 albums using research
- Created multiple data processing scripts for album information completion
- Achieved 100% data completion with descriptions ending "(by Claude)" for new content
- All albums now have complete metadata and cover art

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Johan Lundberg 2025-07-01 00:33:47 +02:00
parent 09b5491f8a
commit 462fdcfa84
500 changed files with 2323 additions and 502 deletions

209
download_album_covers.py Normal file
View file

@ -0,0 +1,209 @@
#!/usr/bin/env python3
"""
Download album cover images for Top 500 Albums using iTunes Search API
"""
import requests
import csv
import os
import re
import time
import json
from urllib.parse import quote
from urllib.request import urlretrieve
def sanitize_filename(text):
    """Turn arbitrary text into a string that is safe to embed in a filename.

    Characters that are reserved on common filesystems are dropped first, then
    anything that is not a word character, whitespace, hyphen, underscore or
    dot. Surrounding whitespace is trimmed, every remaining whitespace run
    becomes a single underscore, and the result is capped at 100 characters.
    """
    max_length = 100

    # Order matters: delete unwanted characters first, then join the words.
    without_reserved = re.sub(r'[<>:"/\\|?*]', '', text)
    without_symbols = re.sub(r'[^\w\s\-_\.]', '', without_reserved)
    underscored = re.sub(r'\s+', '_', without_symbols.strip())

    return underscored[:max_length]
def search_itunes(artist, album):
    """Look up an album on the iTunes Search API and return a cover-art URL.

    The query is "<artist> <album>" with any parenthesised text removed and
    whitespace collapsed. Up to five album results are requested. The first
    result whose artist and album names each contain (or are contained in)
    the requested names, case-insensitively, wins; if none matches, the first
    result that has artwork is used as a fallback.

    Args:
        artist: Artist name to search for (expected to be non-empty).
        album: Album title to search for (expected to be non-empty).

    Returns:
        The chosen result's ``artworkUrl100`` with ``100x100`` replaced by
        ``600x600``, or None when the request fails, the response cannot be
        decoded, or no result carries an artwork URL.
    """
    # Parenthesised qualifiers are stripped from the query only; the matching
    # below still compares against the full artist/album names.
    search_term = f"{artist} {album}".strip()
    search_term = re.sub(r'\([^)]*\)', '', search_term)
    search_term = re.sub(r'\s+', ' ', search_term).strip()

    url = "https://itunes.apple.com/search"
    params = {
        'term': search_term,
        'media': 'music',
        'entity': 'album',
        'limit': 5,
    }

    # Keep the try body to the network round-trip and decoding so that
    # programming errors elsewhere are not silently reported as "not found".
    try:
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error searching for {artist} - {album}: {e}")
        return None

    results = data.get('results') if isinstance(data, dict) else None
    if not isinstance(results, list):
        return None

    wanted_artist = artist.lower()
    wanted_album = album.lower()
    fallback_url = None

    for result in results:
        if not isinstance(result, dict):
            continue

        artwork_url = (result.get('artworkUrl100') or '').replace('100x100', '600x600')
        if not artwork_url:
            # A result without artwork is useless, even if its names match.
            continue
        if fallback_url is None:
            fallback_url = artwork_url

        result_artist = (result.get('artistName') or '').lower()
        result_album = (result.get('collectionName') or '').lower()
        # An empty string is a substring of everything, so a result with a
        # missing name must never count as a match.
        if not result_artist or not result_album:
            continue

        artist_matches = wanted_artist in result_artist or result_artist in wanted_artist
        album_matches = wanted_album in result_album or result_album in wanted_album
        if artist_matches and album_matches:
            return artwork_url

    # No name match: fall back to the first result that had artwork (or None).
    return fallback_url
def download_album_covers():
    """Download cover art for every album listed in top_500_albums_2023.csv.

    For each CSV row with both an artist and an album, the cover is stored as
    ``covers/rank_<rank>_<artist>_<album>.jpg`` (rank zero-padded to three
    characters, names passed through ``sanitize_filename``). Rows whose file
    already exists are counted as downloaded and skipped. Rows recorded as
    failed in ``download_log.json`` are skipped without retrying. The log is
    rewritten after every attempt so an interrupted run can resume.

    Prints progress while running and a summary at the end. Returns None; if
    the CSV file is missing an error is printed and nothing is downloaded.
    """
    covers_dir = 'covers'
    os.makedirs(covers_dir, exist_ok=True)

    csv_file = 'top_500_albums_2023.csv'
    if not os.path.exists(csv_file):
        print(f"Error: {csv_file} not found!")
        return

    albums_processed = 0
    albums_found = 0
    albums_downloaded = 0

    # Maps "<artist>_<album>" -> True (downloaded) / False (attempt failed).
    log_file = 'download_log.json'
    processed_albums = {}
    if os.path.exists(log_file):
        # The log is rewritten on every iteration, so an interrupted run can
        # leave it truncated; start fresh rather than crash on every rerun.
        try:
            with open(log_file, 'r', encoding='utf-8') as f:
                processed_albums = json.load(f)
        except (OSError, ValueError) as e:
            print(f"Warning: ignoring unreadable {log_file}: {e}")
            processed_albums = {}
        if not isinstance(processed_albums, dict):
            processed_albums = {}

    with open(csv_file, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            # DictReader yields None for columns missing from a short row.
            rank = (row.get('Rank') or '').strip()
            artist = (row.get('Artist') or '').strip()
            album = (row.get('Album') or '').strip()

            if not artist or not album:
                continue

            albums_processed += 1

            safe_artist = sanitize_filename(artist)
            safe_album = sanitize_filename(album)
            # rank is a string, so the integer format spec ':03d' would raise
            # ValueError; zfill gives the same zero-padded result for digits.
            filename = f"rank_{rank.zfill(3)}_{safe_artist}_{safe_album}.jpg"
            filepath = os.path.join(covers_dir, filename)

            if os.path.exists(filepath):
                print(f"✓ Already exists: {rank}. {artist} - {album}")
                albums_downloaded += 1
                continue

            album_key = f"{artist}_{album}"
            if album_key in processed_albums and not processed_albums[album_key]:
                print(f"⚠ Previously failed: {rank}. {artist} - {album}")
                continue

            print(f"Searching: {rank}. {artist} - {album}")
            artwork_url = search_itunes(artist, album)

            if artwork_url:
                # Download next to the target and rename on success, so a
                # failed transfer never leaves a partial file that a later
                # run would mistake for a finished cover.
                partial_path = filepath + '.part'
                try:
                    print(f"  Downloading from: {artwork_url}")
                    urlretrieve(artwork_url, partial_path)
                    os.replace(partial_path, filepath)
                    print(f"  ✓ Downloaded: {filename}")
                    albums_found += 1
                    albums_downloaded += 1
                    processed_albums[album_key] = True
                except Exception as e:
                    # Deliberately broad: one bad download must not abort the
                    # whole batch; the failure is reported and logged.
                    print(f"  ✗ Download failed: {e}")
                    processed_albums[album_key] = False
                    if os.path.exists(partial_path):
                        try:
                            os.remove(partial_path)
                        except OSError:
                            pass  # best-effort cleanup of the temp file
            else:
                print("  ✗ No artwork found")
                processed_albums[album_key] = False

            # Persist after every attempt so progress survives interruption.
            with open(log_file, 'w', encoding='utf-8') as f:
                json.dump(processed_albums, f, indent=2)

            # Be nice to the API
            time.sleep(0.5)

            if albums_processed % 25 == 0:
                print(f"\nProgress: {albums_processed}/500 processed, {albums_found} found, {albums_downloaded} downloaded\n")

    print("\nFinal Results:")
    print(f"Albums processed: {albums_processed}")
    print(f"Artwork found: {albums_found}")
    print(f"Total downloaded: {albums_downloaded}")
    # Guard against an empty CSV (no rows with artist and album).
    success_rate = albums_found / albums_processed * 100 if albums_processed else 0.0
    print(f"Success rate: {success_rate:.1f}%")
def create_missing_report():
    """Write missing_covers.csv listing albums that have no cover file.

    Reads top_500_albums_2023.csv and, for each row with both an artist and
    an album, checks whether ``covers/rank_<rank>_<artist>_<album>.jpg``
    exists (rank zero-padded to three characters, names passed through
    ``sanitize_filename``). The number of missing covers is printed; when at
    least one is missing, their rank, artist, album and expected filename are
    written to ``missing_covers.csv``.

    Returns None. If the CSV file is missing an error is printed and no
    report is written.
    """
    covers_dir = 'covers'
    csv_file = 'top_500_albums_2023.csv'

    if not os.path.exists(csv_file):
        print(f"Error: {csv_file} not found!")
        return

    missing_albums = []
    with open(csv_file, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            # DictReader yields None for columns missing from a short row.
            rank = (row.get('Rank') or '').strip()
            artist = (row.get('Artist') or '').strip()
            album = (row.get('Album') or '').strip()

            # Rows without artist or album are never downloaded, so they
            # should not be reported as missing either.
            if not artist or not album:
                continue

            safe_artist = sanitize_filename(artist)
            safe_album = sanitize_filename(album)
            # rank is a string, so the integer format spec ':03d' would raise
            # ValueError; zfill gives the same zero-padded result for digits.
            filename = f"rank_{rank.zfill(3)}_{safe_artist}_{safe_album}.jpg"
            filepath = os.path.join(covers_dir, filename)

            if not os.path.exists(filepath):
                missing_albums.append({
                    'rank': rank,
                    'artist': artist,
                    'album': album,
                    'filename': filename,
                })

    print(f"\nMissing covers: {len(missing_albums)}")

    if missing_albums:
        with open('missing_covers.csv', 'w', newline='', encoding='utf-8') as file:
            writer = csv.DictWriter(file, fieldnames=['rank', 'artist', 'album', 'filename'])
            writer.writeheader()
            writer.writerows(missing_albums)
        print("Created missing_covers.csv with list of albums without artwork")
if __name__ == "__main__":
    # Interactive entry point: ask which step(s) to run, then run them.
    print("Top 500 Albums Cover Art Downloader")
    print("===================================")

    # Strip so that stray whitespace around the answer is not treated as an
    # invalid choice.
    choice = input("Choose option:\n1. Download covers\n2. Create missing report\n3. Both\nEnter choice (1-3): ").strip()

    if choice not in ('1', '2', '3'):
        # Previously an unrecognised answer did nothing yet still printed
        # "Done!"; say so explicitly instead.
        print(f"Invalid choice: {choice!r} (expected 1, 2 or 3)")
    else:
        if choice in ('1', '3'):
            download_album_covers()

        if choice in ('2', '3'):
            create_missing_report()

        print("\nDone!")