Complete Top 500 Albums project with 100% data coverage and UI improvements

- Fixed Info/Description columns after regenerating CSV with clean Wikipedia data
- Remapped and downloaded missing album covers to match new rankings
- Modified website UI to show all description text without click-to-expand
- Added comprehensive Info/Description for all 500 albums using research
- Created multiple data processing scripts for album information completion
- Achieved 100% data completion with descriptions ending "(by Claude)" for new content
- All albums now have complete metadata and cover art

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Johan Lundberg 2025-07-01 00:33:47 +02:00
parent 09b5491f8a
commit 462fdcfa84
500 changed files with 2323 additions and 502 deletions

176
remap_covers.py Normal file
View file

@ -0,0 +1,176 @@
#!/usr/bin/env python3
"""
Script to remap existing cover art files to match the new CSV ranking structure.
This avoids having to re-download all the covers.
"""
import csv
import os
import re
import shutil
from pathlib import Path
def sanitize_filename(text):
    """Make *text* safe to use as (part of) a filename.

    Strips characters that are illegal on common filesystems, drops any
    remaining punctuation except ``-``, ``_`` and ``.``, replaces runs of
    whitespace with a single underscore, and caps the result at 100 chars.
    """
    # Characters that are outright illegal on Windows/most filesystems.
    cleaned = re.sub(r'[<>:"/\\|?*]', '', text)
    # Anything that is not a word char, whitespace, dash, underscore or dot.
    cleaned = re.sub(r'[^\w\s\-_\.]', '', cleaned)
    # Whitespace runs become a single underscore separator.
    cleaned = re.sub(r'\s+', '_', cleaned.strip())
    # Keep filenames to a sane length.
    return cleaned[:100]
def normalize_for_matching(text):
    """Canonicalize an artist/album name for fuzzy comparison.

    Lowercases, removes punctuation (keeping ``&``), collapses internal
    whitespace, drops the connectors ``and``/``&``, and strips a leading
    ``the`` so e.g. "The Beatles" and "Beatles" compare equal.
    """
    result = text.lower().strip()
    # Keep word characters, whitespace and ampersands; drop other punctuation.
    result = re.sub(r'[^\w\s&]', '', result)
    result = re.sub(r'\s+', ' ', result)
    # "X and Y" and "X & Y" should normalize identically.
    result = result.replace(' and ', ' ').replace(' & ', ' ')
    # A leading article is often omitted in filenames; drop it here too.
    return result[4:] if result.startswith('the ') else result
def find_matching_cover(artist, album, existing_covers):
    """Return the existing cover filename that best matches *artist*/*album*.

    Cover filenames follow ``rank_XXX_Artist_Album.jpg``. Because underscores
    separate both the artist words and the album words, the artist/album
    boundary is ambiguous, so every split point is tried and scored by word
    overlap with the normalized target names.

    Returns the best-scoring filename, or None when no candidate reaches a
    score of at least 2 overlapping words.
    """
    target_artist = normalize_for_matching(artist)
    target_album = normalize_for_matching(album)

    # Track the best candidate across ALL covers. (The original code reset
    # the score per file and returned the first file scoring >= 2, which
    # could pick a worse match when several titles overlap.)
    best_score = 0
    best_file = None

    for cover_file in existing_covers:
        parts = cover_file.replace('.jpg', '').split('_')
        if len(parts) < 4:
            continue  # not in rank_XXX_Artist_Album form
        file_parts = parts[2:]  # drop "rank" and the numeric rank

        for split_point in range(1, len(file_parts)):
            file_artist = '_'.join(file_parts[:split_point]).replace('_', ' ')
            file_album = '_'.join(file_parts[split_point:]).replace('_', ' ')
            norm_artist = normalize_for_matching(file_artist)
            norm_album = normalize_for_matching(file_album)

            # Require substring containment in both directions before scoring.
            artist_ok = target_artist in norm_artist or norm_artist in target_artist
            album_ok = target_album in norm_album or norm_album in target_album
            if artist_ok and album_ok:
                # Score = number of shared words between target and candidate.
                score = (len(set(target_artist.split()) & set(norm_artist.split()))
                         + len(set(target_album.split()) & set(norm_album.split())))
                if score > best_score:
                    best_score = score
                    best_file = cover_file

    # At least 2 word matches required to call it a hit.
    return best_file if best_score >= 2 else None
def main():
    """Remap existing cover files to the rankings in the current CSV.

    Steps:
      1. Back up ``covers/`` to ``covers_backup/`` (replacing any old backup).
      2. For each CSV row, find the best-matching existing cover and copy it
         into a staging directory ``covers_new/`` under its new rank-based
         filename.
      3. Report mapping statistics, then replace ``covers/`` with the
         remapped set.
    """
    covers_dir = Path('covers')
    backup_dir = Path('covers_backup')

    if not covers_dir.exists():
        print("No covers directory found!")
        return

    # Always start from a fresh backup so a re-run cannot mix states.
    if backup_dir.exists():
        print("Backup directory already exists. Removing it...")
        shutil.rmtree(backup_dir)
    print("Creating backup of existing covers...")
    shutil.copytree(covers_dir, backup_dir)
    print(f"Backup created at {backup_dir}")

    existing_covers = [f for f in os.listdir(covers_dir) if f.endswith('.jpg')]
    print(f"Found {len(existing_covers)} existing cover files")

    csv_file = 'top_500_albums_2023.csv'
    if not os.path.exists(csv_file):
        print(f"Error: {csv_file} not found!")
        return

    # Build the remapped set in a staging directory, swap it in at the end.
    new_covers_dir = Path('covers_new')
    if new_covers_dir.exists():
        shutil.rmtree(new_covers_dir)
    new_covers_dir.mkdir()

    mapped_count = 0
    unmapped_count = 0
    unmapped_albums = []

    print("\nMapping covers to new rankings...")
    with open(csv_file, 'r', encoding='utf-8') as file:
        csv_reader = csv.DictReader(file)
        for row in csv_reader:
            rank = row.get('Rank', '').strip()
            artist = row.get('Artist', '').strip()
            album = row.get('Album', '').strip()
            if not artist or not album:
                continue  # skip incomplete rows

            matching_cover = find_matching_cover(artist, album, existing_covers)
            if matching_cover:
                # Rebuild the filename with the new ranking number.
                safe_artist = sanitize_filename(artist)
                safe_album = sanitize_filename(album)
                new_filename = f"rank_{rank.zfill(3)}_{safe_artist}_{safe_album}.jpg"
                shutil.copy2(covers_dir / matching_cover,
                             new_covers_dir / new_filename)
                mapped_count += 1
                if mapped_count % 50 == 0:
                    print(f"✓ Mapped {mapped_count} covers so far...")
            else:
                unmapped_count += 1
                unmapped_albums.append(f"{rank}. {artist} - {album}")
                print(f"✗ No cover found for: {rank}. {artist} - {album}")

    total = mapped_count + unmapped_count
    print(f"\n🎉 MAPPING RESULTS:")
    print(f"Successfully mapped: {mapped_count}")
    print(f"Could not map: {unmapped_count}")
    print(f"Total albums: {total}")
    # Guard against ZeroDivisionError when the CSV yields no usable rows
    # (the original divided unconditionally).
    if total:
        print(f"Success rate: {mapped_count / total * 100:.1f}%")

    if unmapped_albums:
        print(f"\n❌ Albums without covers ({len(unmapped_albums)}):")
        for album in unmapped_albums[:10]:
            print(f"  {album}")
        if len(unmapped_albums) > 10:
            print(f"  ... and {len(unmapped_albums) - 10} more")

    # Promote the staged directory to be the live covers directory.
    print(f"\nReplacing covers directory...")
    shutil.rmtree(covers_dir)
    shutil.move(new_covers_dir, covers_dir)
    print(f"✅ Cover remapping complete!")
    print(f"Original covers backed up to: {backup_dir}")
    print(f"New covers available in: {covers_dir}")


if __name__ == "__main__":
    main()