top500albums/scripts/fix_white_album.py
Johan Lundberg a2713e9fb1 Fix dropped albums list - remove duplicates and correct data
- Removed 15 albums incorrectly marked as dropped that are still in main list
- Fixed White Album duplication (was listed at both rank 29 and 502)
- Beatles albums Sgt. Pepper's (#24) and White Album (#29) are NOT dropped
- Final dropped albums: 7 genuinely removed albums (ranks 501-507)
- Updated JavaScript validation for correct range (1-507)
- Removed unnecessary cover art files for incorrectly marked albums

Correctly identifies only truly dropped albums from 2020→2023

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-07-01 01:03:11 +02:00

62 lines
No EOL
2.3 KiB
Python

#!/usr/bin/env python3
"""
Fix the White Album duplication - it's listed both in main list and dropped.
"""
import csv
# Column order used only when the input file has no header row to copy.
_FALLBACK_FIELDNAMES = ['Rank', 'Artist', 'Album', 'Status', 'Info', 'Description']


def _is_white_album(album):
    """Return True if the row is The Beatles' self-titled "White Album"."""
    title = album.get('Album') or ''
    return album.get('Artist') == 'The Beatles' and (
        'White Album' in title or title == 'The Beatles'
    )


def _is_dropped(album):
    """Return True if the row's Status field starts with 'Dropped'."""
    return (album.get('Status') or '').startswith('Dropped')


def main(csv_path='top_500_albums_2023.csv'):
    """Remove the duplicated White Album row from the albums CSV.

    The CSV is read, every Beatles row whose title contains 'White Album'
    or equals 'The Beatles' is reported, and if more than one such row
    exists the ones whose Status starts with 'Dropped' are deleted. Rows
    ranked above 500 are then renumbered consecutively from 501 in file
    order and the file is rewritten with its original column set.

    The file is left untouched when there is no duplicate, or when none
    of the duplicates is marked as dropped.

    Args:
        csv_path: Path of the CSV file to fix in place. Defaults to the
            file name the script has always used.

    Raises:
        OSError: If the file cannot be read or written.
        KeyError: If a matched row lacks a 'Rank' or 'Status' column.
        ValueError: If a 'Rank' value is not an integer string.
    """
    # Function-scope import: this name is not imported at file level.
    import os

    # newline='' lets the csv module handle quoted fields that contain
    # embedded line breaks, as recommended by the csv documentation.
    with open(csv_path, 'r', newline='', encoding='utf-8') as file:
        reader = csv.DictReader(file)
        albums = list(reader)
        # Preserve whatever columns the file really has instead of
        # assuming a fixed schema.
        fieldnames = list(reader.fieldnames or _FALLBACK_FIELDNAMES)

    print("🔍 Looking for White Album duplication...")

    white_album_entries = [album for album in albums if _is_white_album(album)]
    for album in white_album_entries:
        print(f" Found: Rank {album['Rank']} - {album['Artist']} - {album['Album']} ({album['Status']})")

    if len(white_album_entries) <= 1:
        print("✅ No duplicates found")
        return

    print(f"\n❌ Found {len(white_album_entries)} White Album entries - removing duplicate")

    # The same predicate is used for detection and removal so a duplicate
    # that was detected can never be silently skipped.
    doomed = [album for album in white_album_entries if _is_dropped(album)]
    if len(doomed) == len(white_album_entries):
        # Every copy is marked dropped: keep the first so the album is
        # de-duplicated rather than erased.
        doomed = doomed[1:]
    if not doomed:
        print("⚠️ No dropped duplicate to remove - file left unchanged")
        return

    # Rows are dicts that may compare equal, so track them by identity.
    doomed_ids = {id(album) for album in doomed}
    fixed_albums = []
    for album in albums:
        if id(album) in doomed_ids:
            print(f" 🗑️ Removed duplicate: Rank {album['Rank']} - {album['Album']}")
        else:
            fixed_albums.append(album)
    removed_count = len(doomed_ids)

    # Renumber the remaining albums after rank 500 so the tail has no gap.
    current_rank = 501
    for album in fixed_albums:
        if int(album['Rank']) > 500:
            album['Rank'] = str(current_rank)
            current_rank += 1

    # Write to a sibling temp file and swap it in, so a failure while
    # writing cannot leave the original file truncated.
    tmp_path = f"{csv_path}.tmp"
    with open(tmp_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(fixed_albums)
    os.replace(tmp_path, csv_path)

    print(f"\n✅ Fixed! Removed {removed_count} duplicate entry")
    print(f"📊 Total albums now: {len(fixed_albums)}")
if __name__ == "__main__":
    # Run the fix only when executed as a script, not when imported.
    main()