top500albums/scripts/fix_final_dropped_list.py
Johan Lundberg e64b267ee3 Update dropped albums list with corrected artist/album names
- Re-ran dropped album matching with corrected names from user
- Fixed final list to have exactly 89 correctly identified dropped albums
- Removed any albums incorrectly marked as dropped that are in main list
- Verified no duplicates between main list (1-500) and dropped list (501-589)
- Total: 589 albums (500 current + 89 dropped)

All dropped albums now correctly match albums missing from 2020→2023

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-07-01 02:37:36 +02:00

87 lines
No EOL
2.9 KiB
Python

#!/usr/bin/env python3
"""
Fix the final dropped list with the corrected album names.
Replace current dropped albums with the correct ones.
"""
import csv
def normalize_text(text):
    """Normalize text for comparison.

    Lower-cases ``text``, spells out ``&`` as ``and`` and collapses every
    run of whitespace to a single space; leading and trailing whitespace is
    removed.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    # split()/join collapses runs of any length (and tabs/newlines) in one
    # pass; a single str.replace of spaces cannot do that.
    return ' '.join(text.lower().replace('&', 'and').split())
def main():
    """Rebuild the dropped-albums section of ``top_500_albums_2023.csv``.

    Works on three CSV files in the current working directory:

    * ``correct_dropped_albums.csv`` -- the corrected dropped albums; the
      columns ``Original_Rank_2020``, ``Artist`` and ``Album`` are read.
    * ``rolling_stone_top_500_albums_2020.csv`` -- source of the ``Info``
      and ``Description`` text, looked up by exact ``(Artist, Album)``.
    * ``top_500_albums_2023.csv`` -- the list being repaired; it is read
      completely and then rewritten in place.

    Every existing row whose ``Status`` contains ``"Dropped"`` is discarded
    and the corrected dropped albums are appended with ranks starting at
    501. Progress and a summary are printed to stdout.

    Raises:
        OSError: If one of the files cannot be opened.
        KeyError: If ``correct_dropped_albums.csv`` or the 2020 file lacks
            one of the columns that are read by name.
    """
    required_columns = ['Rank', 'Artist', 'Album', 'Status', 'Info', 'Description']
    first_dropped_rank = 501
    preview_limit = 10

    # Read the correct dropped albums list.
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields containing newlines are read back unchanged.
    with open('correct_dropped_albums.csv', 'r', newline='', encoding='utf-8') as file:
        correct_dropped = [
            {
                'rank_2020': row['Original_Rank_2020'],
                'artist': row['Artist'],
                'album': row['Album'],
            }
            for row in csv.DictReader(file)
        ]
    print(f"📊 Loading {len(correct_dropped)} correct dropped albums")

    # Get info/descriptions from 2020 data for dropped albums.
    info_desc_2020 = {}
    with open('rolling_stone_top_500_albums_2020.csv', 'r', newline='', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            info_desc_2020[(row['Artist'], row['Album'])] = {
                'info': row.get('Info', ''),
                'description': row.get('Description', ''),
            }

    # Read current CSV and remove all dropped albums.
    albums = []
    ragged_rows = 0
    with open('top_500_albums_2023.csv', 'r', newline='', encoding='utf-8') as file:
        reader = csv.DictReader(file)
        # Keep the standard columns first (same order as before) and carry
        # over any additional columns the file already has. Writing with a
        # column list that misses an input column would make DictWriter
        # raise after the output file has already been truncated.
        fieldnames = list(dict.fromkeys(required_columns + list(reader.fieldnames or [])))
        for row in reader:
            # Short rows yield None for missing cells; treat that as "".
            if 'Dropped' in (row.get('Status') or ''):
                continue
            # Cells beyond the header are collected under the key None,
            # which cannot be written back; drop them and report it below.
            if row.pop(None, None) is not None:
                ragged_rows += 1
            albums.append(row)
    kept_count = len(albums)
    print(f"📊 Kept {kept_count} non-dropped albums")
    if ragged_rows:
        print(f"⚠️ Ignored extra cells without a header in {ragged_rows} row(s)")

    # Add the correct dropped albums.
    for offset, album in enumerate(correct_dropped):
        # Find info/description for this album (empty when not in 2020 data).
        info_data = info_desc_2020.get(
            (album['artist'], album['album']),
            {'info': '', 'description': ''},
        )
        albums.append({
            'Rank': str(first_dropped_rank + offset),
            'Artist': album['artist'],
            'Album': album['album'],
            'Status': f"Dropped (was #{album['rank_2020']} in 2020)",
            'Info': info_data['info'],
            'Description': info_data['description'],
        })
        # Only echo the first few additions to keep the output short.
        if offset < preview_limit:
            print(f"✓ Added: #{album['rank_2020']} - {album['artist']} - {album['album']}")
    added_count = len(correct_dropped)
    if added_count > preview_limit:
        print(f"... and {added_count - preview_limit} more")

    # Write updated CSV. Columns missing from a row are written as "".
    with open('top_500_albums_2023.csv', 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(albums)

    # Report the counts that were actually written instead of fixed numbers.
    last_dropped_rank = first_dropped_rank + added_count - 1
    print("\n✅ Updated dropped albums list")
    print(f"📊 Total albums now: {len(albums)}")
    print(f"📊 Main list (1-500): {kept_count}")
    print(f"📊 Dropped ({first_dropped_rank}-{last_dropped_rank}): {added_count}")
# Run the fix only when executed as a script, not when imported.
if __name__ == "__main__":
    main()