top500albums/scripts/find_all_dropped.py
Johan Lundberg c3a24799c8 Complete dropped albums list with all 89 truly dropped albums from 2020
- Added all 89 albums that were genuinely dropped from 2020 to 2023
- Fixed incorrect status markings (many albums marked "New in 2023" were not new)
- Removed duplicates and albums incorrectly marked as dropped
- Final count: 589 total (500 main list + 89 dropped)
- Updated JavaScript validation for extended range
- Created comprehensive analysis scripts to verify data

Math now adds up correctly: 89 albums dropped to make room for new additions

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-07-01 01:14:06 +02:00

79 lines
No EOL
2.7 KiB
Python

#!/usr/bin/env python3
"""
Find ALL albums that were dropped from 2020 to 2023 by comparing the lists.
"""
import csv
def normalize_text(text):
    """Normalize text for comparison.

    Lower-cases ``text``, replaces every ``&`` with ``and`` and collapses each
    run of whitespace into a single space, dropping leading and trailing
    whitespace. Two spellings that differ only in case, ``&`` vs ``and`` or
    spacing therefore normalize to the same string.

    Args:
        text: The raw artist or album string.

    Returns:
        The normalized string.
    """
    # str.split() with no argument splits on any whitespace run and discards
    # empty pieces, so joining with one space both trims the ends and squeezes
    # internal runs (the previous single-space-for-single-space replace did
    # nothing).
    return ' '.join(text.lower().replace('&', 'and').split())
def main():
    """Report the 2020 albums that are missing from the 2023 top 500.

    Reads ``rolling_stone_2020_simple.csv`` and ``top_500_albums_2023.csv``
    from the current working directory, prints the dropped albums ordered by
    their 2020 rank followed by a short summary, and writes the same list to
    ``truly_dropped_albums.csv``.
    """
    # Index the 2020 list by normalized (artist, album); if a key repeats,
    # the last row read wins.
    with open('rolling_stone_2020_simple.csv', 'r', encoding='utf-8') as src_2020:
        by_key_2020 = {
            (normalize_text(rec['Artist']), normalize_text(rec['Album'])): {
                'rank': rec['Rank'],
                'artist': rec['Artist'],
                'album': rec['Album'],
            }
            for rec in csv.DictReader(src_2020)
        }
    print(f"📊 Loaded {len(by_key_2020)} albums from 2020 list")

    # Collect the 2023 keys, looking only at rows ranked 1-500.
    keys_2023 = set()
    flagged_new = 0
    with open('top_500_albums_2023.csv', 'r', encoding='utf-8') as src_2023:
        for rec in csv.DictReader(src_2023):
            if int(rec['Rank']) > 500:
                continue
            keys_2023.add(
                (normalize_text(rec['Artist']), normalize_text(rec['Album']))
            )
            if rec['Status'] == 'New in 2023':
                flagged_new += 1
    print(f"📊 Loaded {len(keys_2023)} albums from 2023 list")
    print(f"🆕 Found {flagged_new} albums marked as 'New in 2023'")

    # Anything on the 2020 list whose key is absent from 2023 was dropped;
    # order the result by its original 2020 rank.
    missing = sorted(
        (info for key, info in by_key_2020.items() if key not in keys_2023),
        key=lambda info: int(info['rank']),
    )

    rule = "=" * 80
    print(f"\n❌ Found {len(missing)} albums dropped from 2020 to 2023:")
    print(rule)
    for entry in missing:
        print(f"#{entry['rank']:3s} - {entry['artist']} - {entry['album']}")
    print(rule)

    # The counts should agree if every new 2023 entry displaced one old one.
    verdict = '✅ Yes' if flagged_new == len(missing) else '❌ No'
    print("\n📊 Summary:")
    print(f" - New albums in 2023: {flagged_new}")
    print(f" - Dropped albums: {len(missing)}")
    print(f" - Match: {verdict}")

    # Persist the dropped list.
    with open('truly_dropped_albums.csv', 'w', newline='', encoding='utf-8') as out:
        writer = csv.DictWriter(
            out, fieldnames=['Original_Rank_2020', 'Artist', 'Album']
        )
        writer.writeheader()
        writer.writerows(
            {
                'Original_Rank_2020': entry['rank'],
                'Artist': entry['artist'],
                'Album': entry['album'],
            }
            for entry in missing
        )
    print("\n💾 Saved complete list to: truly_dropped_albums.csv")
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()