Update dropped albums list with corrected artist/album names

- Re-ran dropped album matching with corrected names from user
- Fixed final list to have exactly 89 correctly identified dropped albums
- Removed any albums incorrectly marked as dropped that are in main list
- Verified no duplicates between main list (1-500) and dropped list (501-589)
- Total: 589 albums (500 current + 89 dropped)

All dropped albums now correctly match the albums that appear in the 2020 list but are missing from the 2023 list

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Johan Lundberg 2025-07-01 02:37:36 +02:00
parent 0f753b69f7
commit e64b267ee3
4 changed files with 411 additions and 172 deletions

View file

@ -0,0 +1,126 @@
#!/usr/bin/env python3
"""
Re-check dropped albums with the corrected artist/album names.
"""
import csv
def normalize_text(text):
    """Return a canonical form of *text* for artist/album comparison.

    The text is lower-cased, every '&' is replaced by 'and', leading and
    trailing whitespace is removed, and each internal run of whitespace
    (spaces, tabs, non-breaking spaces, ...) is collapsed to one space.

    Args:
        text: The raw artist or album string.

    Returns:
        The normalized string; an empty or whitespace-only input gives ''.
    """
    # str.split() with no argument splits on any whitespace run and drops
    # empty leading/trailing pieces, so join() yields single-spaced text.
    # A chained .replace() cannot do this: it handles only one fixed
    # pattern in a single pass.
    return ' '.join(text.lower().replace('&', 'and').split())
def _album_key(artist, album):
    """Build the normalized (artist, album) tuple used to match albums."""
    return (normalize_text(artist), normalize_text(album))


def _load_2020_albums(path):
    """Read the 2020 CSV into a dict of normalized key -> rank/artist/album."""
    albums = {}
    # newline='' lets the csv module handle line endings itself.
    with open(path, 'r', newline='', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            # A later row with the same normalized key replaces an earlier one.
            albums[_album_key(row['Artist'], row['Album'])] = {
                'rank': row['Rank'],
                'artist': row['Artist'],
                'album': row['Album'],
            }
    return albums


def _load_2023_albums(path):
    """Read the 2023 CSV once; return (main_by_key, new_count, dropped_rows)."""
    main_by_key = {}
    new_count = 0
    dropped_rows = []
    with open(path, 'r', newline='', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            details = {
                'rank': row['Rank'],
                'artist': row['Artist'],
                'album': row['Album'],
                'status': row['Status'],
            }
            if int(row['Rank']) <= 500:
                # setdefault keeps the first row seen for a duplicated key.
                main_by_key.setdefault(
                    _album_key(row['Artist'], row['Album']), details
                )
                if row['Status'] == 'New in 2023':
                    new_count += 1
            # Checked independently of the rank test above.
            if 'Dropped' in row['Status']:
                dropped_rows.append(details)
    return main_by_key, new_count, dropped_rows


def _find_incorrect_drops(dropped_rows, main_by_key):
    """List rows marked as dropped whose album is also in the main list."""
    incorrect = []
    for dropped in dropped_rows:
        match = main_by_key.get(_album_key(dropped['artist'], dropped['album']))
        if match is not None:
            incorrect.append({
                'dropped_rank': dropped['rank'],
                'actual_rank': match['rank'],
                'artist': dropped['artist'],
                'album': dropped['album'],
            })
    return incorrect


def _write_dropped_csv(path, albums):
    """Write *albums* to *path* as Original_Rank_2020/Artist/Album rows."""
    with open(path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(
            file, fieldnames=['Original_Rank_2020', 'Artist', 'Album']
        )
        writer.writeheader()
        writer.writerows(
            {
                'Original_Rank_2020': album['rank'],
                'Artist': album['artist'],
                'Album': album['album'],
            }
            for album in albums
        )


def main():
    """Re-check which 2020 albums are missing from the 2023 top 500.

    Reads 'rolling_stone_2020_simple.csv' (columns Rank, Artist, Album) and
    'top_500_albums_2023.csv' (columns Rank, Artist, Album, Status) from the
    current directory. Albums are matched on their normalized
    (artist, album) pair. A progress report is printed, and every 2020
    album that is not among the 2023 rows with Rank <= 500 is written to
    'correct_dropped_albums.csv', ordered by its 2020 rank.

    Raises:
        FileNotFoundError: If either input CSV is missing.
        KeyError: If an expected column is absent.
        ValueError: If a Rank value is not an integer.
    """
    albums_2020 = _load_2020_albums('rolling_stone_2020_simple.csv')
    print(f"📊 Loaded {len(albums_2020)} albums from 2020 list")

    albums_2023, new_count, current_dropped = _load_2023_albums(
        'top_500_albums_2023.csv'
    )
    print(f"📊 Loaded {len(albums_2023)} albums from 2023 list (first 500)")
    print(f"🆕 Found {new_count} albums marked as 'New in 2023'")
    print(f"📊 Currently have {len(current_dropped)} albums marked as dropped")

    # Truly dropped = present in 2020 but absent from the 2023 main list.
    truly_dropped = sorted(
        (info for key, info in albums_2020.items() if key not in albums_2023),
        key=lambda album: int(album['rank']),
    )
    print(f"\n❌ Found {len(truly_dropped)} albums that should be dropped (in 2020 but not in 2023)")

    print("\nFirst 20 albums that should be dropped:")
    print("=" * 80)
    for album in truly_dropped[:20]:
        print(f"#{album['rank']:3s} - {album['artist']} - {album['album']}")
    print("=" * 80)

    incorrect_drops = _find_incorrect_drops(current_dropped, albums_2023)
    if incorrect_drops:
        print(f"\n⚠️ Found {len(incorrect_drops)} albums marked as dropped but are in main list:")
        # Only the first ten are shown to keep the report short.
        for inc in incorrect_drops[:10]:
            print(
                f" Marked as dropped at #{inc['dropped_rank']} but is at "
                f"#{inc['actual_rank']} - {inc['artist']} - {inc['album']}"
            )

    print("\n📊 Summary:")
    print(f" - Should have dropped: {len(truly_dropped)}")
    print(f" - Currently marked as dropped: {len(current_dropped)}")
    print(f" - Difference: {len(truly_dropped) - len(current_dropped)}")

    _write_dropped_csv('correct_dropped_albums.csv', truly_dropped)
    print("\n💾 Saved correct dropped list to: correct_dropped_albums.csv")
# Run the re-check only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()