Update dropped albums list with corrected artist/album names
- Re-ran dropped album matching with corrected names from user
- Fixed final list to have exactly 89 correctly identified dropped albums
- Removed any albums incorrectly marked as dropped that are in the main list
- Verified no duplicates between main list (1-500) and dropped list (501-589)
- Total: 589 albums (500 current + 89 dropped)

All dropped albums now correctly match the albums missing from the 2020→2023 transition.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
0f753b69f7
commit
e64b267ee3
4 changed files with 411 additions and 172 deletions
126
scripts/recheck_dropped_albums.py
Normal file
126
scripts/recheck_dropped_albums.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Re-check dropped albums with the corrected artist/album names.
|
||||
"""
|
||||
|
||||
import csv
|
||||
|
||||
def normalize_text(text):
    """Normalize a name for fuzzy comparison.

    Lowercases, trims, unifies '&' with 'and', and collapses any run of
    whitespace to a single space (the original `.replace(' ', ' ')` was a
    no-op — it replaced a space with a space).
    """
    text = text.lower().strip().replace('&', 'and')
    # str.split() with no argument splits on any whitespace run and drops
    # empty strings, so this collapses internal double spaces as intended.
    return ' '.join(text.split())
|
||||
|
||||
def main():
    """Cross-check the 'Dropped' rows of the 2023 CSV against the 2020 list.

    Reads rolling_stone_2020_simple.csv and top_500_albums_2023.csv, computes
    which 2020 albums are genuinely absent from the 2023 top 500, reports any
    rows mislabelled as dropped, prints a summary, and writes the corrected
    dropped list to correct_dropped_albums.csv.
    """
    # --- 2020 list: map normalized (artist, album) -> original row info ---
    albums_2020 = {}
    with open('rolling_stone_2020_simple.csv', 'r', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            key = (normalize_text(row['Artist']), normalize_text(row['Album']))
            albums_2020[key] = {
                'rank': row['Rank'],
                'artist': row['Artist'],
                'album': row['Album'],
            }

    print(f"📊 Loaded {len(albums_2020)} albums from 2020 list")

    # --- 2023 list: a single pass collects the top-500 key set, a key ->
    # first-matching-row index, the 'New in 2023' count, and the rows
    # currently labelled Dropped (rank > 500). The original parsed this
    # file twice; once is enough. ---
    albums_2023 = set()
    details_by_key = {}   # normalized key -> first top-500 row with that key
    current_dropped = []
    new_count = 0

    with open('top_500_albums_2023.csv', 'r', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            detail = {
                'rank': row['Rank'],
                'artist': row['Artist'],
                'album': row['Album'],
                'status': row['Status'],
            }
            if int(row['Rank']) <= 500:
                key = (normalize_text(row['Artist']), normalize_text(row['Album']))
                albums_2023.add(key)
                # setdefault keeps the FIRST occurrence, matching the
                # original list-order linear scan.
                details_by_key.setdefault(key, detail)
                if row['Status'] == 'New in 2023':
                    new_count += 1
            if 'Dropped' in row['Status']:
                current_dropped.append(detail)

    print(f"📊 Loaded {len(albums_2023)} albums from 2023 list (first 500)")
    print(f"🆕 Found {new_count} albums marked as 'New in 2023'")
    print(f"📊 Currently have {len(current_dropped)} albums marked as dropped")

    # --- Albums present in 2020 but absent from the 2023 top 500,
    # ordered by their original 2020 rank ---
    truly_dropped = [info for key, info in albums_2020.items()
                     if key not in albums_2023]
    truly_dropped.sort(key=lambda album: int(album['rank']))

    print(f"\n❌ Found {len(truly_dropped)} albums that should be dropped (in 2020 but not in 2023)")

    print("\nFirst 20 albums that should be dropped:")
    print("=" * 80)
    for album in truly_dropped[:20]:
        print(f"#{album['rank']:3s} - {album['artist']} - {album['album']}")
    print("=" * 80)

    # --- Rows labelled Dropped that actually appear in the 2023 top 500.
    # O(1) dict lookup replaces the original per-row linear scan over all
    # 500 detail rows. ---
    incorrect_drops = []
    for dropped in current_dropped:
        key = (normalize_text(dropped['artist']), normalize_text(dropped['album']))
        match = details_by_key.get(key)
        if match is not None:
            incorrect_drops.append({
                'dropped_rank': dropped['rank'],
                'actual_rank': match['rank'],
                'artist': dropped['artist'],
                'album': dropped['album'],
            })

    if incorrect_drops:
        print(f"\n⚠️ Found {len(incorrect_drops)} albums marked as dropped but are in main list:")
        for inc in incorrect_drops[:10]:
            print(f" Marked as dropped at #{inc['dropped_rank']} but is at #{inc['actual_rank']} - {inc['artist']} - {inc['album']}")

    print("\n📊 Summary:")
    print(f" - Should have dropped: {len(truly_dropped)}")
    print(f" - Currently marked as dropped: {len(current_dropped)}")
    print(f" - Difference: {len(truly_dropped) - len(current_dropped)}")

    # --- Persist the corrected dropped list ---
    with open('correct_dropped_albums.csv', 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=['Original_Rank_2020', 'Artist', 'Album'])
        writer.writeheader()
        for album in truly_dropped:
            writer.writerow({
                'Original_Rank_2020': album['rank'],
                'Artist': album['artist'],
                'Album': album['album'],
            })

    print("\n💾 Saved correct dropped list to: correct_dropped_albums.csv")
|
||||
|
||||
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue