Finalize dropped albums list with correct 8 albums and balance new albums

- Corrected dropped albums to exactly 8 albums through detailed comparison analysis
- Updated dropped albums list (ranks 501-508) with proper albums that were truly removed
- Fixed "New in 2023" markings to show only 8 albums (balancing the 8 dropped)
- Downloaded cover art for all 8 dropped albums
- Removed incorrect cover art files for albums that weren't actually dropped
- Updated data files with corrected artist/album name formatting for accurate matching

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-07-01 03:32:16 +02:00 · 2025-07-01 03:32:16 +02:00 · 88a6434132
commit 88a6434132
parent e64b267ee3
31 changed files with 1082 additions and 217 deletions
--- a/scripts/compare_2020_vs_2023.py
+++ b/scripts/compare_2020_vs_2023.py
@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+"""
+Compare rolling_stone_2020_simplified.csv and wikipedia_top_500_albums.csv to find dropped albums.
+"""
+
+import csv
+
+def normalize_text(text):
+    """Return *text* in a canonical form for matching artist/album names.
+
+    Lowercases the text, spells '&' as 'and', trims leading/trailing
+    whitespace and collapses every internal whitespace run to one space.
+    """
+    # split() with no separator drops all whitespace runs (any length, any
+    # whitespace character), which a single replace('  ', ' ') pass cannot do.
+    return ' '.join(text.lower().replace('&', 'and').split())
+
+def main():
+    """Report 2020 Rolling Stone albums that are missing from the Wikipedia 2023 list.
+
+    Reads 'rolling_stone_2020_simplified.csv' (columns Rank, Artist, Album) and
+    'wikipedia_top_500_albums.csv' (columns artist, album) via relative paths,
+    prints every 2020 entry whose normalized (artist, album) pair does not
+    occur in the Wikipedia file, and writes those entries to
+    'dropped_albums_2020_vs_2023.csv'.
+    """
+    # Index the 2020 list by normalized (artist, album); a repeated key keeps
+    # the last row read.
+    by_key_2020 = {}
+    with open('rolling_stone_2020_simplified.csv', 'r', encoding='utf-8') as src:
+        for record in csv.DictReader(src):
+            artist, title = record['Artist'], record['Album']
+            by_key_2020[(normalize_text(artist), normalize_text(title))] = {
+                'rank': record['Rank'],
+                'artist': artist,
+                'album': title,
+            }
+
+    print(f"📊 Loaded {len(by_key_2020)} albums from 2020 Rolling Stone list")
+
+    # The Wikipedia file is read by its lowercase column names; only the
+    # normalized keys are needed for the membership test below.
+    with open('wikipedia_top_500_albums.csv', 'r', encoding='utf-8') as src:
+        keys_2023 = {
+            (normalize_text(record['artist']), normalize_text(record['album']))
+            for record in csv.DictReader(src)
+        }
+
+    print(f"📊 Loaded {len(keys_2023)} albums from Wikipedia 2023 list")
+
+    # Keep the 2020 entries absent from the 2023 set, ordered by numeric rank.
+    missing = sorted(
+        (entry for key, entry in by_key_2020.items() if key not in keys_2023),
+        key=lambda entry: int(entry['rank']),
+    )
+
+    divider = "=" * 80
+    print(f"\n❌ Found {len(missing)} albums dropped from 2020 to 2023:")
+    print(divider)
+    for entry in missing:
+        print(f"#{entry['rank']:3s} - {entry['artist']} - {entry['album']}")
+    print(divider)
+
+    print(f"\n📊 Summary:")
+    print(f"   - Albums in 2020: {len(by_key_2020)}")
+    print(f"   - Albums in Wikipedia 2023: {len(keys_2023)}")
+    print(f"   - Albums dropped: {len(missing)}")
+
+    # Persist the dropped entries, one CSV row per album, in rank order.
+    with open('dropped_albums_2020_vs_2023.csv', 'w', newline='', encoding='utf-8') as out:
+        writer = csv.DictWriter(out, fieldnames=['Original_Rank_2020', 'Artist', 'Album'])
+        writer.writeheader()
+        writer.writerows(
+            {
+                'Original_Rank_2020': entry['rank'],
+                'Artist': entry['artist'],
+                'Album': entry['album'],
+            }
+            for entry in missing
+        )
+
+    print(f"\n💾 Saved dropped albums list to: dropped_albums_2020_vs_2023.csv")
+
+# Run the comparison only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()