- Re-ran dropped album matching with corrected names from user
- Fixed final list to have exactly 89 correctly identified dropped albums
- Removed any albums incorrectly marked as dropped that are in main list
- Verified no duplicates between main list (1-500) and dropped list (501-589)
- Total: 589 albums (500 current + 89 dropped)

All dropped albums now correctly match albums missing from 2020→2023

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
126 lines
No EOL
4.8 KiB
Python
126 lines
No EOL
4.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Re-check dropped albums with the corrected artist/album names.
|
|
"""
|
|
|
|
import csv
|
|
|
|
def normalize_text(text: str) -> str:
    """Normalize text for comparison.

    Lowercases *text*, replaces ``&`` with ``and``, trims both ends, and
    collapses every run of whitespace into a single space so that spacing
    differences alone never make two names compare unequal.

    Args:
        text: The artist or album name to normalize.

    Returns:
        The normalized string (empty if *text* is empty or all whitespace).
    """
    # split() with no argument breaks on any run of whitespace and drops
    # leading/trailing whitespace, so join() yields single-space separation
    # regardless of how many spaces or tabs appeared in the input.
    return ' '.join(text.lower().replace('&', 'and').split())
|
|
|
|
def main():
    """Recompute which 2020 albums were dropped from the 2023 top 500.

    Reads ``rolling_stone_2020_simple.csv`` (columns Rank, Artist, Album) and
    ``top_500_albums_2023.csv`` (columns Rank, Artist, Album, Status) from the
    current working directory, prints a comparison report, and writes every
    album that is in the 2020 list but not among the 2023 rows ranked <= 500
    to ``correct_dropped_albums.csv``.

    Raises:
        FileNotFoundError: If either input CSV does not exist.
        KeyError: If an expected column is missing from an input CSV.
        ValueError: If a Rank value cannot be parsed as an integer.
    """
    # Read 2020 albums (simplified), keyed by normalized (artist, album).
    # A later row with the same normalized key replaces the earlier one.
    albums_2020 = {}
    with open('rolling_stone_2020_simple.csv', 'r', newline='', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            key = (normalize_text(row['Artist']), normalize_text(row['Album']))
            albums_2020[key] = {
                'rank': row['Rank'],
                'artist': row['Artist'],
                'album': row['Album'],
            }

    print(f"📊 Loaded {len(albums_2020)} albums from 2020 list")

    # Read the 2023 file once, collecting both views of it:
    #   * rank_2023_by_key: normalized key -> rank for rows ranked <= 500
    #     (first occurrence wins, so duplicates report the earliest row);
    #   * current_dropped: every row whose Status mentions 'Dropped'.
    rank_2023_by_key = {}
    current_dropped = []
    new_count = 0
    with open('top_500_albums_2023.csv', 'r', newline='', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            status = row['Status']
            if int(row['Rank']) <= 500:
                key = (normalize_text(row['Artist']), normalize_text(row['Album']))
                rank_2023_by_key.setdefault(key, row['Rank'])
                if status == 'New in 2023':
                    new_count += 1
            if 'Dropped' in status:
                current_dropped.append({
                    'rank': row['Rank'],
                    'artist': row['Artist'],
                    'album': row['Album'],
                })

    print(f"📊 Loaded {len(rank_2023_by_key)} albums from 2023 list (first 500)")
    print(f"🆕 Found {new_count} albums marked as 'New in 2023'")
    print(f"📊 Currently have {len(current_dropped)} albums marked as dropped")

    # Truly dropped albums are in 2020 but not in 2023, ordered by their
    # original 2020 rank (sorted() is stable, so ties keep file order).
    truly_dropped = sorted(
        (info for key, info in albums_2020.items() if key not in rank_2023_by_key),
        key=lambda album: int(album['rank']),
    )

    print(f"\n❌ Found {len(truly_dropped)} albums that should be dropped (in 2020 but not in 2023)")

    # Show first 20 that should be dropped
    print("\nFirst 20 albums that should be dropped:")
    print("=" * 80)
    for album in truly_dropped[:20]:
        print(f"#{album['rank']:3s} - {album['artist']} - {album['album']}")
    print("=" * 80)

    # Check if any current "dropped" albums are actually in the 2023 list.
    incorrect_drops = []
    for dropped in current_dropped:
        key = (normalize_text(dropped['artist']), normalize_text(dropped['album']))
        if key in rank_2023_by_key:
            incorrect_drops.append({
                'dropped_rank': dropped['rank'],
                'actual_rank': rank_2023_by_key[key],
                'artist': dropped['artist'],
                'album': dropped['album'],
            })

    if incorrect_drops:
        print(f"\n⚠️ Found {len(incorrect_drops)} albums marked as dropped but are in main list:")
        # Only the first 10 are listed to keep the report short.
        for inc in incorrect_drops[:10]:
            print(f" Marked as dropped at #{inc['dropped_rank']} but is at #{inc['actual_rank']} - {inc['artist']} - {inc['album']}")

    print("\n📊 Summary:")
    print(f" - Should have dropped: {len(truly_dropped)}")
    print(f" - Currently marked as dropped: {len(current_dropped)}")
    print(f" - Difference: {len(truly_dropped) - len(current_dropped)}")

    # Save the correct dropped list
    with open('correct_dropped_albums.csv', 'w', newline='', encoding='utf-8') as file:
        fieldnames = ['Original_Rank_2020', 'Artist', 'Album']
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(
            {
                'Original_Rank_2020': album['rank'],
                'Artist': album['artist'],
                'Album': album['album'],
            }
            for album in truly_dropped
        )

    print("\n💾 Saved correct dropped list to: correct_dropped_albums.csv")
|
|
|
|
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()