Update dropped albums list with corrected artist/album names
- Re-ran dropped album matching with corrected names from user
- Fixed final list to have exactly 89 correctly identified dropped albums
- Removed any albums incorrectly marked as dropped that are in the main list
- Verified no duplicates between main list (1-500) and dropped list (501-589)
- Total: 589 albums (500 current + 89 dropped)

All dropped albums now correctly match the albums missing from the 2020→2023 transition.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
0f753b69f7
commit
e64b267ee3
4 changed files with 411 additions and 172 deletions
126
scripts/recheck_dropped_albums.py
Normal file
126
scripts/recheck_dropped_albums.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Re-check dropped albums with the corrected artist/album names.
|
||||
"""
|
||||
|
||||
import csv
|
||||
|
||||
def normalize_text(text):
    """Normalize a name for fuzzy comparison.

    Lowercases, trims, unifies '&' with 'and', and collapses any run of
    whitespace to a single space (the original `.replace(' ', ' ')` was a
    no-op — it replaced a space with a space).
    """
    text = text.lower().strip().replace('&', 'and')
    # str.split() with no argument splits on any whitespace run and drops
    # empty strings, so this collapses internal double spaces as intended.
    return ' '.join(text.split())
|
||||
|
||||
def main():
    """Cross-check the 'Dropped' rows of the 2023 CSV against the 2020 list.

    Reads rolling_stone_2020_simple.csv and top_500_albums_2023.csv, computes
    which 2020 albums are genuinely absent from the 2023 top 500, reports any
    rows mislabelled as dropped, prints a summary, and writes the corrected
    dropped list to correct_dropped_albums.csv.
    """
    # --- 2020 list: map normalized (artist, album) -> original row info ---
    albums_2020 = {}
    with open('rolling_stone_2020_simple.csv', 'r', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            key = (normalize_text(row['Artist']), normalize_text(row['Album']))
            albums_2020[key] = {
                'rank': row['Rank'],
                'artist': row['Artist'],
                'album': row['Album'],
            }

    print(f"📊 Loaded {len(albums_2020)} albums from 2020 list")

    # --- 2023 list: a single pass collects the top-500 key set, a key ->
    # first-matching-row index, the 'New in 2023' count, and the rows
    # currently labelled Dropped (rank > 500). The original parsed this
    # file twice; once is enough. ---
    albums_2023 = set()
    details_by_key = {}   # normalized key -> first top-500 row with that key
    current_dropped = []
    new_count = 0

    with open('top_500_albums_2023.csv', 'r', encoding='utf-8') as file:
        for row in csv.DictReader(file):
            detail = {
                'rank': row['Rank'],
                'artist': row['Artist'],
                'album': row['Album'],
                'status': row['Status'],
            }
            if int(row['Rank']) <= 500:
                key = (normalize_text(row['Artist']), normalize_text(row['Album']))
                albums_2023.add(key)
                # setdefault keeps the FIRST occurrence, matching the
                # original list-order linear scan.
                details_by_key.setdefault(key, detail)
                if row['Status'] == 'New in 2023':
                    new_count += 1
            if 'Dropped' in row['Status']:
                current_dropped.append(detail)

    print(f"📊 Loaded {len(albums_2023)} albums from 2023 list (first 500)")
    print(f"🆕 Found {new_count} albums marked as 'New in 2023'")
    print(f"📊 Currently have {len(current_dropped)} albums marked as dropped")

    # --- Albums present in 2020 but absent from the 2023 top 500,
    # ordered by their original 2020 rank ---
    truly_dropped = [info for key, info in albums_2020.items()
                     if key not in albums_2023]
    truly_dropped.sort(key=lambda album: int(album['rank']))

    print(f"\n❌ Found {len(truly_dropped)} albums that should be dropped (in 2020 but not in 2023)")

    print("\nFirst 20 albums that should be dropped:")
    print("=" * 80)
    for album in truly_dropped[:20]:
        print(f"#{album['rank']:3s} - {album['artist']} - {album['album']}")
    print("=" * 80)

    # --- Rows labelled Dropped that actually appear in the 2023 top 500.
    # O(1) dict lookup replaces the original per-row linear scan over all
    # 500 detail rows. ---
    incorrect_drops = []
    for dropped in current_dropped:
        key = (normalize_text(dropped['artist']), normalize_text(dropped['album']))
        match = details_by_key.get(key)
        if match is not None:
            incorrect_drops.append({
                'dropped_rank': dropped['rank'],
                'actual_rank': match['rank'],
                'artist': dropped['artist'],
                'album': dropped['album'],
            })

    if incorrect_drops:
        print(f"\n⚠️ Found {len(incorrect_drops)} albums marked as dropped but are in main list:")
        for inc in incorrect_drops[:10]:
            print(f" Marked as dropped at #{inc['dropped_rank']} but is at #{inc['actual_rank']} - {inc['artist']} - {inc['album']}")

    print("\n📊 Summary:")
    print(f" - Should have dropped: {len(truly_dropped)}")
    print(f" - Currently marked as dropped: {len(current_dropped)}")
    print(f" - Difference: {len(truly_dropped) - len(current_dropped)}")

    # --- Persist the corrected dropped list ---
    with open('correct_dropped_albums.csv', 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=['Original_Rank_2020', 'Artist', 'Album'])
        writer.writeheader()
        for album in truly_dropped:
            writer.writerow({
                'Original_Rank_2020': album['rank'],
                'Artist': album['artist'],
                'Album': album['album'],
            })

    print("\n💾 Saved correct dropped list to: correct_dropped_albums.csv")
|
||||
|
||||
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue