Finalize dropped albums list with correct 8 albums and balance new albums
- Corrected dropped albums to exactly 8 albums through detailed comparison analysis - Updated dropped albums list (ranks 501-508) with proper albums that were truly removed - Fixed "New in 2023" markings to show only 8 albums (balancing the 8 dropped) - Downloaded cover art for all 8 dropped albums - Removed incorrect cover art files for albums that weren't actually dropped - Updated data files with corrected artist/album name formatting for accurate matching 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
e64b267ee3
commit
88a6434132
31 changed files with 1082 additions and 217 deletions
74
scripts/compare_2020_vs_2023.py
Normal file
74
scripts/compare_2020_vs_2023.py
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Compare rolling_stone_2020_simplified.csv and wikipedia_top_500_albums.csv to find dropped albums.
|
||||
"""
|
||||
|
||||
import csv
|
||||
|
||||
def normalize_text(text):
    """Normalize an artist or album name so equivalent spellings compare equal.

    The value is lower-cased, every ``&`` is spelled out as ``and``,
    surrounding whitespace is removed and internal runs of whitespace are
    collapsed to a single space. The earlier ``replace(' ', ' ')`` call
    replaced a space with itself, so names that differed only in spacing
    were treated as different albums.

    Args:
        text: Raw artist or album string.

    Returns:
        The normalized string used when building comparison keys.
    """
    spelled_out = text.lower().replace('&', 'and')
    # str.split() without arguments discards leading/trailing whitespace and
    # treats any whitespace run as a single separator.
    return ' '.join(spelled_out.split())
|
||||
|
||||
def main():
    """Report albums present in the 2020 list but absent from the Wikipedia 2023 list.

    Reads ``rolling_stone_2020_simplified.csv`` and
    ``wikipedia_top_500_albums.csv`` from the working directory, prints the
    albums missing from the latter ordered by their 2020 rank, and writes
    them to ``dropped_albums_2020_vs_2023.csv``.
    """
    # Index the 2020 rows by their normalized (artist, album) pair.
    with open('rolling_stone_2020_simplified.csv', 'r', encoding='utf-8') as src_2020:
        albums_2020 = {
            (normalize_text(rec['Artist']), normalize_text(rec['Album'])): {
                'rank': rec['Rank'],
                'artist': rec['Artist'],
                'album': rec['Album'],
            }
            for rec in csv.DictReader(src_2020)
        }

    print(f"📊 Loaded {len(albums_2020)} albums from 2020 Rolling Stone list")

    # The Wikipedia file uses lower-case column names: rank,album,artist.
    with open('wikipedia_top_500_albums.csv', 'r', encoding='utf-8') as src_wiki:
        albums_2023_wiki = {
            (normalize_text(rec['artist']), normalize_text(rec['album']))
            for rec in csv.DictReader(src_wiki)
        }

    print(f"📊 Loaded {len(albums_2023_wiki)} albums from Wikipedia 2023 list")

    # Anything keyed in 2020 but missing from the 2023 key set was dropped.
    dropped_albums = sorted(
        (info for key, info in albums_2020.items() if key not in albums_2023_wiki),
        key=lambda entry: int(entry['rank']),
    )

    rule = "=" * 80
    print(f"\n❌ Found {len(dropped_albums)} albums dropped from 2020 to 2023:")
    print(rule)

    for entry in dropped_albums:
        print(f"#{entry['rank']:3s} - {entry['artist']} - {entry['album']}")

    print(rule)
    print("\n📊 Summary:")
    print(f" - Albums in 2020: {len(albums_2020)}")
    print(f" - Albums in Wikipedia 2023: {len(albums_2023_wiki)}")
    print(f" - Albums dropped: {len(dropped_albums)}")

    # Persist the dropped list.
    with open('dropped_albums_2020_vs_2023.csv', 'w', newline='', encoding='utf-8') as out:
        writer = csv.DictWriter(out, fieldnames=['Original_Rank_2020', 'Artist', 'Album'])
        writer.writeheader()
        writer.writerows(
            {
                'Original_Rank_2020': entry['rank'],
                'Artist': entry['artist'],
                'Album': entry['album'],
            }
            for entry in dropped_albums
        )

    print("\n💾 Saved dropped albums list to: dropped_albums_2020_vs_2023.csv")


if __name__ == "__main__":
    main()
|
||||
73
scripts/compare_2020_vs_wikipedia.py
Normal file
73
scripts/compare_2020_vs_wikipedia.py
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Compare 2020 Rolling Stone list against full 2023 data to find truly dropped albums.
|
||||
"""
|
||||
|
||||
import csv
|
||||
|
||||
def normalize_text(text):
    """Return a canonical form of an artist or album name for matching.

    Lower-cases the text, replaces ``&`` with ``and``, trims the ends and
    collapses every run of whitespace to one space. The previous
    ``replace(' ', ' ')`` swapped a space for a space and therefore did
    nothing, leaving doubled spaces to break otherwise equal keys.

    Args:
        text: Raw artist or album string.

    Returns:
        The normalized string.
    """
    lowered = text.lower().replace('&', 'and')
    # A bare split() both trims and splits on whitespace runs, so re-joining
    # with a single space yields the collapsed form.
    return ' '.join(lowered.split())
|
||||
|
||||
def main():
    """List 2020 albums that do not appear in the Wikipedia 2023 list.

    Reads ``rolling_stone_2020_simple.csv`` and
    ``wikipedia_top_500_albums.csv`` from the working directory, prints the
    2020 albums with no match in the 2023 data (ordered by 2020 rank) and
    saves them to ``final_dropped_albums.csv``.
    """
    # 2020 entries keyed by normalized (artist, album).
    with open('rolling_stone_2020_simple.csv', 'r', encoding='utf-8') as handle:
        albums_2020 = {
            (normalize_text(line['Artist']), normalize_text(line['Album'])): {
                'rank': line['Rank'],
                'artist': line['Artist'],
                'album': line['Album'],
            }
            for line in csv.DictReader(handle)
        }

    print(f"📊 Loaded {len(albums_2020)} albums from 2020 list")

    # Normalized keys of every album in the 2023 data.
    with open('wikipedia_top_500_albums.csv', 'r', encoding='utf-8') as handle:
        albums_2023_all = {
            (normalize_text(line['artist']), normalize_text(line['album']))
            for line in csv.DictReader(handle)
        }

    print(f"📊 Loaded {len(albums_2023_all)} albums from Wikipedia 2023 list")

    # Keep the 2020 entries whose key is missing in 2023, lowest rank first.
    dropped_albums = [
        details for pair, details in albums_2020.items() if pair not in albums_2023_all
    ]
    dropped_albums.sort(key=lambda details: int(details['rank']))

    separator = "=" * 80
    print(f"\n❌ Found {len(dropped_albums)} albums dropped from 2020 to 2023:")
    print(separator)

    for details in dropped_albums:
        print(f"#{details['rank']:3s} - {details['artist']} - {details['album']}")

    print(separator)
    print("\n📊 Summary:")
    print(f" - Albums in 2020: {len(albums_2020)}")
    print(f" - Albums in complete 2023: {len(albums_2023_all)}")
    print(f" - Albums dropped: {len(dropped_albums)}")

    # Write the result file.
    with open('final_dropped_albums.csv', 'w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=['Original_Rank_2020', 'Artist', 'Album'])
        writer.writeheader()
        writer.writerows(
            {
                'Original_Rank_2020': details['rank'],
                'Artist': details['artist'],
                'Album': details['album'],
            }
            for details in dropped_albums
        )

    print("\n💾 Saved final dropped list to: final_dropped_albums.csv")


if __name__ == "__main__":
    main()
|
||||
86
scripts/correct_new_albums.py
Normal file
86
scripts/correct_new_albums.py
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Correct the "New in 2023" markings to show only truly new albums.
|
||||
Should be 8 new albums to balance the 8 dropped albums.
|
||||
"""
|
||||
|
||||
import csv
|
||||
|
||||
def normalize_text(text):
    """Canonicalize an artist or album name before comparing two lists.

    The text is lower-cased, ``&`` becomes ``and``, outer whitespace is
    stripped and any internal whitespace run is reduced to one space. The
    original ``replace(' ', ' ')`` had no effect, so a stray double space
    made an album from one list fail to match the same album in the other.

    Args:
        text: Raw artist or album string.

    Returns:
        The normalized string.
    """
    # split()/join performs both the trim and the whitespace collapse.
    return ' '.join(text.lower().replace('&', 'and').split())
|
||||
|
||||
def main():
    """Reset "New in 2023" statuses for albums that were already on the 2020 list.

    Reads ``rolling_stone_2020_simplified.csv`` and
    ``top_500_albums_2023.csv`` from the working directory. Every 2023 row
    whose Status contains "New in 2023" is classified as truly new or as
    incorrectly marked (its normalized artist/album pair exists in the 2020
    file). Both groups are printed, incorrectly marked rows get the Status
    "No change", and ``top_500_albums_2023.csv`` is rewritten in place.

    Fixes over the earlier version: headings used ``\\\\n`` inside the
    f-strings and so printed a literal backslash-n instead of a blank line,
    and a row with no Status cell (``None`` from ``csv.DictReader``) raised
    ``TypeError`` on the membership test.
    """
    # Normalized (artist, album) pairs of everything on the 2020 list.
    with open('rolling_stone_2020_simplified.csv', 'r', encoding='utf-8') as file:
        albums_2020 = {
            (normalize_text(row['Artist']), normalize_text(row['Album']))
            for row in csv.DictReader(file)
        }

    print(f"📊 Loaded {len(albums_2020)} albums from 2020 list")

    # Load the whole 2023 file up front because it is overwritten below.
    with open('top_500_albums_2023.csv', 'r', encoding='utf-8') as file:
        albums_2023 = list(csv.DictReader(file))

    truly_new = []
    incorrectly_marked = []
    updated_albums = []

    # Classify and correct in one pass; rows are copied so the loaded data
    # stays untouched.
    for album in albums_2023:
        updated_album = album.copy()

        # DictReader stores None for cells missing from a short row.
        if 'New in 2023' in (album.get('Status') or ''):
            key = (normalize_text(album['Artist']), normalize_text(album['Album']))
            if key in albums_2020:
                incorrectly_marked.append(album)
                updated_album['Status'] = 'No change'
            else:
                truly_new.append(album)

        updated_albums.append(updated_album)

    marked_total = len(truly_new) + len(incorrectly_marked)

    print("\n📊 Analysis of albums marked as 'New in 2023':")
    print(f" Total marked as new: {marked_total}")
    print(f" Truly new (not in 2020): {len(truly_new)}")
    print(f" Incorrectly marked (were in 2020): {len(incorrectly_marked)}")

    print("\n✅ Truly new albums in 2023:")
    for album in truly_new:
        print(f" #{album['Rank']} - {album['Artist']} - {album['Album']}")

    print("\n❌ Incorrectly marked as new (were in 2020):")
    for album in incorrectly_marked:
        print(f" #{album['Rank']} - {album['Artist']} - {album['Album']}")

    # Write the corrected rows back over the input file.
    with open('top_500_albums_2023.csv', 'w', newline='', encoding='utf-8') as file:
        fieldnames = ['Rank', 'Artist', 'Album', 'Status', 'Info', 'Description']
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(updated_albums)

    print("\n✅ Corrected CSV written with proper 'New in 2023' markings")
    print("📁 Updated: top_500_albums_2023.csv")
    print(f"\n📊 Final count: {len(truly_new)} truly new albums in 2023")


if __name__ == "__main__":
    main()
|
||||
32
scripts/create_2020_simplified.py
Normal file
32
scripts/create_2020_simplified.py
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Create a simplified version of the 2020 Rolling Stone data with only Rank, Artist, and Album columns.
|
||||
"""
|
||||
|
||||
import csv
|
||||
|
||||
def main():
    """Write a three-column copy of the 2020 Rolling Stone CSV.

    Reads ``rolling_stone_top_500_albums_2020.csv`` from the working
    directory, keeps only the Rank, Artist and Album columns, and writes the
    result to ``rolling_stone_2020_simplified.csv``.
    """
    columns = ['Rank', 'Artist', 'Album']

    # Project every source row onto the three wanted columns.
    with open('rolling_stone_top_500_albums_2020.csv', 'r', encoding='utf-8') as source:
        simplified_albums = [
            {column: record[column] for column in columns}
            for record in csv.DictReader(source)
        ]

    # Emit header plus the reduced rows.
    with open('rolling_stone_2020_simplified.csv', 'w', newline='', encoding='utf-8') as target:
        writer = csv.DictWriter(target, fieldnames=columns)
        writer.writeheader()
        writer.writerows(simplified_albums)

    print(f"✅ Created simplified 2020 CSV with {len(simplified_albums)} albums")
    print("📁 Saved as: rolling_stone_2020_simplified.csv")


if __name__ == "__main__":
    main()
|
||||
31
scripts/create_new_2020_simple.py
Normal file
31
scripts/create_new_2020_simple.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Create a fresh simplified version of the 2020 CSV with only Rank, Artist, and Album columns.
|
||||
"""
|
||||
|
||||
import csv
|
||||
|
||||
def main():
    """Regenerate the Rank/Artist/Album-only version of the 2020 CSV.

    Reads ``rolling_stone_top_500_albums_2020.csv`` from the working
    directory and writes its Rank, Artist and Album columns to
    ``rolling_stone_2020_simple.csv``.
    """
    simplified_albums = []

    with open('rolling_stone_top_500_albums_2020.csv', 'r', encoding='utf-8') as infile:
        for entry in csv.DictReader(infile):
            # Copy just the three columns the comparison scripts use.
            slim = dict(Rank=entry['Rank'], Artist=entry['Artist'], Album=entry['Album'])
            simplified_albums.append(slim)

    with open('rolling_stone_2020_simple.csv', 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=['Rank', 'Artist', 'Album'])
        writer.writeheader()
        for slim in simplified_albums:
            writer.writerow(slim)

    print(f"✅ Created fresh simplified 2020 CSV with {len(simplified_albums)} albums")


if __name__ == "__main__":
    main()
|
||||
62
scripts/fix_new_albums.py
Normal file
62
scripts/fix_new_albums.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Update the CSV to mark only the correct 8 albums as "New in 2023".
|
||||
"""
|
||||
|
||||
import csv
|
||||
|
||||
def main():
    """Mark a fixed set of eight ranks as "New in 2023" in the 2023 CSV.

    Reads ``top_500_albums_2023.csv`` from the working directory. For a row
    whose rank is in the hard-coded table, the Status becomes "New in 2023"
    when either the artist equals the expected artist or the expected album
    title occurs (case-insensitively) in the row's album; otherwise a
    mismatch warning is printed and the row is left alone. Any other row
    whose Status contains "New in 2023" is set to "No change". The file is
    then rewritten in place.
    """
    # Rank -> (artist, album) for the eight albums that should be flagged.
    correct_new_albums = {
        491: ('Harry Styles', "Harry's House"),
        466: ('Black Uhuru', 'Red'),
        437: ('Gorillaz', 'Demon Days'),
        430: ('Bad Bunny', 'Un Verano Sin Ti'),
        358: ('Olivia Rodrigo', 'Sour'),
        351: ('SZA', 'SOS'),
        170: ('Taylor Swift', 'Folklore'),
        71: ('Beyonce', 'Renaissance'),
    }

    print(f"✅ Correcting to mark only {len(correct_new_albums)} albums as 'New in 2023'")

    with open('top_500_albums_2023.csv', 'r', encoding='utf-8') as source:
        albums = list(csv.DictReader(source))

    updated_count = 0
    for album in albums:
        rank = int(album['Rank'])
        expected = correct_new_albums.get(rank)

        if expected is None:
            # Not one of the eight: clear a stale "New in 2023" flag if present.
            if 'New in 2023' in album.get('Status', ''):
                album['Status'] = 'No change'
                print(f"❌ #{rank} - {album['Artist']} - {album['Album']} -> Changed to No change")
            continue

        expected_artist, expected_album = expected
        # Lenient check: an exact artist match or the album title as a substring.
        artist_matches = album['Artist'] == expected_artist
        if artist_matches or expected_album.lower() in album['Album'].lower():
            album['Status'] = 'New in 2023'
            updated_count += 1
            print(f"✅ #{rank} - {album['Artist']} - {album['Album']} -> New in 2023")
        else:
            print(f"⚠️ Rank {rank} mismatch: expected {expected_artist} - {expected_album}, got {album['Artist']} - {album['Album']}")

    # Overwrite the input file with the adjusted rows.
    with open('top_500_albums_2023.csv', 'w', newline='', encoding='utf-8') as target:
        writer = csv.DictWriter(
            target,
            fieldnames=['Rank', 'Artist', 'Album', 'Status', 'Info', 'Description'],
        )
        writer.writeheader()
        writer.writerows(albums)

    print(f"\n✅ Updated CSV with {updated_count} albums marked as 'New in 2023'")
    print("📁 Saved to: top_500_albums_2023.csv")


if __name__ == "__main__":
    main()
|
||||
56
scripts/update_dropped_albums.py
Normal file
56
scripts/update_dropped_albums.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Update the main CSV with the correct 8 dropped albums, removing all incorrect entries.
|
||||
"""
|
||||
|
||||
import csv
|
||||
|
||||
def main():
    """Replace the dropped-album rows of the 2023 CSV with a fixed list of eight.

    Reads ``top_500_albums_2023.csv`` from the working directory, keeps the
    rows whose Rank is at most 500, appends the eight hard-coded dropped
    albums as ranks 501-508 with a "Dropped (was #N in 2020)" Status and
    empty Info/Description, and rewrites the file in place.
    """
    # (2020 rank, artist, album) for each dropped album, in output order.
    correct_dropped = [
        (48, 'Bob Marley and the Wailers', 'Legend'),
        (170, 'Cream', 'Disraeli Gears'),
        (351, 'Roxy Music', 'For Your Pleasure'),
        (358, 'Sonic Youth', 'Goo'),
        (430, 'Elvis Costello', 'My Aim Is True'),
        (437, 'Primal Scream', 'Screamadelica'),
        (466, 'The Beach Boys', 'The Beach Boys Today!'),
        (491, 'Harry Styles', 'Fine Line'),
    ]

    # Discard whatever rows currently sit beyond rank 500.
    with open('top_500_albums_2023.csv', 'r', encoding='utf-8') as source:
        updated_albums = [
            record for record in csv.DictReader(source) if int(record['Rank']) <= 500
        ]

    print(f"✅ Kept {len(updated_albums)} albums from ranks 1-500")

    # Dropped albums are numbered consecutively starting at 501.
    for new_rank, (old_rank, artist, title) in enumerate(correct_dropped, start=501):
        updated_albums.append({
            'Rank': str(new_rank),
            'Artist': artist,
            'Album': title,
            'Status': f"Dropped (was #{old_rank} in 2020)",
            'Info': '',
            'Description': '',
        })

    print(f"✅ Added {len(correct_dropped)} correct dropped albums")

    with open('top_500_albums_2023.csv', 'w', newline='', encoding='utf-8') as target:
        writer = csv.DictWriter(
            target,
            fieldnames=['Rank', 'Artist', 'Album', 'Status', 'Info', 'Description'],
        )
        writer.writeheader()
        writer.writerows(updated_albums)

    print(f"✅ Updated CSV with {len(updated_albums)} total albums (500 + 8 dropped)")
    print("📁 Saved to: top_500_albums_2023.csv")


if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue