- Added all 89 albums that were genuinely dropped from 2020 to 2023
- Fixed incorrect status markings (many albums marked "New in 2023" were not new)
- Removed duplicates and albums incorrectly marked as dropped
- Final count: 589 total (500 main list + 89 dropped)
- Updated JavaScript validation for extended range
- Created comprehensive analysis scripts to verify data

Math now adds up correctly: 89 albums dropped to make room for new additions

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
101 lines
No EOL
3.9 KiB
Python
101 lines
No EOL
3.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Find albums that are TRULY new in 2023 with better name matching.
|
|
"""
|
|
|
|
import csv
|
|
import re
|
|
|
|
def normalize_text(text):
    """Normalize an artist or album name into a loose comparison key.

    The text is lowercased, punctuation is removed, the filler words
    "and" and "the" are dropped, and whitespace is collapsed to single
    spaces with no leading or trailing space.

    Filler words are removed only as whole words, so "Band" or "Them"
    are left intact. If the name consists solely of filler words
    (e.g. "The The"), they are kept so the key does not become empty.

    Args:
        text: The raw name to normalize.

    Returns:
        The normalized string.
    """
    # Punctuation is deleted (not replaced by a space), so "St." -> "st"
    # and "Pepper's" -> "peppers"; "&" vanishes just like the word "and".
    cleaned = re.sub(r'[^\w\s]', '', text.lower())
    words = cleaned.split()
    kept = [word for word in words if word not in ('and', 'the')]
    # split()/join() collapses any whitespace run left behind by removed
    # punctuation or filler words and trims both ends.
    return ' '.join(kept or words)
|
|
|
|
def _load_2020_albums(path):
    """Return the 2020 list as a dict keyed by normalized (artist, album).

    Each value is the full CSV row, so the 2020 rank can be looked up
    later. A row whose normalized key equals an earlier row's key
    replaces that earlier row.
    """
    albums = {}
    # newline='' leaves newline handling to the csv module, as its
    # documentation recommends for file objects passed to a reader.
    with open(path, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            key = (normalize_text(row['Artist']), normalize_text(row['Album']))
            albums[key] = row
    return albums


def _classify_new_albums(path, albums_2020):
    """Split rows marked 'New in 2023' by whether they appear in 2020.

    Returns a ``(truly_new, incorrectly_marked_new)`` pair of lists of
    dicts with 'rank', 'artist' and 'album' keys; entries in the second
    list also carry 'rank_2020'.
    """
    truly_new = []
    incorrectly_marked_new = []
    with open(path, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            # Only ranks 1-500 are considered; higher ranks are skipped
            # (presumably the appended "dropped" albums - confirm
            # against the CSV).
            if int(row['Rank']) > 500 or row['Status'] != 'New in 2023':
                continue

            entry = {
                'rank': row['Rank'],
                'artist': row['Artist'],
                'album': row['Album'],
            }
            key = (normalize_text(row['Artist']), normalize_text(row['Album']))
            row_2020 = albums_2020.get(key)
            if row_2020 is None:
                truly_new.append(entry)
            else:
                entry['rank_2020'] = row_2020['Rank']
                incorrectly_marked_new.append(entry)
    return truly_new, incorrectly_marked_new


def main():
    """Report which albums marked 'New in 2023' are really absent from 2020.

    Reads 'rolling_stone_2020_simple.csv' and 'top_500_albums_2023.csv'
    from the current working directory and prints the findings to stdout.
    """
    albums_2020 = _load_2020_albums('rolling_stone_2020_simple.csv')
    print(f"📊 Loaded {len(albums_2020)} albums from 2020 list")

    # Spot-check a handful of titles to sanity-check the name matching.
    print("\n🔍 Checking specific albums:")
    test_cases = [
        ("The Rolling Stones", "Exile on Main St."),
        ("The Beatles", "Sgt. Pepper's Lonely Hearts Club Band"),
        ("Beyonce", "Renaissance"),
        ("Taylor Swift", "Folklore"),
        ("Bad Bunny", "Un Verano Sin Ti"),
    ]
    for artist, album in test_cases:
        found = (normalize_text(artist), normalize_text(album)) in albums_2020
        print(f" {artist} - {album}: {'Found in 2020' if found else 'NOT in 2020'}")

    truly_new, incorrectly_marked_new = _classify_new_albums(
        'top_500_albums_2023.csv', albums_2020
    )

    print("\n✅ TRULY new albums in 2023 (not in 2020 list):")
    print("=" * 80)
    for album in truly_new:
        print(f"#{album['rank']:3s} - {album['artist']} - {album['album']}")
    print("=" * 80)
    print(f"Total truly new: {len(truly_new)}")

    print("\n❌ Incorrectly marked as 'New in 2023' (were in 2020 list):")
    print("=" * 80)
    # Only the first 20 are listed; the remainder is summarized as a count.
    for album in incorrectly_marked_new[:20]:
        print(f"#{album['rank']:3s} - {album['artist']} - {album['album']} (was #{album['rank_2020']} in 2020)")
    if len(incorrectly_marked_new) > 20:
        print(f"... and {len(incorrectly_marked_new) - 20} more")
    print("=" * 80)
    print(f"Total incorrectly marked: {len(incorrectly_marked_new)}")

    print("\n📊 Final Summary:")
    print(f" - Albums marked 'New in 2023': {len(truly_new) + len(incorrectly_marked_new)}")
    print(f" - Actually new (not in 2020): {len(truly_new)}")
    print(f" - Incorrectly marked as new: {len(incorrectly_marked_new)}")
    print(f" - Total dropped from 2020: Should be {len(truly_new)} to maintain 500 total")
|
|
|
|
# Run the analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()