#!/usr/bin/env python3
"""
Find albums that are TRULY new in 2023 (not in 2020 list at all).

Reads the 2020 list and the 2023 list from CSV files, matches albums on a
normalized (artist, album) pair, and prints which 2023 entries flagged
'New in 2023' are genuinely absent from the 2020 list and which are not.
"""

import csv


def normalize_text(text):
    """Normalize text for comparison.

    Lowercases the text, spells out ``&`` as ``and``, trims leading and
    trailing whitespace and collapses every internal run of whitespace to
    a single space, so spacing differences do not prevent a match.

    Args:
        text: The raw artist or album string.

    Returns:
        The normalized string.
    """
    spelled_out = text.lower().replace('&', 'and')
    # split() with no argument drops leading/trailing whitespace and treats
    # any run of whitespace as one separator, so join() yields single spaces.
    return ' '.join(spelled_out.split())


def _album_key(row):
    """Return the normalized (artist, album) tuple used to match a CSV row."""
    return (normalize_text(row['Artist']), normalize_text(row['Album']))


def _load_album_keys(path):
    """Return the set of normalized (artist, album) keys found in the CSV at *path*."""
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(path, 'r', encoding='utf-8', newline='') as file:
        return {_album_key(row) for row in csv.DictReader(file)}


def _split_new_albums(path, known_keys, max_rank=500):
    """Split rows flagged 'New in 2023' into (truly_new, incorrectly_marked_new).

    Only rows whose ``Rank`` is at most *max_rank* are considered. A row is
    "truly new" when its normalized key is not in *known_keys*.
    """
    truly_new = []
    incorrectly_marked_new = []
    with open(path, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            if int(row['Rank']) > max_rank:
                continue
            if row['Status'] != 'New in 2023':
                continue
            entry = {
                'rank': row['Rank'],
                'artist': row['Artist'],
                'album': row['Album'],
            }
            if _album_key(row) in known_keys:
                incorrectly_marked_new.append(entry)
            else:
                truly_new.append(entry)
    return truly_new, incorrectly_marked_new


def _print_albums(albums):
    """Print one '#rank - artist - album' line per album entry."""
    for album in albums:
        print(f"#{album['rank']:3s} - {album['artist']} - {album['album']}")


def main(path_2020='rolling_stone_2020_simple.csv',
         path_2023='top_500_albums_2023.csv'):
    """Report which 'New in 2023' albums are really absent from the 2020 list.

    Args:
        path_2020: CSV with ``Artist`` and ``Album`` columns (the 2020 list).
        path_2023: CSV with ``Rank``, ``Artist``, ``Album`` and ``Status``
            columns (the 2023 list).

    Raises:
        OSError: If either file cannot be opened.
        KeyError: If a required column is missing from a row.
        ValueError: If a ``Rank`` value is not an integer.
    """
    separator = "=" * 80
    preview_limit = 10  # only the first few mismarked albums are listed

    # Read 2020 albums
    albums_2020 = _load_album_keys(path_2020)
    print(f"📊 Loaded {len(albums_2020)} albums from 2020 list")

    # Read 2023 albums and find truly new ones
    truly_new, incorrectly_marked_new = _split_new_albums(path_2023, albums_2020)

    print("\n✅ TRULY new albums in 2023 (not in 2020 list):")
    print(separator)
    _print_albums(truly_new)
    print(separator)
    print(f"Total truly new: {len(truly_new)}")

    print("\n❌ Incorrectly marked as 'New in 2023' (were in 2020 list):")
    print(separator)
    _print_albums(incorrectly_marked_new[:preview_limit])
    hidden = len(incorrectly_marked_new) - preview_limit
    if hidden > 0:
        print(f"... and {hidden} more")
    print(separator)
    print(f"Total incorrectly marked: {len(incorrectly_marked_new)}")


if __name__ == "__main__":
    main()