#!/usr/bin/env python3
"""
Find albums that are TRULY new in 2023 (not in 2020 list at all).
"""

import csv

def normalize_text(text):
    """Return a canonical form of *text* for artist/album comparison.

    The value is lower-cased, every ``&`` is replaced with ``and``, leading and
    trailing whitespace is removed, and every internal run of whitespace is
    collapsed to a single space.

    Args:
        text: The raw string to normalize.

    Returns:
        The normalized string; an empty or whitespace-only input yields ``''``.
    """
    replaced = text.lower().replace('&', 'and')
    # split() with no argument breaks on runs of any whitespace and drops the
    # empty ends, so this both strips and collapses. A single
    # replace('  ', ' ') pass would leave a double space behind whenever three
    # or more spaces occur in a row, and would not touch tabs at all.
    return ' '.join(replaced.split())

def main():
    """Cross-check the 2023 list's 'New in 2023' flags against the 2020 list.

    Reads ``rolling_stone_2020_simple.csv`` (columns ``Artist`` and ``Album``)
    and ``top_500_albums_2023.csv`` (columns ``Rank``, ``Artist``, ``Album``
    and ``Status``) from the current working directory. Every 2023 row with a
    rank of at most 500 whose status is ``'New in 2023'`` is sorted into one of
    two groups, depending on whether its normalized (artist, album) pair also
    occurs in the 2020 file. Both groups are then printed to standard output;
    the second group is truncated to its first ten entries.
    """
    # Normalized (artist, album) pairs seen in the 2020 list.
    with open('rolling_stone_2020_simple.csv', 'r', encoding='utf-8') as src_2020:
        albums_2020 = {
            (normalize_text(entry['Artist']), normalize_text(entry['Album']))
            for entry in csv.DictReader(src_2020)
        }

    print(f"📊 Loaded {len(albums_2020)} albums from 2020 list")

    truly_new = []
    incorrectly_marked_new = []

    with open('top_500_albums_2023.csv', 'r', encoding='utf-8') as src_2023:
        for entry in csv.DictReader(src_2023):
            # Rows ranked beyond 500 are ignored entirely.
            if int(entry['Rank']) > 500:
                continue

            pair = (normalize_text(entry['Artist']), normalize_text(entry['Album']))

            # Only rows flagged as new are classified.
            if entry['Status'] != 'New in 2023':
                continue

            record = {
                'rank': entry['Rank'],
                'artist': entry['Artist'],
                'album': entry['Album'],
            }
            if pair in albums_2020:
                incorrectly_marked_new.append(record)
            else:
                truly_new.append(record)

    divider = "=" * 80

    def describe(record):
        # Rank is kept as the CSV string, so it is padded as a string.
        return f"#{record['rank']:3s} - {record['artist']} - {record['album']}"

    print(f"\n✅ TRULY new albums in 2023 (not in 2020 list):")
    print(divider)
    for record in truly_new:
        print(describe(record))
    print(divider)
    print(f"Total truly new: {len(truly_new)}")

    print(f"\n❌ Incorrectly marked as 'New in 2023' (were in 2020 list):")
    print(divider)
    for record in incorrectly_marked_new[:10]:  # only the first ten are listed
        print(describe(record))
    hidden = len(incorrectly_marked_new) - 10
    if hidden > 0:
        print(f"... and {hidden} more")
    print(divider)
    print(f"Total incorrectly marked: {len(incorrectly_marked_new)}")

# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()