# top500albums/scripts/final_cleanup_dropped.py

#!/usr/bin/env python3
"""
Final cleanup - remove any dropped albums that are actually in the main Top 500 list.
"""

import csv

def normalize_name(text):
    """Return a canonical form of *text* for case/whitespace-insensitive comparison.

    The text is lower-cased, leading and trailing whitespace is removed, and
    every internal run of whitespace is collapsed to a single space.
    """
    # str.split() with no separator splits on any whitespace run and ignores
    # leading/trailing whitespace, so one join collapses runs of any length.
    # A single-pass replace('  ', ' ') would leave "a   b" as "a  b".
    return ' '.join(text.lower().split())

# Ranks 1.._MAIN_LIST_SIZE form the main list; rows ranked above it are renumbered after a removal.
_MAIN_LIST_SIZE = 500

# Columns that are always written, in this order. Extra input columns are appended after them.
_BASE_FIELDNAMES = ['Rank', 'Artist', 'Album', 'Status', 'Info', 'Description']


def _album_key(album):
    """Return the normalized (artist, album) pair used to match rows for the same album."""
    return (normalize_name(album['Artist']), normalize_name(album['Album']))


def main(csv_path='top_500_albums_2023.csv'):
    """Remove 'Dropped' rows whose album also appears in the main list, then rewrite the CSV.

    Steps:
      1. Read every row of ``csv_path``.
      2. Collect the normalized (artist, album) keys of all rows ranked
         ``_MAIN_LIST_SIZE`` or better.
      3. Drop every row whose ``Status`` contains ``'Dropped'`` and whose key
         is in that set.
      4. If anything was removed, renumber the rows ranked above
         ``_MAIN_LIST_SIZE`` consecutively, in file order.
      5. Write the rows back to ``csv_path`` and print a summary.

    NOTE: the main-list key set is built from rank alone, so a 'Dropped' row
    that is itself ranked within the main list matches its own key and is removed.

    Args:
        csv_path: Path of the CSV file to clean in place. Defaults to
            ``'top_500_albums_2023.csv'`` in the current working directory.

    Raises:
        KeyError: If a row lacks the 'Rank', 'Artist', 'Album' or 'Status' column.
        ValueError: If a 'Rank' value is not an integer.
    """
    # newline='' lets the csv module handle line endings itself, so newlines
    # embedded in quoted fields survive the round trip unchanged.
    with open(csv_path, 'r', newline='', encoding='utf-8') as file:
        reader = csv.DictReader(file)
        albums = list(reader)
        input_columns = reader.fieldnames or []  # None when the file is empty

    print(f"📊 Total albums before final cleanup: {len(albums)}")

    # Keys of every album ranked inside the main list.
    main_list_albums = {
        _album_key(album)
        for album in albums
        if int(album['Rank']) <= _MAIN_LIST_SIZE
    }

    print(f"📊 Albums in main Top 500 list: {len(main_list_albums)}")

    # Keep every row except 'Dropped' ones that duplicate a main-list album.
    kept = []
    for album in albums:
        if 'Dropped' in album['Status'] and _album_key(album) in main_list_albums:
            print("❌ Found incorrectly dropped album that's in main list:")
            print(f"   Rank {album['Rank']} - {album['Artist']} - {album['Album']}")
        else:
            kept.append(album)

    removed_count = len(albums) - len(kept)
    albums = kept

    if removed_count:
        print(f"\n🗑️  Removing {removed_count} incorrect entries...")

        # Close the gaps left by the removed rows: everything ranked beyond the
        # main list is renumbered consecutively, preserving file order.
        next_rank = _MAIN_LIST_SIZE + 1
        for album in albums:
            if int(album['Rank']) > _MAIN_LIST_SIZE:
                album['Rank'] = str(next_rank)
                next_rank += 1

    # Build the header BEFORE opening the file for writing. DictWriter raises
    # ValueError for row keys missing from fieldnames, and opening with 'w' has
    # already truncated the file by then. Appending extra input columns after
    # the standard ones keeps the output the same for standard files and stops
    # a file with extra columns from being destroyed.
    fieldnames = list(dict.fromkeys(_BASE_FIELDNAMES + list(input_columns)))

    with open(csv_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(albums)

    dropped_albums = sum(1 for album in albums if 'Dropped' in album['Status'])

    print("\n✅ Final cleanup complete!")
    print(f"📊 Total albums now: {len(albums)}")
    print(f"📊 Total dropped albums: {dropped_albums}")

    # Sanity check: every album new to the main list should have displaced
    # exactly one dropped album, so the two counts are expected to match.
    new_albums = sum(
        1
        for album in albums
        if album['Status'] == 'New in 2023' and int(album['Rank']) <= _MAIN_LIST_SIZE
    )
    print("\n🔍 Verification:")
    print(f"   New albums in 2023: {new_albums}")
    print(f"   Dropped albums: {dropped_albums}")
    print(f"   Should both equal: {'✅ YES' if new_albums == dropped_albums else '❌ NO'}")

# Run the cleanup only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()