# top500albums/scripts/fix_final_dropped_list.py

#!/usr/bin/env python3
"""
Fix the final dropped list with the corrected album names.
Replace current dropped albums with the correct ones.
"""

import csv

def normalize_text(text):
    """Normalize text for comparison.

    The text is lower-cased, stripped of leading/trailing whitespace, every
    ``&`` is spelled out as ``and``, and every run of consecutive spaces is
    collapsed to a single space.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    normalized = text.lower().strip().replace('&', 'and')
    # One replace('  ', ' ') pass only halves a run of spaces ("a    b" would
    # become "a  b"), so repeat until no double space is left.
    while '  ' in normalized:
        normalized = normalized.replace('  ', ' ')
    return normalized

def main():
    """Rebuild the dropped-album section of ``top_500_albums_2023.csv``.

    Reads three CSV files from the current working directory:

    * ``correct_dropped_albums.csv`` (columns ``Original_Rank_2020``,
      ``Artist``, ``Album``): the corrected list of dropped albums.
    * ``rolling_stone_top_500_albums_2020.csv``: source of the optional
      ``Info`` / ``Description`` text, looked up by exact
      ``(Artist, Album)`` match.
    * ``top_500_albums_2023.csv``: the list being repaired.

    Every row of the 2023 file whose ``Status`` contains ``Dropped`` is
    discarded, the corrected albums are appended with ranks starting at 501,
    and the 2023 file is overwritten in place. Progress and a summary are
    printed to stdout.

    Raises:
        FileNotFoundError: If one of the input files does not exist.
        KeyError: If a required column is missing from an input file.
    """
    # Dropped albums are ranked directly after the 500-entry main list.
    first_dropped_rank = 501

    # Read the correct dropped albums list.
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields containing line breaks survive the round trip unchanged.
    with open('correct_dropped_albums.csv', 'r', newline='', encoding='utf-8') as file:
        correct_dropped = [
            {
                'rank_2020': row['Original_Rank_2020'],
                'artist': row['Artist'],
                'album': row['Album'],
            }
            for row in csv.DictReader(file)
        ]

    print(f"📊 Loading {len(correct_dropped)} correct dropped albums")

    # Get info/descriptions from the 2020 data, keyed by (artist, album).
    with open('rolling_stone_top_500_albums_2020.csv', 'r', newline='', encoding='utf-8') as file:
        info_desc_2020 = {
            (row['Artist'], row['Album']): {
                'info': row.get('Info', ''),
                'description': row.get('Description', ''),
            }
            for row in csv.DictReader(file)
        }

    # Read the current CSV and keep everything that is not marked as dropped.
    # A short row yields Status=None; treat that as an empty status instead
    # of failing on the substring test.
    with open('top_500_albums_2023.csv', 'r', newline='', encoding='utf-8') as file:
        albums = [
            row
            for row in csv.DictReader(file)
            if 'Dropped' not in (row['Status'] or '')
        ]

    kept_count = len(albums)
    print(f"📊 Kept {kept_count} non-dropped albums")

    # Add the correct dropped albums.
    no_info = {'info': '', 'description': ''}
    for offset, album in enumerate(correct_dropped):
        # The lookup is an exact match; albums spelled differently in the
        # 2020 file end up with empty info/description.
        info_data = info_desc_2020.get((album['artist'], album['album']), no_info)

        albums.append({
            'Rank': str(first_dropped_rank + offset),
            'Artist': album['artist'],
            'Album': album['album'],
            'Status': f"Dropped (was #{album['rank_2020']} in 2020)",
            'Info': info_data['info'],
            'Description': info_data['description'],
        })

        # Only echo the first ten additions to keep the output short.
        if offset < 10:
            print(f"✓ Added: #{album['rank_2020']} - {album['artist']} - {album['album']}")

    added_count = len(correct_dropped)
    if added_count > 10:
        print(f"... and {added_count - 10} more")

    # Write updated CSV (the input was fully read above, so overwriting the
    # same path is safe).
    with open('top_500_albums_2023.csv', 'w', newline='', encoding='utf-8') as file:
        fieldnames = ['Rank', 'Artist', 'Album', 'Status', 'Info', 'Description']
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(albums)

    # Report the numbers that were actually processed rather than assuming
    # 500 kept and 89 dropped albums.
    print(f"\n✅ Updated dropped albums list")
    print(f"📊 Total albums now: {len(albums)}")
    print(f"📊 Main list (1-{first_dropped_rank - 1}): {kept_count}")
    if added_count:
        last_dropped_rank = first_dropped_rank + added_count - 1
        print(f"📊 Dropped ({first_dropped_rank}-{last_dropped_rank}): {added_count}")
    else:
        print("📊 Dropped: 0")

# Run the fix only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()