top500albums/extract_spotify_urls.py
Johan Lundberg 75bbf157e7 Add Spotify integration, Next button navigation, and UI improvements
- Added Spotify links to all album cards with official green styling
- Implemented Next Album button for sequential navigation (albums 1-499)
- Fixed jump-to-rank to scroll to album card top instead of centering
- Moved share button to same row as album status for better layout
- Improved mobile UI: fixed theme selector overlap, full-width filter controls
- Grouped sort dropdown with reverse button for logical organization
- Updated README with new features and Spotify integration details
- Enhanced vertical alignment of status badges and share buttons
- Refined button sizing and spacing for consistent appearance

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-07-02 01:18:42 +02:00

193 lines
6.9 KiB
Python

#!/usr/bin/env python3
"""
Extract Spotify URLs for Top 500 Albums
This script searches for each album on Spotify using the Spotify Web API
and creates a mapping file similar to the Wikipedia URLs.
Note: You'll need to set up a Spotify app at https://developer.spotify.com/
and get your client credentials.
"""
import base64
import csv
import json
import os
import time
import urllib.parse
import urllib.request
from typing import Dict, Optional
# Spotify API credentials, created in the Spotify Developer Dashboard.
# They are read from the environment (SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET)
# so that secrets never live in source control. The placeholder defaults are
# the values main() compares against to detect missing configuration.
# NOTE(review): the literal values that used to be hard-coded here were
# committed to the repository; treat them as leaked and rotate them.
CLIENT_ID = os.environ.get("SPOTIFY_CLIENT_ID", "your_client_id_here")
CLIENT_SECRET = os.environ.get("SPOTIFY_CLIENT_SECRET", "your_client_secret_here")
def get_spotify_access_token() -> Optional[str]:
    """Request an access token from the Spotify Accounts service.

    Sends a ``grant_type=client_credentials`` POST to the token endpoint,
    authenticating with ``CLIENT_ID`` and ``CLIENT_SECRET`` as an HTTP Basic
    ``Authorization`` header.

    Returns:
        The ``access_token`` value from the JSON response, or ``None`` if the
        request fails, times out, or the response carries no such field.
    """
    auth_url = "https://accounts.spotify.com/api/token"

    # HTTP Basic auth value: base64("<client_id>:<client_secret>").
    credentials = f"{CLIENT_ID}:{CLIENT_SECRET}"
    encoded_credentials = base64.b64encode(credentials.encode()).decode()

    headers = {
        'Authorization': f'Basic {encoded_credentials}',
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    data = 'grant_type=client_credentials'

    try:
        request = urllib.request.Request(auth_url, data=data.encode(), headers=headers)
        # The context manager closes the connection even if reading or
        # decoding fails; the timeout keeps a stalled connection from
        # hanging the script forever.
        with urllib.request.urlopen(request, timeout=30) as response:
            result = json.loads(response.read().decode())
        return result.get('access_token')
    except Exception as e:
        # Deliberately broad: any failure here simply means "no token".
        print(f"Error getting access token: {e}")
        return None
def search_spotify_album(artist: str, album: str, access_token: str) -> Optional[str]:
    """Search Spotify for an album and return its Spotify URL.

    Several query strategies are tried in order, from most to least strict
    (field-qualified, quoted, then free text), each with the artist name as
    given and with a leading "The " toggled. For the first query that returns
    any results, the first result whose album name and primary artist name
    are substrings of (or contain) the requested ones is returned; if none
    qualifies, that query's first result is returned as a fallback.

    Args:
        artist: Artist name as it appears in the source data.
        album: Album title as it appears in the source data.
        access_token: Bearer token for the Spotify Web API.

    Returns:
        The album's ``external_urls['spotify']`` value, or ``None`` when the
        title is blank or every query failed or returned no results.
    """
    # Nothing to search for. A blank title would also "match" every result in
    # the substring comparison below and yield an arbitrary album.
    if not album.strip():
        return None

    # "&" is replaced by "and" in the search terms only.
    search_artist = artist.replace("&", "and").strip()
    search_album = album.replace("&", "and").strip()

    # Alternate artist spelling: toggle a leading "The ".
    if search_artist.startswith("The "):
        search_artist_alt = search_artist[4:]
    else:
        search_artist_alt = f"The {search_artist}"

    # Ordered from most to least strict.
    search_queries = [
        f'album:"{search_album}" artist:"{search_artist}"',
        f'album:"{search_album}" artist:"{search_artist_alt}"',
        f'"{search_album}" "{search_artist}"',
        f'"{search_album}" "{search_artist_alt}"',
        f'{search_album} {search_artist}',
    ]

    # Matching compares against the names as given (not the "&"-rewritten
    # search terms); these are loop-invariant, so compute them once.
    wanted_album = album.strip().lower()
    wanted_artist = artist.strip().lower()
    headers = {'Authorization': f'Bearer {access_token}'}

    for query in search_queries:
        try:
            encoded_query = urllib.parse.quote(query)
            search_url = f"https://api.spotify.com/v1/search?q={encoded_query}&type=album&limit=10"
            request = urllib.request.Request(search_url, headers=headers)
            # Close the response deterministically and never block forever.
            with urllib.request.urlopen(request, timeout=30) as response:
                data = json.loads(response.read().decode())

            albums = data.get('albums', {}).get('items', [])
            if albums:
                for spotify_album in albums:
                    artists = spotify_album.get('artists') or []
                    if not artists:
                        # No artist to compare; skip this item instead of
                        # aborting the whole query with an IndexError.
                        continue
                    spotify_name = spotify_album['name'].lower()
                    spotify_artist = artists[0]['name'].lower()
                    if not spotify_name or not spotify_artist:
                        # An empty string is a substring of anything, so it
                        # would count as a (bogus) match below.
                        continue

                    album_matches = wanted_album in spotify_name or spotify_name in wanted_album
                    artist_matches = wanted_artist in spotify_artist or spotify_artist in wanted_artist
                    if album_matches and artist_matches:
                        return spotify_album['external_urls']['spotify']

                # No close match: fall back to this query's first result.
                return albums[0]['external_urls']['spotify']

            # Brief pause before trying the next strategy.
            time.sleep(0.1)
        except Exception as e:
            # Best effort: report the failure, back off, try the next query.
            print(f"Error searching for {artist} - {album}: {e}")
            time.sleep(1)
            continue

    return None
def main():
    """Look up a Spotify URL for every album in the CSV and save the results.

    Reads ``top_500_albums_2023.csv`` (columns ``Artist`` and ``Album``) from
    the current directory, searches Spotify for each row, and writes:

    * ``spotify_urls_mapping.json`` - album title -> Spotify URL
    * ``spotify_urls_mapping_progress.json`` - checkpoint, rewritten every
      50 albums
    * ``failed_spotify_searches.txt`` - albums with no result (only written
      when there is at least one)

    Prints a message and returns early when the credentials are still the
    placeholders, no access token can be obtained, or the CSV is missing or
    lacks a required column.
    """
    print("Spotify URL Extractor for Top 500 Albums")
    print("=" * 50)

    # Check if credentials are set
    if CLIENT_ID == "your_client_id_here" or CLIENT_SECRET == "your_client_secret_here":
        print("ERROR: Please set your Spotify API credentials in the script!")
        print("1. Go to https://developer.spotify.com/")
        print("2. Create an app and get your Client ID and Client Secret")
        print("3. Replace CLIENT_ID and CLIENT_SECRET in this script")
        return

    # Get access token
    print("Getting Spotify access token...")
    access_token = get_spotify_access_token()
    if not access_token:
        print("Failed to get access token. Check your credentials.")
        return
    print("Access token obtained successfully!")

    # Read the albums data
    try:
        with open('top_500_albums_2023.csv', 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            # fieldnames is None for a completely empty file.
            fieldnames = reader.fieldnames or []
            albums = list(reader)
    except FileNotFoundError:
        print("Error: top_500_albums_2023.csv not found!")
        return

    # Fail with a clear message instead of a KeyError inside the loop.
    missing_columns = [col for col in ('Artist', 'Album') if col not in fieldnames]
    if missing_columns:
        print(f"Error: top_500_albums_2023.csv is missing column(s): {', '.join(missing_columns)}")
        return

    total = len(albums)
    print(f"Found {total} albums to process")

    # NOTE: the mapping is keyed by album title alone, so two albums sharing
    # a title overwrite each other. found_count tracks successful lookups
    # separately so the summary stays accurate in that case.
    spotify_mappings = {}
    failed_albums = []
    found_count = 0

    for i, album in enumerate(albums, 1):
        artist = album['Artist']
        album_name = album['Album']
        print(f"[{i}/{total}] Searching: {artist} - {album_name}")

        spotify_url = search_spotify_album(artist, album_name, access_token)
        if spotify_url:
            spotify_mappings[album_name] = spotify_url
            found_count += 1
            print(f" ✓ Found: {spotify_url}")
        else:
            failed_albums.append((artist, album_name))
            print(" ✗ Not found")

        # Rate limiting to be respectful to Spotify API
        time.sleep(0.2)

        # Save progress every 50 albums
        if i % 50 == 0:
            with open('spotify_urls_mapping_progress.json', 'w', encoding='utf-8') as f:
                json.dump(spotify_mappings, f, indent=2, ensure_ascii=False)
            print(f"Progress saved. Found {len(spotify_mappings)} URLs so far.")

    # Save the final mappings
    with open('spotify_urls_mapping.json', 'w', encoding='utf-8') as f:
        json.dump(spotify_mappings, f, indent=2, ensure_ascii=False)

    # Save failed albums for manual review
    if failed_albums:
        with open('failed_spotify_searches.txt', 'w', encoding='utf-8') as f:
            f.write("Albums not found on Spotify:\n")
            f.write("=" * 40 + "\n")
            for failed_artist, failed_album in failed_albums:
                f.write(f"{failed_artist} - {failed_album}\n")

    # An empty CSV (header only) must not divide by zero.
    success_rate = found_count / total * 100 if total else 0.0

    print("\nCompleted!")
    print(f"Successfully found Spotify URLs for {found_count} albums")
    print(f"Failed to find {len(failed_albums)} albums")
    print(f"Success rate: {success_rate:.1f}%")
    print("\nResults saved to:")
    print(" - spotify_urls_mapping.json")
    if failed_albums:
        print(" - failed_spotify_searches.txt")


if __name__ == "__main__":
    main()