"""Scrape the San Francisco 49ers roster page for player names and headshot URLs."""

import csv

import requests
from bs4 import BeautifulSoup

ROSTER_URL = "https://www.49ers.com/team/players-roster/"

# Seconds to wait for the roster page before giving up. Without a timeout,
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 30


def scrape_49ers_roster(output_csv='niners_players_headshots.csv'):
    """
    Scrape the 49ers roster page for player data and save it to CSV.

    Extracts per player:
      - Name
      - Headshot image URL (upgraded to the 2x-resolution variant)

    Args:
        output_csv: Path of the CSV file to write (two columns:
            ``name`` and ``headshot_url``).

    Raises:
        requests.HTTPError: If the roster page request returns an error status.
        ValueError: If no player rows are found (page structure may have changed).
    """
    response = requests.get(ROSTER_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    player_rows = soup.select('div.d3-o-table--horizontal-scroll tbody tr')
    if not player_rows:
        raise ValueError("No player rows found. The page structure may have changed.")

    roster_data = []
    for row in player_rows:
        try:
            # The first <td> holds both the player's name and headshot <img>.
            player_cell = row.find('td')
            name_tag = player_cell.select_one('.nfl-o-roster__player-name')
            name = name_tag.get_text(strip=True) if name_tag else ""

            img_tag = player_cell.find('img')
            headshot_url = img_tag['src'] if img_tag and img_tag.get('src') else ""

            # Swap the lazy-loading thumbnail transform for the 2x-resolution one.
            if headshot_url:
                headshot_url = headshot_url.replace(
                    '/t_thumb_squared/t_lazy/', '/t_thumb_squared_2x/')

            roster_data.append({
                'name': name,
                'headshot_url': headshot_url,
            })
        except Exception as e:
            # Best-effort scrape: a single malformed row is logged and skipped
            # rather than aborting the whole run.
            print(f"[WARNING] Skipping row due to error: {e}")
            continue

    # Save to CSV
    with open(output_csv, 'w', newline='', encoding='utf-8') as f:
        fieldnames = ['name', 'headshot_url']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(roster_data)

    print(f"[INFO] Successfully saved {len(roster_data)} players to '{output_csv}'.")


if __name__ == "__main__":
    scrape_49ers_roster()