Sharing my code for a Timeform greyhound results downloader. It pulls these columns: meeting, time, position, greyhound_name, trap, bsp, official_sp, forecast, tricast, trainer, winning_time, winning_grade, runner_profile_url.
One known issue: official_sp comes out looking like a date/month (Excel turns fractional odds such as 3/1 into 03-Jan). I don't need that column myself, so I haven't spent time fixing it; most of us only want the BSP, and that comes through correctly. I also have code that follows the runner profile link and downloads the data for each runner, but I only share that with people who have x number of posts on the forum.
import csv
import requests
from bs4 import BeautifulSoup
from datetime import date, timedelta


def parse_html_and_get_data(html_content):
    soup = BeautifulSoup(html_content, 'lxml')
    meetings = soup.find_all('div', class_='waf-meeting')
    scraped_data = []
    for meeting in meetings:
        meeting_name_tag = meeting.find('h3', class_='waf-meeting-header')
        if not meeting_name_tag:
            continue
        meeting_name = meeting_name_tag.get_text(strip=True).replace('Results', '').replace('Top↑', '').strip()
        races = meeting.find_all('div', class_='waf-result')
        for race in races:
            race_time_tag = race.find('a', class_='waf-header')
            if not race_time_tag:
                continue
            race_time = race_time_tag.get_text(strip=True)
            # Forecast / tricast dividends sit in spans whose title text
            # mentions the first two / first three greyhounds.
            forecast = None
            tricast = None
            forecast_span = race.find('span', title=lambda x: x and 'first two greyhounds' in x)
            if forecast_span and forecast_span.find('b'):
                forecast = forecast_span.find('b').get_text(strip=True).replace('£', '')
            tricast_span = race.find('span', title=lambda x: x and 'first three greyhounds' in x)
            if tricast_span and tricast_span.find('b'):
                tricast = tricast_span.find('b').get_text(strip=True).replace('£', '')
            trainer = None
            trainer_span = race.find('span', title=lambda x: x and 'winning trainer' in x)
            if trainer_span:
                trainer = trainer_span.get_text(strip=True).rstrip(',')
            winning_time = None
            winning_grade = None
            winning_time_span = race.find('span', class_='waf-time')
            if winning_time_span:
                time_text = winning_time_span.find('b').get_text(strip=True) if winning_time_span.find('b') else ''
                grade_text = winning_time_span.get_text()
                winning_time = time_text
                if '(' in grade_text:
                    winning_grade = grade_text.split('(')[1].replace(')', '').strip()
            table = race.find('table')
            if table:
                for row in table.find_all('tr'):
                    cols = row.find_all('td')
                    if len(cols) < 2:
                        continue  # skip header rows with no <td> cells
                    position = cols[0].get_text(strip=True)
                    name_link = cols[1].find('a')
                    name = name_link.get_text(strip=True) if name_link else cols[1].get_text(strip=True)
                    trap_img = cols[1].find('img')
                    trap = trap_img['alt'] if trap_img and trap_img.has_attr('alt') else None
                    bsp_mob = cols[2].get_text(strip=True) if len(cols) > 2 else None
                    official_sp = cols[3].get_text(strip=True) if len(cols) > 3 else None
                    # Extract runner profile URL
                    runner_profile_url = None
                    if name_link and name_link.has_attr('href'):
                        href = name_link['href']
                        if href.startswith('http'):
                            runner_profile_url = href
                        else:
                            runner_profile_url = f"https://www.timeform.com{href}"
                    scraped_data.append({
                        'meeting': meeting_name,
                        'time': race_time,
                        'position': position,
                        'greyhound_name': name,
                        'trap': trap,
                        'bsp': bsp_mob,
                        'official_sp': official_sp,
                        'forecast': forecast,
                        'tricast': tricast,
                        'trainer': trainer,
                        'winning_time': winning_time,
                        'winning_grade': winning_grade,
                        'runner_profile_url': runner_profile_url
                    })
    return scraped_data
def main():
    # Change the date range here.
    start_date = date(2021, 1, 1)
    end_date = date(2025, 1, 31)
    all_results = []
    current_date = start_date
    while current_date <= end_date:
        date_str = current_date.strftime('%Y-%m-%d')
        # NOTE: the forum has shortened this URL; paste in the full Timeform greyhound results path.
        url = f'https://www.timeform.com/greyhound-raci ... /{date_str}'
        print(f"Scraping data for {date_str}...")
        try:
            response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
            response.raise_for_status()
            html_content = response.text
            daily_data = parse_html_and_get_data(html_content)
            all_results.extend(daily_data)
            print(f"Found {len(daily_data)} results.")
        except requests.exceptions.RequestException as e:
            print(f"Error fetching URL {url}: {e}")
        current_date += timedelta(days=1)
    if not all_results:
        print("No data was scraped. Exiting.")
        return
    # Change 'results.csv' here (and in the print below) if you want a different output file name.
    with open('results.csv', 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = [
            'meeting', 'time', 'position', 'greyhound_name', 'trap',
            'bsp', 'official_sp', 'forecast', 'tricast', 'trainer',
            'winning_time', 'winning_grade', 'runner_profile_url'
        ]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(all_results)
    print(f"\nSuccessfully scraped a total of {len(all_results)} results.")
    print("Data saved to results.csv")


if __name__ == "__main__":
    main()
The only issue is the official SP price; I don't need it, so I haven't bothered to sort it out. You can see sample data at the bottom. Remember it only covers the 1st, 2nd and 3rd finishers, not the full race card.
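If you do want official_sp to come out readable, the cause is that Excel converts fractional odds such as 3/1 into a date (03-Jan) when it opens the CSV. A minimal sketch of a workaround, assuming the site serves plain fractions like 3/1, is to wrap the value as an Excel text formula just before writer.writerows(all_results) in main():

for row in all_results:
    if row['official_sp']:
        # ="3/1" forces Excel to treat the cell as text instead of a date.
        # Other spreadsheet tools may show the wrapper literally, so only use this for Excel.
        row['official_sp'] = '="{}"'.format(row['official_sp'])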
meeting time position greyhound_name trap bsp official_sp forecast tricast trainer winning_time winning_grade runner_profile_url
Crayford 13:17 1st GOBBY DOBBY 3 4.7 03-Jan 9.26 18.88 T Batchelor 23.76 A5 https://www.timeform.com/greyhound-raci ... obby/69090
Crayford 13:17 2nd CAMP BEST 1 2.85 11/8f 9.26 18.88 T Batchelor 23.76 A5 https://www.timeform.com/greyhound-raci ... best/70330
Crayford 13:17 3rd JOSIES GOLD 4 5.3 03-Jan 9.26 18.88 T Batchelor 23.76 A5 https://www.timeform.com/greyhound-raci ... gold/67050
Crayford 13:33 1st SLOWLANE QUEEN 3 4.3 05-Feb 8.61 18.17 T M Levers 23.7 A9 https://www.timeform.com/greyhound-raci ... ueen/65490
Crayford 13:33 2nd COOLSIDE OPHELIA 1 3.25 7/4f 8.61 18.17 T M Levers 23.7 A9 https://www.timeform.com/greyhound-raci ... elia/57505
Crayford 13:33 3rd DECE ZARKOV 2 4.82 03-Jan 8.61 18.17 T M Levers 23.7 A9 https://www.timeform.com/greyhound-raci ... rkov/67608
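If you want to pull results.csv back in for analysis, a minimal sketch using pandas (not part of the downloader, pandas assumed installed) looks like this:

import pandas as pd

# Read everything as text first so official_sp is not re-interpreted,
# then convert just the numeric columns you actually need.
df = pd.read_csv('results.csv', dtype=str)
df['bsp'] = pd.to_numeric(df['bsp'], errors='coerce')
df['winning_time'] = pd.to_numeric(df['winning_time'], errors='coerce')
print(df.groupby('meeting')['bsp'].mean())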
Enjoy