Asynchronous Web Scraping with asyncio, aiohttp, and BeautifulSoup
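Sharing a small example that fetches several pages concurrently with aiohttp and pulls each page's <title> out with BeautifulSoup. Because asyncio.gather() schedules all the requests at once, the total wall time is roughly that of the slowest page rather than the sum of all of them.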
import asyncio

import aiohttp
from bs4 import BeautifulSoup


async def fetch(session, url):
    # Reuse a single ClientSession for all requests; opening one per request
    # defeats connection pooling and is discouraged by the aiohttp docs
    async with session.get(url) as response:
        response.raise_for_status()
        return await response.text()


async def main():
    urls = ['https://example1.com', 'https://example2.com', 'https://example3.com']
    async with aiohttp.ClientSession() as session:
        # Schedule all fetches and run them concurrently; gather() returns
        # the results in the same order as the input URLs
        tasks = [fetch(session, url) for url in urls]
        html_pages = await asyncio.gather(*tasks)
    for i, html in enumerate(html_pages, start=1):
        soup = BeautifulSoup(html, 'html.parser')
        # soup.title is None when a page has no <title> tag
        title = soup.title.string if soup.title else '(no title)'
        print(f"Title of page {i}: {title}")


if __name__ == '__main__':
    asyncio.run(main())
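For a larger list of URLs you'll usually want to cap how many requests are in flight and tolerate per-page failures. Here's a minimal sketch of the same pattern with an asyncio.Semaphore (the limit of 5 is an arbitrary assumption, tune it to the target site) and return_exceptions=True so one bad URL doesn't cancel the whole batch:

import asyncio

import aiohttp
from bs4 import BeautifulSoup

MAX_CONCURRENT = 5  # assumed limit; adjust for the site you're scraping


async def fetch_limited(session, url, semaphore):
    # The semaphore caps how many requests run at the same time
    async with semaphore:
        async with session.get(url) as response:
            response.raise_for_status()
            return await response.text()


async def main():
    urls = ['https://example1.com', 'https://example2.com', 'https://example3.com']
    semaphore = asyncio.Semaphore(MAX_CONCURRENT)
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_limited(session, url, semaphore) for url in urls]
        # return_exceptions=True returns each failure as a value instead of
        # raising, so the remaining requests still complete
        results = await asyncio.gather(*tasks, return_exceptions=True)
    for url, result in zip(urls, results):
        if isinstance(result, Exception):
            print(f"{url} failed: {result}")
        else:
            soup = BeautifulSoup(result, 'html.parser')
            title = soup.title.string if soup.title else '(no title)'
            print(f"{url}: {title}")


if __name__ == '__main__':
    asyncio.run(main())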