Playwright is a powerful framework for web testing and automation. This article demonstrates how to extract the HTML content of child IFRAMEs within a webpage using Python and Playwright.
Installation
To use Playwright in your Python project, you'll need to install it using pip:
pip install playwright
playwright install
Python Code
Here's the Python code:
import asyncio
from playwright.async_api import async_playwright, Frame
async def main() -> None:
    """Print the HTML content of each child iframe on a sample web page.

    Launches a visible (non-headless) Chromium browser, opens the MDN
    iframe example page, waits for the network to go idle, and then walks
    every frame attached to the page:

    * frames whose URL does not start with "http" are reported and skipped;
    * the page's main frame (the parent document) is reported and skipped;
    * for every remaining frame, the serialized HTML is printed between
      two separator lines.

    The browser is closed even if navigation or content retrieval fails.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        try:
            page = await browser.new_page()
            url = "https://interactive-examples.mdn.mozilla.net/pages/tabbed/iframe.html"
            await page.goto(url)
            # Wait for page to load and iframes to be rendered
            await page.wait_for_load_state("networkidle")

            # Identify the parent by frame object rather than by comparing
            # URLs: a redirect would make the final URL differ from `url`,
            # and a child iframe could legitimately share the parent's URL.
            main_frame = page.main_frame

            for frame in page.frames:
                if not frame.url.startswith("http"):
                    # May start with "about:"
                    print(f'{frame.url}: Not a valid URL')
                    continue
                if frame == main_frame:
                    print(f'{frame.url}: Skipping parent')
                    continue
                print(f'{frame.url}: content below')
                print('*' * 80)
                content = await frame.content()
                print(content)
                print('*' * 80)
        finally:
            # Always release the browser process, even when a step above raised.
            await browser.close()
# Run the coroutine only when this file is executed as a script, so that
# importing the module does not launch a browser as a side effect.
if __name__ == "__main__":
    asyncio.run(main())