PageSpeed Insights API with Python

Fetch Lighthouse performance data with Python. Working code examples for the PageSpeed Insights API with type hints.
Harlan Wilton · 4 min read

This guide walks through pulling Lighthouse lab data and CrUX field data from the PageSpeed Insights API: basic requests, typed responses, error handling, and async batching.

Basic Request

import os
import requests

API_KEY = os.environ['PSI_API_KEY']
url = 'https://example.com'

response = requests.get(
    'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
    params={
        'url': url,
        'key': API_KEY
    }
)
response.raise_for_status()
data = response.json()

score = data['lighthouseResult']['categories']['performance']['score'] * 100
print(f'Performance: {score}')

Lighthouse reports category scores on a 0-1 scale, so multiply by 100 for the familiar 0-100 value. The API defaults to the desktop strategy; pass strategy='mobile' to test mobile pages. Note that a score can come back as null when Lighthouse fails to compute a category, so guard before multiplying, as sketched below.
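A minimal sketch of that guard (the category_score helper is our own naming, not part of the API):

from typing import Optional

def category_score(data: dict, category: str = 'performance') -> Optional[int]:
    # score is null in the JSON (None in Python) when Lighthouse
    # could not compute the category
    score = data['lighthouseResult']['categories'][category]['score']
    return round(score * 100) if score is not None else None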

Extract Core Web Vitals

def get_core_web_vitals(data: dict) -> dict:
    audits = data['lighthouseResult']['audits']
    return {
        'lcp': audits['largest-contentful-paint']['numericValue'],  # milliseconds
        'cls': audits['cumulative-layout-shift']['numericValue'],   # unitless score
        'tbt': audits['total-blocking-time']['numericValue'],       # milliseconds
    }

vitals = get_core_web_vitals(data)
print(f"LCP: {vitals['lcp']}ms, CLS: {vitals['cls']}, TBT: {vitals['tbt']}ms")

Note: lab runs can't measure INP because INP requires real user interaction, so Lighthouse reports TBT (Total Blocking Time) as the closest lab proxy. If you want to bucket these metrics the way PSI's UI colors them, you can apply the published thresholds, as in the sketch below.
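The cutoffs here (LCP 2500/4000 ms, CLS 0.1/0.25, TBT 200/600 ms) are the commonly documented Core Web Vitals and Lighthouse thresholds; treat them as assumptions and verify against the current docs:

def classify(value: float, good: float, poor: float) -> str:
    # Bucket a metric value using good/poor cutoffs
    if value <= good:
        return 'good'
    if value <= poor:
        return 'needs improvement'
    return 'poor'

vitals = get_core_web_vitals(data)
print(classify(vitals['lcp'], 2500, 4000))  # LCP thresholds in ms
print(classify(vitals['cls'], 0.1, 0.25))   # CLS is unitless
print(classify(vitals['tbt'], 200, 600))    # TBT thresholds in ms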

Get All Category Scores

Request multiple categories in one call:

import requests

def fetch_all_categories(url: str, api_key: str) -> dict:
    response = requests.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params={
            'url': url,
            'key': api_key,
            # requests sends one category= param per list item,
            # which is how the API expects multiple categories
            'category': ['performance', 'accessibility', 'seo', 'best-practices']
        }
    )
    data = response.json()

    categories = data['lighthouseResult']['categories']
    return {
        key: round(value['score'] * 100)
        for key, value in categories.items()
    }

scores = fetch_all_categories('https://example.com', API_KEY)
print(scores)
# {'performance': 87, 'accessibility': 92, 'seo': 100, 'best-practices': 95}

With Type Hints

from typing import TypedDict, Optional
import requests

class CategoryResult(TypedDict):
    score: float
    title: str

class AuditResult(TypedDict):
    score: Optional[float]
    numericValue: Optional[float]
    displayValue: Optional[str]

class MetricValue(TypedDict):
    percentile: int
    category: str  # 'FAST', 'AVERAGE', 'SLOW'

class LoadingExperience(TypedDict, total=False):
    metrics: dict[str, MetricValue]

class LighthouseResult(TypedDict):
    categories: dict[str, CategoryResult]
    audits: dict[str, AuditResult]

class PSIResponse(TypedDict, total=False):
    # loadingExperience is omitted entirely for low-traffic URLs,
    # so total=False marks the keys as potentially absent
    lighthouseResult: LighthouseResult
    loadingExperience: LoadingExperience

def fetch_psi(url: str, api_key: str) -> PSIResponse:
    response = requests.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params={'url': url, 'key': api_key}
    )
    response.raise_for_status()
    return response.json()
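With these types in place, a type checker can follow the nested structure. A quick usage sketch (PSI_API_KEY in the environment, as in the earlier examples):

import os

data = fetch_psi('https://example.com', os.environ['PSI_API_KEY'])
perf = data['lighthouseResult']['categories']['performance']
print(f"{perf['title']}: {perf['score']}")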

Get Field Data (Real User Metrics)

The loadingExperience object contains real Chrome user data when available:

from typing import Optional

def get_field_data(data: dict) -> Optional[dict]:
    field = data.get('loadingExperience', {}).get('metrics')
    if not field:
        return None

    # Percentiles are p75 values: LCP and INP in milliseconds, while
    # the CLS percentile is reported multiplied by 100 (5 means 0.05)
    return {
        'lcp': field.get('LARGEST_CONTENTFUL_PAINT_MS', {}).get('percentile'),
        'cls': field.get('CUMULATIVE_LAYOUT_SHIFT_SCORE', {}).get('percentile'),
        'inp': field.get('INTERACTION_TO_NEXT_PAINT', {}).get('percentile'),
        'lcp_category': field.get('LARGEST_CONTENTFUL_PAINT_MS', {}).get('category'),
        'cls_category': field.get('CUMULATIVE_LAYOUT_SHIFT_SCORE', {}).get('category'),
        'inp_category': field.get('INTERACTION_TO_NEXT_PAINT', {}).get('category'),
    }

Field data is only available for URLs with enough Chrome traffic; for low-traffic pages the API omits loadingExperience entirely and this function returns None. A common pattern is to prefer field data and fall back to the lab run, as sketched below.
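A minimal fallback sketch reusing the helpers defined above:

field = get_field_data(data)
if field is not None:
    print(f"Field LCP (p75): {field['lcp']}ms ({field['lcp_category']})")
else:
    # No CrUX data for this URL: fall back to the lab metrics
    lab = get_core_web_vitals(data)
    print(f"Lab LCP: {lab['lcp']:.0f}ms")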

Error Handling

import requests
from requests.exceptions import RequestException
from typing import Optional

def fetch_psi(url: str, api_key: str) -> Optional[dict]:
    try:
        response = requests.get(
            'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
            params={'url': url, 'key': api_key},
            timeout=60
        )
    except RequestException as exc:
        # Network failures (DNS, timeouts, connection resets)
        print(f'Request failed: {exc}')
        return None

    if response.status_code == 429:
        retry_after = response.headers.get('Retry-After', 60)
        print(f'Rate limited. Retry after {retry_after}s')
        return None

    if response.status_code == 400:
        error = response.json()
        print(f"Invalid request: {error['error']['message']}")
        return None

    if not response.ok:
        print(f'PSI API error: {response.status_code}')
        return None

    return response.json()
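PSI runs can take tens of seconds, and batch jobs will eventually see 429s, so it is often worth retrying instead of giving up. A minimal retry wrapper (the attempt count and fallback delays are arbitrary choices, not documented quotas):

import time
import requests
from typing import Optional

def fetch_psi_with_retry(url: str, api_key: str, attempts: int = 3) -> Optional[dict]:
    for attempt in range(attempts):
        response = requests.get(
            'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
            params={'url': url, 'key': api_key},
            timeout=60
        )
        if response.status_code == 429:
            # Honor the server's Retry-After hint, else back off exponentially
            delay = int(response.headers.get('Retry-After', 10 * 2 ** attempt))
            time.sleep(delay)
            continue
        return response.json() if response.ok else None
    return None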

Async with aiohttp

For better performance when testing multiple URLs:

import asyncio
import os
from typing import Optional

import aiohttp

API_KEY = os.environ['PSI_API_KEY']

async def fetch_psi_async(session: aiohttp.ClientSession, url: str) -> Optional[dict]:
    params = {
        'url': url,
        'key': API_KEY,
        'strategy': 'mobile'
    }

    async with session.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params=params
    ) as response:
        if response.status == 429:
            print(f'Rate limited for {url}')
            return None
        if not response.ok:
            print(f'Error for {url}: {response.status}')
            return None
        return await response.json()

async def analyze_urls(urls: list[str]) -> list[Optional[dict]]:
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_psi_async(session, url) for url in urls]
        return await asyncio.gather(*tasks)

urls = ['https://example.com', 'https://example.com/about']
results = asyncio.run(analyze_urls(urls))
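Firing every request at once is an easy way to hit rate limits. A sketch that caps in-flight requests with a semaphore (the limit of 5 is an arbitrary choice, not a documented quota):

async def analyze_urls_limited(urls: list[str], limit: int = 5) -> list[Optional[dict]]:
    semaphore = asyncio.Semaphore(limit)

    async def bounded_fetch(session: aiohttp.ClientSession, url: str) -> Optional[dict]:
        # At most `limit` requests are in flight at any time
        async with semaphore:
            return await fetch_psi_async(session, url)

    async with aiohttp.ClientSession() as session:
        return await asyncio.gather(*(bounded_fetch(session, u) for u in urls))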

Complete Example

import os
import requests
from typing import Optional

API_KEY = os.environ['PSI_API_KEY']

def analyze_url(url: str) -> Optional[dict]:
    response = requests.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params={
            'url': url,
            'key': API_KEY,
            'strategy': 'mobile',
            'category': ['performance', 'accessibility']
        }
    )

    if not response.ok:
        print(f'Failed to analyze {url}: {response.status_code}')
        return None

    data = response.json()
    lighthouse = data['lighthouseResult']
    audits = lighthouse['audits']

    return {
        'url': url,
        'performance': round(lighthouse['categories']['performance']['score'] * 100),
        'accessibility': round(lighthouse['categories']['accessibility']['score'] * 100),
        'lcp': audits['largest-contentful-paint']['numericValue'],
        'cls': audits['cumulative-layout-shift']['numericValue'],
        'tbt': audits['total-blocking-time']['numericValue'],
    }

if __name__ == '__main__':
    result = analyze_url('https://example.com')
    print(result)
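To turn this into a simple report, loop over a URL list and write the rows to CSV (the filename psi-report.csv is an arbitrary choice):

import csv

urls = ['https://example.com', 'https://example.com/about']
results = [r for r in (analyze_url(u) for u in urls) if r is not None]

if results:
    with open('psi-report.csv', 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(results[0].keys()))
        writer.writeheader()
        writer.writerows(results)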

Skip the Boilerplate

Building API clients, handling rate limits, and parsing responses all take time you could be spending on actual performance improvements.

Unlighthouse handles the complexity and crawls your entire site:

npx unlighthouse --site https://your-site.com