PageSpeed Insights API with Python

Fetch Lighthouse performance data with Python. Working code examples for the PageSpeed Insights API with type hints.
Harlan Wilton · 4 min read

Fetch Lighthouse performance data programmatically with Python.

HTTP Client Choice

For PSI API requests:

| Library  | Best For |
| -------- | -------- |
| requests | Simple scripts, familiar API |
| httpx    | Sync/async flexibility, HTTP/2 support |
| aiohttp  | High concurrency (100+ URLs), best connection pooling |

For bulk testing, aiohttp outperforms httpx at high concurrency.
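
For a quick comparison, here is the same request with httpx. This is a minimal sketch assuming the same endpoint and PSI_API_KEY environment variable as the examples below; note httpx defaults to a 5-second timeout, which PSI's slow runs will exceed.

import os
import httpx

API_KEY = os.environ['PSI_API_KEY']

# Sync call; the API mirrors requests closely
response = httpx.get(
    'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
    params={'url': 'https://example.com', 'key': API_KEY},
    timeout=60  # override httpx's 5s default; PSI runs take 10-60s
)
data = response.json()

# The async variant is the same call on httpx.AsyncClient:
# async with httpx.AsyncClient(timeout=60) as client:
#     response = await client.get(..., params={...})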

Basic Request

import os
import requests

API_KEY = os.environ['PSI_API_KEY']
url = 'https://example.com'

response = requests.get(
    'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
    params={
        'url': url,
        'key': API_KEY
    },
    timeout=60  # PSI requests take 10-60s (https://github.com/GoogleChrome/lighthouse/issues/14072)
)
data = response.json()

score = data['lighthouseResult']['categories']['performance']['score'] * 100
print(f'Performance: {score}')

The score is 0-1, so multiply by 100 for the familiar 0-100 scale.
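
If you also want the rating Lighthouse shows in its report, the documented buckets are 0-49 (poor), 50-89 (needs improvement), and 90-100 (good). A small helper, reusing the data dict from above:

def score_label(score: float) -> str:
    # Lighthouse's report buckets: 90+ green, 50-89 orange, below 50 red
    value = score * 100
    if value >= 90:
        return 'good'
    if value >= 50:
        return 'needs improvement'
    return 'poor'

print(score_label(data['lighthouseResult']['categories']['performance']['score']))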

Extract Core Web Vitals

def get_core_web_vitals(data: dict) -> dict:
    audits = data['lighthouseResult']['audits']
    return {
        'lcp': audits['largest-contentful-paint']['numericValue'],
        'cls': audits['cumulative-layout-shift']['numericValue'],
        'tbt': audits['total-blocking-time']['numericValue'],
    }

vitals = get_core_web_vitals(data)
print(f"LCP: {vitals['lcp']}ms, CLS: {vitals['cls']}, TBT: {vitals['tbt']}ms")

Note: The API returns TBT (Total Blocking Time) as a proxy for INP since INP requires real user interaction data.
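
To judge the raw values, compare them against the published thresholds: LCP is good at or under 2,500 ms, CLS at or under 0.1, and Lighthouse treats roughly 200 ms as the green band for TBT. A sketch building on get_core_web_vitals above:

def check_vitals(vitals: dict) -> dict:
    # 2500 ms (LCP) and 0.1 (CLS) are the published "good" cutoffs;
    # ~200 ms is Lighthouse's green threshold for TBT
    return {
        'lcp_ok': vitals['lcp'] <= 2500,
        'cls_ok': vitals['cls'] <= 0.1,
        'tbt_ok': vitals['tbt'] <= 200,
    }

print(check_vitals(get_core_web_vitals(data)))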

Get All Category Scores

Request multiple categories in one call:

import requests

def fetch_all_categories(url: str, api_key: str) -> dict:
    response = requests.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params={
            'url': url,
            'key': api_key,
            # A list value sends repeated category= params, as the API expects
            'category': ['performance', 'accessibility', 'seo', 'best-practices']
        },
        timeout=60
    )
    data = response.json()

    categories = data['lighthouseResult']['categories']
    return {
        key: round(value['score'] * 100)
        for key, value in categories.items()
    }

scores = fetch_all_categories('https://example.com', API_KEY)
print(scores)
# {'performance': 87, 'accessibility': 92, 'seo': 100, 'best-practices': 95}

With Type Hints

from typing import TypedDict, Optional
import requests

class CategoryResult(TypedDict):
    score: float
    title: str

class AuditResult(TypedDict):
    score: Optional[float]
    numericValue: Optional[float]
    displayValue: Optional[str]

class MetricValue(TypedDict):
    percentile: int
    category: str  # 'FAST', 'AVERAGE', 'SLOW'

class LoadingExperience(TypedDict, total=False):
    metrics: dict[str, MetricValue]

class LighthouseResult(TypedDict):
    categories: dict[str, CategoryResult]
    audits: dict[str, AuditResult]

class PSIResponse(TypedDict):
    lighthouseResult: LighthouseResult
    loadingExperience: Optional[LoadingExperience]

def fetch_psi(url: str, api_key: str) -> PSIResponse:
    response = requests.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params={'url': url, 'key': api_key},
        timeout=60
    )
    response.raise_for_status()
    return response.json()
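
With the TypedDicts in place, a type checker like mypy catches misspelled top-level keys (audit names remain plain strings, so those stay unchecked). A short usage sketch:

data = fetch_psi('https://example.com', API_KEY)
perf: CategoryResult = data['lighthouseResult']['categories']['performance']
print(perf['title'], perf['score'])
# data['lighthouseResults'] would now be flagged by mypy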

Get Field Data (Real User Metrics)

The loadingExperience object contains real Chrome user data when available:

def get_field_data(data: dict) -> Optional[dict]:
    field = data.get('loadingExperience', {}).get('metrics')
    if not field:
        return None

    return {
        'lcp': field.get('LARGEST_CONTENTFUL_PAINT_MS', {}).get('percentile'),
        'cls': field.get('CUMULATIVE_LAYOUT_SHIFT_SCORE', {}).get('percentile'),
        'inp': field.get('INTERACTION_TO_NEXT_PAINT', {}).get('percentile'),
        'lcp_category': field.get('LARGEST_CONTENTFUL_PAINT_MS', {}).get('category'),
        'cls_category': field.get('CUMULATIVE_LAYOUT_SHIFT_SCORE', {}).get('category'),
        'inp_category': field.get('INTERACTION_TO_NEXT_PAINT', {}).get('category'),
    }

Field data is only available for URLs with enough Chrome traffic; for low-traffic pages the API omits the metrics and this helper returns None.
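
A common pattern is to prefer field data and fall back to lab metrics when it's missing. A sketch combining the two helpers above (the lab and field dicts use different keys and units, so tag the source):

def best_available_metrics(data: dict) -> dict:
    field = get_field_data(data)
    if field:
        return {'source': 'field', **field}
    return {'source': 'lab', **get_core_web_vitals(data)}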

Error Handling

The API returns 500 errors for undocumented rate limits, not just 429. Treat persistent 500s as throttling:

from typing import Optional

import requests

def fetch_psi(url: str, api_key: str) -> Optional[dict]:
    response = requests.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params={'url': url, 'key': api_key},
        timeout=60
    )

    if response.status_code == 429:
        retry_after = response.headers.get('Retry-After', 60)
        print(f'Rate limited. Retry after {retry_after}s')
        return None

    if response.status_code == 500:
        # Undocumented rate limit - wait 60-180s before retrying
        print('Possible rate limit (500). Wait before retrying.')
        return None

    if response.status_code == 400:
        error = response.json()
        print(f"Invalid request: {error['error']['message']}")
        return None

    if not response.ok:
        print(f'PSI API error: {response.status_code}')
        return None

    return response.json()
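
Since persistent 500s behave like throttling, a simple retry wrapper with a long backoff is a reasonable next step. A sketch wrapping the fetch_psi above; the delays follow the 60-180s guidance, and note it naively retries 400s too, which won't recover:

import time

def fetch_psi_with_retry(url: str, api_key: str, retries: int = 3) -> Optional[dict]:
    for attempt in range(retries):
        result = fetch_psi(url, api_key)
        if result is not None:
            return result
        if attempt < retries - 1:
            # Back off 60s, then 120s between attempts
            time.sleep(60 * (attempt + 1))
    return None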

Async with aiohttp

For better performance when testing multiple URLs. Use aiolimiter for rate limiting:

pip install aiohttp aiolimiter

import asyncio
import os
from typing import Optional

import aiohttp
from aiolimiter import AsyncLimiter

API_KEY = os.environ['PSI_API_KEY']

# 240 requests per minute (stay under limit)
limiter = AsyncLimiter(240, 60)

async def fetch_psi_async(session: aiohttp.ClientSession, url: str) -> Optional[dict]:
    async with limiter:
        params = {
            'url': url,
            'key': API_KEY,
            'strategy': 'mobile'
        }

        async with session.get(
            'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
            params=params
        ) as response:
            if response.status == 429:
                print(f'Rate limited for {url}')
                return None
            if response.status == 500:
                print(f'Possible rate limit for {url}')
                return None
            if not response.ok:
                print(f'Error for {url}: {response.status}')
                return None
            return await response.json()

async def analyze_urls(urls: list[str]) -> list[Optional[dict]]:
    timeout = aiohttp.ClientTimeout(total=60)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        tasks = [fetch_psi_async(session, url) for url in urls]
        return await asyncio.gather(*tasks)

urls = ['https://example.com', 'https://example.com/about']
results = asyncio.run(analyze_urls(urls))

aiohttp.ClientSession automatically handles connection pooling for better performance.
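
asyncio.gather preserves input order, so you can zip the urls list back onto the results from the run above and drop failed runs:

report = {
    url: result['lighthouseResult']['categories']['performance']['score'] * 100
    for url, result in zip(urls, results)
    if result is not None  # skip rate-limited or failed runs
}
print(report)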

Complete Example

import os
import requests
from typing import Optional

API_KEY = os.environ['PSI_API_KEY']

def analyze_url(url: str) -> Optional[dict]:
    response = requests.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params={
            'url': url,
            'key': API_KEY,
            'strategy': 'mobile',
            'category': ['performance', 'accessibility']
        },
        timeout=60
    )

    if not response.ok:
        print(f'Failed to analyze {url}: {response.status_code}')
        return None

    data = response.json()
    lighthouse = data['lighthouseResult']
    audits = lighthouse['audits']

    return {
        'url': url,
        'performance': round(lighthouse['categories']['performance']['score'] * 100),
        'accessibility': round(lighthouse['categories']['accessibility']['score'] * 100),
        'lcp': audits['largest-contentful-paint']['numericValue'],
        'cls': audits['cumulative-layout-shift']['numericValue'],
        'tbt': audits['total-blocking-time']['numericValue'],
    }

if __name__ == '__main__':
    result = analyze_url('https://example.com')
    print(result)

Skip the Boilerplate

Building API clients, handling rate limits, and parsing responses all take time you're not spending on actual performance improvements.

Unlighthouse handles the complexity and crawls your entire site:

npx unlighthouse --site https://your-site.com