PageSpeed Insights API with Python
Fetch Lighthouse performance data programmatically with Python.
HTTP Client Choice
For PSI API requests:
| Library | Best For |
|---|---|
| requests | Simple scripts, familiar API |
| httpx | Sync/async flexibility, HTTP/2 support |
| aiohttp | High concurrency (100+ URLs), best connection pooling |
For bulk testing, aiohttp outperforms httpx at high concurrency.
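As a sketch of the httpx option (assuming httpx is installed; the client setup details here are illustrative, not prescriptive), the same request can be made synchronously or asynchronously from one library:
import os
import asyncio
import httpx

API_KEY = os.environ['PSI_API_KEY']
ENDPOINT = 'https://www.googleapis.com/pagespeedonline/v5/runPagespeed'

# Synchronous call
response = httpx.get(
    ENDPOINT,
    params={'url': 'https://example.com', 'key': API_KEY},
    timeout=60,
)
data = response.json()

# The same call, async
async def fetch(url: str) -> dict:
    async with httpx.AsyncClient(timeout=60) as client:
        response = await client.get(ENDPOINT, params={'url': url, 'key': API_KEY})
        return response.json()

# asyncio.run(fetch('https://example.com'))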
Basic Request
import os
import requests
API_KEY = os.environ['PSI_API_KEY']
url = 'https://example.com'
response = requests.get(
'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
params={
'url': url,
'key': API_KEY
},
timeout=60 # PSI requests take 10-60s (https://github.com/GoogleChrome/lighthouse/issues/14072)
)
data = response.json()
score = data['lighthouseResult']['categories']['performance']['score'] * 100
print(f'Performance: {score}')
The score is 0-1, so multiply by 100 for the familiar 0-100 scale.
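The `strategy` parameter selects the mobile or desktop Lighthouse profile. A hedged sketch that reuses `requests` and `API_KEY` from the block above (the helper name is illustrative):
# Sketch: run the same URL with the mobile and desktop profiles and compare.
def fetch_performance_score(url: str, strategy: str) -> float:
    response = requests.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params={'url': url, 'key': API_KEY, 'strategy': strategy},
        timeout=60
    )
    response.raise_for_status()
    result = response.json()
    return result['lighthouseResult']['categories']['performance']['score'] * 100

for strategy in ('mobile', 'desktop'):
    print(strategy, fetch_performance_score('https://example.com', strategy))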
Extract Core Web Vitals
def get_core_web_vitals(data: dict) -> dict:
audits = data['lighthouseResult']['audits']
return {
'lcp': audits['largest-contentful-paint']['numericValue'],
'cls': audits['cumulative-layout-shift']['numericValue'],
'tbt': audits['total-blocking-time']['numericValue'],
}
vitals = get_core_web_vitals(data)
print(f"LCP: {vitals['lcp']}ms, CLS: {vitals['cls']}, TBT: {vitals['tbt']}ms")
Note: The API returns TBT (Total Blocking Time) as a proxy for INP since INP requires real user interaction data.
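To label the lab values, here is a sketch that buckets them against the commonly published thresholds (LCP 2500/4000 ms, CLS 0.1/0.25, TBT 200/600 ms); treat the cutoffs as assumptions baked into this helper, not values returned by the API:
# Sketch: classify lab metrics as good / needs improvement / poor.
THRESHOLDS = {
    'lcp': (2500, 4000),  # milliseconds
    'cls': (0.1, 0.25),   # unitless
    'tbt': (200, 600),    # milliseconds
}

def classify(metric: str, value: float) -> str:
    good, poor = THRESHOLDS[metric]
    if value <= good:
        return 'good'
    if value <= poor:
        return 'needs improvement'
    return 'poor'

for name, value in get_core_web_vitals(data).items():
    print(f'{name}: {value} ({classify(name, value)})')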
Get All Category Scores
Request multiple categories in one call:
import requests
def fetch_all_categories(url: str, api_key: str) -> dict:
response = requests.get(
'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
params={
'url': url,
'key': api_key,
'category': ['performance', 'accessibility', 'seo', 'best-practices']
}
)
data = response.json()
categories = data['lighthouseResult']['categories']
return {
key: round(value['score'] * 100)
for key, value in categories.items()
}
scores = fetch_all_categories('https://example.com', API_KEY)
print(scores)
# {'performance': 87, 'accessibility': 92, 'seo': 100, 'best-practices': 95}
With Type Hints
from typing import TypedDict, Optional
import requests
class CategoryResult(TypedDict):
score: float
title: str
class AuditResult(TypedDict):
score: Optional[float]
numericValue: Optional[float]
displayValue: Optional[str]
class MetricValue(TypedDict):
percentile: int
category: str # 'FAST', 'AVERAGE', 'SLOW'
class LoadingExperience(TypedDict, total=False):
metrics: dict[str, MetricValue]
class LighthouseResult(TypedDict):
categories: dict[str, CategoryResult]
audits: dict[str, AuditResult]
class PSIResponse(TypedDict):
lighthouseResult: LighthouseResult
loadingExperience: Optional[LoadingExperience]
def fetch_psi(url: str, api_key: str) -> PSIResponse:
response = requests.get(
'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
params={'url': url, 'key': api_key}
)
response.raise_for_status()
return response.json()
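A short usage sketch; with the TypedDicts above, a static checker such as mypy or pyright can verify the key access:
# Sketch: typed access that a static checker can validate against PSIResponse.
import os

data: PSIResponse = fetch_psi('https://example.com', os.environ['PSI_API_KEY'])
performance: CategoryResult = data['lighthouseResult']['categories']['performance']
print(performance['title'], round(performance['score'] * 100))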
Get Field Data (Real User Metrics)
The loadingExperience object contains real Chrome user data when available:
def get_field_data(data: dict) -> Optional[dict]:
field = data.get('loadingExperience', {}).get('metrics')
if not field:
return None
return {
'lcp': field.get('LARGEST_CONTENTFUL_PAINT_MS', {}).get('percentile'),
'cls': field.get('CUMULATIVE_LAYOUT_SHIFT_SCORE', {}).get('percentile'),
'inp': field.get('INTERACTION_TO_NEXT_PAINT', {}).get('percentile'),
'lcp_category': field.get('LARGEST_CONTENTFUL_PAINT_MS', {}).get('category'),
'cls_category': field.get('CUMULATIVE_LAYOUT_SHIFT_SCORE', {}).get('category'),
'inp_category': field.get('INTERACTION_TO_NEXT_PAINT', {}).get('category'),
}
Field data is only available for URLs with enough Chrome traffic; for low-traffic pages the function above returns None.
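A sketch that prefers field data and falls back to lab data when none exists, reusing `get_field_data` and the `get_core_web_vitals` helper defined earlier:
# Sketch: prefer real-user (field) metrics, fall back to lab metrics.
def get_metrics(data: dict) -> dict:
    field = get_field_data(data)
    if field is not None:
        return {'source': 'field', **field}
    return {'source': 'lab', **get_core_web_vitals(data)}

print(get_metrics(data))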
Error Handling
The API can return 500 errors when you hit its undocumented rate limits, not just 429. Treat persistent 500s as throttling:
import requests
from typing import Optional
def fetch_psi(url: str, api_key: str) -> Optional[dict]:
response = requests.get(
'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
params={'url': url, 'key': api_key},
timeout=60
)
if response.status_code == 429:
retry_after = response.headers.get('Retry-After', 60)
print(f'Rate limited. Retry after {retry_after}s')
return None
if response.status_code == 500:
# Undocumented rate limit - wait 60-180s before retrying
print('Possible rate limit (500). Wait before retrying.')
return None
if response.status_code == 400:
error = response.json()
print(f"Invalid request: {error['error']['message']}")
return None
if not response.ok:
print(f'PSI API error: {response.status_code}')
return None
return response.json()
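Since persistent 500s are treated as throttling, here is a minimal retry sketch with a fixed wait between attempts (the 90-second pause is an assumption inside the 60-180s window mentioned above):
import time

# Sketch: retry fetch_psi a few times, sleeping between attempts.
def fetch_psi_with_retry(url: str, api_key: str, attempts: int = 3, wait: int = 90) -> Optional[dict]:
    for attempt in range(attempts):
        result = fetch_psi(url, api_key)
        if result is not None:
            return result
        if attempt < attempts - 1:
            time.sleep(wait)  # back off before the next attempt
    return None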
Async with aiohttp
For better throughput when testing many URLs concurrently, use aiohttp with aiolimiter for rate limiting:
pip install aiohttp aiolimiter
import asyncio
import aiohttp
import os
from typing import Optional
from aiolimiter import AsyncLimiter
API_KEY = os.environ['PSI_API_KEY']
# 240 requests per minute (stay under limit)
limiter = AsyncLimiter(240, 60)
async def fetch_psi_async(session: aiohttp.ClientSession, url: str) -> Optional[dict]:
async with limiter:
params = {
'url': url,
'key': API_KEY,
'strategy': 'mobile'
}
async with session.get(
'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
params=params
) as response:
if response.status == 429:
print(f'Rate limited for {url}')
return None
if response.status == 500:
print(f'Possible rate limit for {url}')
return None
if not response.ok:
print(f'Error for {url}: {response.status}')
return None
return await response.json()
async def analyze_urls(urls: list[str]) -> list[Optional[dict]]:
timeout = aiohttp.ClientTimeout(total=60)
async with aiohttp.ClientSession(timeout=timeout) as session:
tasks = [fetch_psi_async(session, url) for url in urls]
return await asyncio.gather(*tasks)
urls = ['https://example.com', 'https://example.com/about']
results = asyncio.run(analyze_urls(urls))
aiohttp.ClientSession automatically handles connection pooling for better performance.
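One caveat: if a task raises (for example an aiohttp timeout), plain asyncio.gather propagates the exception and you lose the other results. A sketch that keeps the successful responses:
# Sketch: keep successful results per URL even when some requests fail.
async def analyze_urls_safe(urls: list[str]) -> dict[str, dict]:
    timeout = aiohttp.ClientTimeout(total=60)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        tasks = [fetch_psi_async(session, url) for url in urls]
        results = await asyncio.gather(*tasks, return_exceptions=True)
    return {
        url: result
        for url, result in zip(urls, results)
        if isinstance(result, dict)  # drops None and exceptions
    }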
Complete Example
import os
import requests
from typing import Optional
API_KEY = os.environ['PSI_API_KEY']
def analyze_url(url: str) -> Optional[dict]:
response = requests.get(
'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
params={
'url': url,
'key': API_KEY,
'strategy': 'mobile',
'category': ['performance', 'accessibility']
}
)
if not response.ok:
print(f'Failed to analyze {url}: {response.status_code}')
return None
data = response.json()
lighthouse = data['lighthouseResult']
audits = lighthouse['audits']
return {
'url': url,
'performance': round(lighthouse['categories']['performance']['score'] * 100),
'accessibility': round(lighthouse['categories']['accessibility']['score'] * 100),
'lcp': audits['largest-contentful-paint']['numericValue'],
'cls': audits['cumulative-layout-shift']['numericValue'],
'tbt': audits['total-blocking-time']['numericValue'],
}
if __name__ == '__main__':
result = analyze_url('https://example.com')
print(result)
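To persist results for a batch of pages, a sketch that writes the complete example's output to CSV (the file name and field order are arbitrary choices, not part of the API):
import csv

# Sketch: analyze several URLs and save the results as a CSV report.
def save_report(urls: list[str], path: str = 'psi_report.csv') -> None:
    rows = [row for row in (analyze_url(url) for url in urls) if row is not None]
    if not rows:
        return
    with open(path, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)

save_report(['https://example.com', 'https://example.com/about'])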
Next Steps
Skip the Boilerplate
Building API clients, handling rate limits, and parsing responses all take time you're not spending on actual performance improvements.
Unlighthouse handles the complexity and crawls your entire site:
npx unlighthouse --site https://your-site.com