Fetch Lighthouse performance data programmatically with Python using the PageSpeed Insights (PSI) API.
import os
import requests

API_KEY = os.environ['PSI_API_KEY']
url = 'https://example.com'

response = requests.get(
    'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
    params={
        'url': url,
        'key': API_KEY
    }
)
data = response.json()

score = data['lighthouseResult']['categories']['performance']['score'] * 100
print(f'Performance: {score}')
The score is 0-1, so multiply by 100 for the familiar 0-100 scale.
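One edge case: if a category fails to run, Lighthouse reports its score as null, which json() decodes to None and which would crash the multiplication above. A small guard (score_to_pct is a made-up helper name):

from typing import Optional

def score_to_pct(category: dict) -> Optional[int]:
    # A null score means the category could not be computed for this run.
    score = category.get('score')
    return round(score * 100) if score is not None else None

Next, pull the Core Web Vitals lab metrics out of the audits: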
def get_core_web_vitals(data: dict) -> dict:
    audits = data['lighthouseResult']['audits']
    return {
        'lcp': audits['largest-contentful-paint']['numericValue'],
        'cls': audits['cumulative-layout-shift']['numericValue'],
        'tbt': audits['total-blocking-time']['numericValue'],
    }

vitals = get_core_web_vitals(data)
print(f"LCP: {vitals['lcp']}ms, CLS: {vitals['cls']}, TBT: {vitals['tbt']}ms")
Note: the API returns TBT (Total Blocking Time) as a lab proxy for INP, since INP requires real user interaction data that a synthetic test can't produce.
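If you want to bucket these lab values yourself, Google publishes "good"/"needs improvement" cutoffs: 2.5 s and 4 s for LCP, 0.1 and 0.25 for CLS, and 200 ms and 600 ms for TBT. A small classifier hardcoding those thresholds:

# Published 'good' / 'needs improvement' cutoffs; anything above is 'poor'.
THRESHOLDS = {
    'lcp': (2500, 4000),   # milliseconds
    'cls': (0.1, 0.25),    # unitless
    'tbt': (200, 600),     # milliseconds (lab proxy for responsiveness)
}

def classify(metric: str, value: float) -> str:
    good, needs_improvement = THRESHOLDS[metric]
    if value <= good:
        return 'good'
    if value <= needs_improvement:
        return 'needs improvement'
    return 'poor'

for name, value in vitals.items():
    print(f'{name}: {value} ({classify(name, value)})')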
Request multiple categories in one call:
import requests

def fetch_all_categories(url: str, api_key: str) -> dict:
    response = requests.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params={
            'url': url,
            'key': api_key,
            # requests encodes a list as repeated category= parameters,
            # which is what the PSI API expects.
            'category': ['performance', 'accessibility', 'seo', 'best-practices']
        }
    )
    response.raise_for_status()
    data = response.json()
    categories = data['lighthouseResult']['categories']
    return {
        key: round(value['score'] * 100)
        for key, value in categories.items()
    }

scores = fetch_all_categories('https://example.com', API_KEY)
print(scores)
# {'performance': 87, 'accessibility': 92, 'seo': 100, 'best-practices': 95}
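The strategy parameter selects a mobile or desktop run (desktop being the default). A quick comparison sketch, where fetch_scores is a hypothetical wrapper rather than part of the API:

def fetch_scores(url: str, api_key: str, strategy: str) -> dict:
    # Same call as above, parameterized by strategy ('mobile' or 'desktop').
    response = requests.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params={'url': url, 'key': api_key, 'strategy': strategy}
    )
    response.raise_for_status()
    categories = response.json()['lighthouseResult']['categories']
    return {key: round(value['score'] * 100) for key, value in categories.items()}

for strategy in ('mobile', 'desktop'):
    print(strategy, fetch_scores('https://example.com', API_KEY, strategy))

For longer scripts, typing the response helps catch misspelled keys early: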
from typing import TypedDict, Optional
import requests

class CategoryResult(TypedDict):
    score: float
    title: str

class AuditResult(TypedDict):
    score: Optional[float]
    numericValue: Optional[float]
    displayValue: Optional[str]

class MetricValue(TypedDict):
    percentile: int
    category: str  # 'FAST', 'AVERAGE', 'SLOW'

class LoadingExperience(TypedDict, total=False):
    metrics: dict[str, MetricValue]

class LighthouseResult(TypedDict):
    categories: dict[str, CategoryResult]
    audits: dict[str, AuditResult]

class PSIResponse(TypedDict):
    lighthouseResult: LighthouseResult
    loadingExperience: Optional[LoadingExperience]

def fetch_psi(url: str, api_key: str) -> PSIResponse:
    response = requests.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params={'url': url, 'key': api_key}
    )
    response.raise_for_status()
    return response.json()
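With those definitions in place, a type checker such as mypy or pyright can flag typos in key names. A quick usage sketch:

data = fetch_psi('https://example.com', API_KEY)
perf = data['lighthouseResult']['categories']['performance']
print(perf['title'], round(perf['score'] * 100))
# data['lighthouseResult']['category'] would be flagged as an unknown key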
The loadingExperience object contains real Chrome user data when available:
def get_field_data(data: dict) -> Optional[dict]:
    field = data.get('loadingExperience', {}).get('metrics')
    if not field:
        return None
    # Note: the API reports the CLS percentile multiplied by 100
    # (e.g. 11 means a CLS of 0.11).
    return {
        'lcp': field.get('LARGEST_CONTENTFUL_PAINT_MS', {}).get('percentile'),
        'cls': field.get('CUMULATIVE_LAYOUT_SHIFT_SCORE', {}).get('percentile'),
        'inp': field.get('INTERACTION_TO_NEXT_PAINT', {}).get('percentile'),
        'lcp_category': field.get('LARGEST_CONTENTFUL_PAINT_MS', {}).get('category'),
        'cls_category': field.get('CUMULATIVE_LAYOUT_SHIFT_SCORE', {}).get('category'),
        'inp_category': field.get('INTERACTION_TO_NEXT_PAINT', {}).get('category'),
    }
Field data is only available for URLs with enough Chrome traffic; for low-traffic pages the function returns None.
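A common pattern is to prefer field data when CrUX has it and fall back to the lab numbers otherwise. A sketch using the helpers above (best_available_lcp is a made-up name):

def best_available_lcp(data: dict) -> float:
    # Prefer the real-user 75th-percentile LCP; fall back to the lab run.
    field = get_field_data(data)
    if field and field.get('lcp') is not None:
        return field['lcp']
    return data['lighthouseResult']['audits']['largest-contentful-paint']['numericValue']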
The API can also fail outright, so handle the common error cases:

import requests
from typing import Optional
from requests.exceptions import RequestException

def fetch_psi(url: str, api_key: str) -> Optional[dict]:
    try:
        response = requests.get(
            'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
            params={'url': url, 'key': api_key}
        )
    except RequestException as exc:
        print(f'Request failed: {exc}')
        return None
    if response.status_code == 429:
        retry_after = response.headers.get('Retry-After', '60')
        print(f'Rate limited. Retry after {retry_after}s')
        return None
    if response.status_code == 400:
        error = response.json()
        print(f"Invalid request: {error['error']['message']}")
        return None
    if not response.ok:
        print(f'PSI API error: {response.status_code}')
        return None
    return response.json()
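For unattended runs you will likely want retries rather than prints. A sketch with exponential backoff that honors Retry-After when present (the delay values are arbitrary):

import time
import requests
from typing import Optional

def fetch_psi_with_retry(url: str, api_key: str, max_attempts: int = 3) -> Optional[dict]:
    for attempt in range(max_attempts):
        response = requests.get(
            'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
            params={'url': url, 'key': api_key}
        )
        if response.ok:
            return response.json()
        if response.status_code == 429:
            # Honor Retry-After if present, otherwise back off exponentially.
            delay = int(response.headers.get('Retry-After', 2 ** (attempt + 1)))
            time.sleep(delay)
            continue
        break  # 400s and other errors won't improve on retry
    return None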
To test multiple URLs faster, run the requests concurrently with aiohttp:
import asyncio
import os
from typing import Optional

import aiohttp

API_KEY = os.environ['PSI_API_KEY']

async def fetch_psi_async(session: aiohttp.ClientSession, url: str) -> Optional[dict]:
    params = {
        'url': url,
        'key': API_KEY,
        'strategy': 'mobile'
    }
    async with session.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params=params
    ) as response:
        if response.status == 429:
            print(f'Rate limited for {url}')
            return None
        if not response.ok:
            print(f'Error for {url}: {response.status}')
            return None
        return await response.json()

async def analyze_urls(urls: list[str]) -> list[Optional[dict]]:
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_psi_async(session, url) for url in urls]
        return await asyncio.gather(*tasks)

urls = ['https://example.com', 'https://example.com/about']
results = asyncio.run(analyze_urls(urls))
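asyncio.gather fires every request at once, which is an easy way to trip the API's rate limit. An asyncio.Semaphore caps the number of in-flight requests; a sketch reusing fetch_psi_async from above (the limit of 2 is arbitrary):

async def analyze_urls_limited(urls: list[str], limit: int = 2) -> list[Optional[dict]]:
    # limit=2 is arbitrary; tune it to your quota.
    semaphore = asyncio.Semaphore(limit)

    async def fetch_limited(session: aiohttp.ClientSession, url: str) -> Optional[dict]:
        async with semaphore:
            return await fetch_psi_async(session, url)

    async with aiohttp.ClientSession() as session:
        return await asyncio.gather(*(fetch_limited(session, u) for u in urls))

Putting the pieces together into one script: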
import os
import requests
from typing import Optional

API_KEY = os.environ['PSI_API_KEY']

def analyze_url(url: str) -> Optional[dict]:
    response = requests.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params={
            'url': url,
            'key': API_KEY,
            'strategy': 'mobile',
            'category': ['performance', 'accessibility']
        }
    )
    if not response.ok:
        print(f'Failed to analyze {url}: {response.status_code}')
        return None
    data = response.json()
    lighthouse = data['lighthouseResult']
    audits = lighthouse['audits']
    return {
        'url': url,
        'performance': round(lighthouse['categories']['performance']['score'] * 100),
        'accessibility': round(lighthouse['categories']['accessibility']['score'] * 100),
        'lcp': audits['largest-contentful-paint']['numericValue'],
        'cls': audits['cumulative-layout-shift']['numericValue'],
        'tbt': audits['total-blocking-time']['numericValue'],
    }

if __name__ == '__main__':
    result = analyze_url('https://example.com')
    print(result)
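To track scores over time, you could append each run to a CSV with the standard library (the filename is arbitrary):

import csv
import os
from datetime import datetime, timezone

def append_result(result: dict, path: str = 'psi-history.csv') -> None:
    # Append one row per run; write the header only when the file is new.
    new_file = not os.path.exists(path)
    with open(path, 'a', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['timestamp', *result.keys()])
        if new_file:
            writer.writeheader()
        writer.writerow({'timestamp': datetime.now(timezone.utc).isoformat(), **result})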
Building API clients, handling rate limits, and parsing responses is time you're not spending on actual performance improvements.
Unlighthouse handles the complexity and crawls your entire site:
npx unlighthouse --site https://your-site.com