---
title: "PageSpeed Insights API Python Example (googleapis.com/pagespeedonline)"
description: "Fetch Lighthouse data with Python using the googleapis.com/pagespeedonline/v5/runPagespeed endpoint. Working code examples with type hints."
canonical_url: "https://unlighthouse.dev/learn-lighthouse/pagespeed-insights-api/python-example"
last_updated: "2025-01-18"
---

Fetch Lighthouse performance data programmatically with Python.

## HTTP Client Choice

For PSI API requests:

<table>
<thead>
  <tr>
    <th>
      Library
    </th>
    
    <th>
      Best For
    </th>
  </tr>
</thead>

<tbody>
  <tr>
    <td>
      <code>
        requests
      </code>
    </td>
    
    <td>
      Simple scripts, familiar API
    </td>
  </tr>
  
  <tr>
    <td>
      <code>
        httpx
      </code>
    </td>
    
    <td>
      Sync/async flexibility, HTTP/2 support
    </td>
  </tr>
  
  <tr>
    <td>
      <code>
        aiohttp
      </code>
    </td>
    
    <td>
      High concurrency (100+ URLs), best connection pooling
    </td>
  </tr>
</tbody>
</table>

For bulk testing, [aiohttp is often preferred](https://docs.aiohttp.org/en/stable/client_quickstart.html) for high-concurrency workloads.

## Basic Request

```python
import os
import requests

# The key is read from the PSI_API_KEY environment variable; a missing
# variable raises KeyError right here rather than sending a keyless request.
API_KEY = os.environ['PSI_API_KEY']
url = 'https://example.com'

response = requests.get(
    'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
    params={
        'url': url,
        'key': API_KEY
    },
    timeout=60  # PSI requests take 10-60s (https://github.com/GoogleChrome/lighthouse/issues/14072)
)
# Fail fast on HTTP errors: an error payload (e.g. {'error': {...}}) would
# otherwise surface below as a confusing KeyError on 'lighthouseResult'.
response.raise_for_status()
data = response.json()

# The score is a 0-1 float. Round after scaling so float artifacts
# (0.57 * 100 == 56.99999999999999) do not leak into the printed value.
score = round(data['lighthouseResult']['categories']['performance']['score'] * 100)
print(f'Performance: {score}')
```

The score is 0-1, so multiply by 100 for the familiar 0-100 scale.

## Extract Core Web Vitals

```python
def get_core_web_vitals(data: dict) -> dict:
    """Pull the LCP, CLS and TBT lab values out of a PSI response.

    Args:
        data: Parsed JSON body of a runPagespeed response.

    Returns:
        A dict with the keys 'lcp', 'cls' and 'tbt', each holding the
        ``numericValue`` of the matching Lighthouse audit.

    Raises:
        KeyError: If 'lighthouseResult', 'audits', one of the three audits,
            or an audit's 'numericValue' is missing.
    """
    # Short metric name -> Lighthouse audit id, in output order.
    audit_ids = {
        'lcp': 'largest-contentful-paint',
        'cls': 'cumulative-layout-shift',
        'tbt': 'total-blocking-time',
    }
    lab_audits = data['lighthouseResult']['audits']
    return {
        metric: lab_audits[audit_id]['numericValue']
        for metric, audit_id in audit_ids.items()
    }

# Summarize the lab metrics of the response fetched in the basic request.
vitals = get_core_web_vitals(data)
print("LCP: {lcp}ms, CLS: {cls}, TBT: {tbt}ms".format_map(vitals))
```

Note: The API returns TBT (Total Blocking Time) as a proxy for INP since INP requires real user interaction data.

## Get All Category Scores

Request multiple categories in one call:

```python
import requests

def fetch_all_categories(url: str, api_key: str) -> dict:
    """Fetch the Lighthouse category scores for ``url`` in a single PSI call.

    Args:
        url: Page to analyze.
        api_key: PageSpeed Insights API key.

    Returns:
        A dict mapping each category id in the response to its score on
        the 0-100 scale, rounded to the nearest integer.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within 60 seconds.
    """
    response = requests.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params={
            'url': url,
            'key': api_key,
            # A list value is sent as one repeated `category=` parameter per entry.
            'category': ['performance', 'accessibility', 'seo', 'best-practices']
        },
        # requests waits indefinitely unless a timeout is given.
        timeout=60
    )
    # Surface HTTP errors directly instead of a KeyError on 'lighthouseResult'.
    response.raise_for_status()
    data = response.json()

    categories = data['lighthouseResult']['categories']
    return {
        key: round(value['score'] * 100)
        for key, value in categories.items()
    }

scores = fetch_all_categories('https://example.com', API_KEY)
print(scores)
# {'performance': 87, 'accessibility': 92, 'seo': 100, 'best-practices': 95}
```

## With Type Hints

```python
from typing import TypedDict, Optional
import requests

class CategoryResult(TypedDict):
    """One entry of ``LighthouseResult.categories``."""
    score: float
    title: str

class AuditResult(TypedDict):
    """One entry of ``LighthouseResult.audits``; every value may be None."""
    score: Optional[float]
    numericValue: Optional[float]
    displayValue: Optional[str]

class MetricValue(TypedDict):
    """One entry of ``LoadingExperience.metrics``."""
    percentile: int
    category: str  # 'FAST', 'AVERAGE', 'SLOW'

class LoadingExperience(TypedDict, total=False):
    """Field-data container; ``total=False`` makes the ``metrics`` key optional."""
    metrics: dict[str, MetricValue]

class LighthouseResult(TypedDict):
    """Lab results: category and audit entries, each keyed by a string id."""
    categories: dict[str, CategoryResult]
    audits: dict[str, AuditResult]

class PSIResponse(TypedDict):
    """Top-level shape of the parsed response as used by these examples."""
    lighthouseResult: LighthouseResult
    loadingExperience: Optional[LoadingExperience]

def fetch_psi(url: str, api_key: str) -> PSIResponse:
    """Run PageSpeed Insights for ``url`` and return the typed response.

    Args:
        url: Page to analyze.
        api_key: PageSpeed Insights API key.

    Returns:
        The parsed JSON body, annotated as ``PSIResponse``. The payload is
        not validated against that type at runtime.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within 60 seconds.
    """
    response = requests.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params={'url': url, 'key': api_key},
        # requests waits indefinitely unless a timeout is given.
        timeout=60
    )
    response.raise_for_status()
    return response.json()
```

## Get Field Data (Real User Metrics)

The `loadingExperience` object contains real Chrome user data when available:

```python
def get_field_data(data: dict) -> Optional[dict]:
    """Extract real-user LCP, CLS and INP values from a PSI response.

    Args:
        data: Parsed JSON body of a runPagespeed response.

    Returns:
        None when the response has no field metrics. Otherwise a dict with
        the keys 'lcp', 'cls', 'inp' (the ``percentile`` values) and
        'lcp_category', 'cls_category', 'inp_category' (the ``category``
        values). A metric missing from the response yields None for both
        of its entries.
    """
    # `or {}` also covers an explicit null: `.get(key, {})` only substitutes
    # the default when the key is absent, so a None value would crash on the
    # chained `.get`.
    field = (data.get('loadingExperience') or {}).get('metrics')
    if not field:
        return None

    lcp = field.get('LARGEST_CONTENTFUL_PAINT_MS') or {}
    cls = field.get('CUMULATIVE_LAYOUT_SHIFT_SCORE') or {}
    inp = field.get('INTERACTION_TO_NEXT_PAINT') or {}

    return {
        'lcp': lcp.get('percentile'),
        'cls': cls.get('percentile'),
        'inp': inp.get('percentile'),
        'lcp_category': lcp.get('category'),
        'cls_category': cls.get('category'),
        'inp_category': inp.get('category'),
    }
```

Field data is only available for URLs with enough Chrome traffic. For low-traffic pages, `get_field_data` returns `None`.

## Error Handling

The API can return 500 errors when you hit [API rate limits](https://developers.google.com/speed/docs/insights/v5/get-started#limits), not just 429. Treat persistent 500s as throttling:

```python
from typing import Optional

import requests

def fetch_psi(url: str, api_key: str) -> Optional[dict]:
    """Run PageSpeed Insights for ``url``, returning None on any failure.

    Each failure is reported with ``print`` so a caller looping over many
    URLs can skip the page and carry on.

    Args:
        url: Page to analyze.
        api_key: PageSpeed Insights API key.

    Returns:
        The parsed JSON response, or None if the request failed, was rate
        limited, or was rejected by the API.
    """
    try:
        response = requests.get(
            'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
            params={'url': url, 'key': api_key},
            timeout=60
        )
    except requests.RequestException as exc:
        # Timeouts and connection failures never produce a status code;
        # report them like every other failure instead of raising.
        print(f'PSI request failed: {exc!r}')
        return None

    if response.status_code == 429:
        retry_after = response.headers.get('Retry-After', 60)
        print(f'Rate limited. Retry after {retry_after}s')
        return None

    if response.status_code == 500:
        # Undocumented rate limit - wait 60-180s before retrying
        print('Possible rate limit (500). Wait before retrying.')
        return None

    if response.status_code == 400:
        try:
            message = response.json()['error']['message']
        except (ValueError, KeyError, TypeError):
            # Body was not the expected JSON error envelope; show it raw.
            message = response.text
        print(f"Invalid request: {message}")
        return None

    if not response.ok:
        print(f'PSI API error: {response.status_code}')
        return None

    return response.json()
```

## Async with aiohttp

For better performance when testing multiple URLs, send requests concurrently and use [`aiolimiter`](https://aiolimiter.readthedocs.io/) for rate limiting:

```bash
pip install aiohttp aiolimiter
```

```python
import asyncio
import os
from typing import Optional

import aiohttp
from aiolimiter import AsyncLimiter

# Read from the PSI_API_KEY environment variable; raises KeyError if unset.
API_KEY = os.environ['PSI_API_KEY']

# Allow at most 240 requests per 60-second window (stay under limit)
limiter = AsyncLimiter(max_rate=240, time_period=60)

async def fetch_psi_async(session: aiohttp.ClientSession, url: str) -> Optional[dict]:
    """Fetch the mobile PSI result for ``url``, returning None on failure.

    The request is gated by the module-level ``limiter``. Every failure is
    printed and turned into None, so one bad URL cannot abort a batch
    collected with ``asyncio.gather``.

    Args:
        session: Shared aiohttp session used to send the request.
        url: Page to analyze.

    Returns:
        The parsed JSON response, or None if the request failed, timed
        out, or returned a non-2xx status.
    """
    params = {
        'url': url,
        'key': API_KEY,
        'strategy': 'mobile'
    }

    async with limiter:
        try:
            async with session.get(
                'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
                params=params
            ) as response:
                if response.status == 429:
                    print(f'Rate limited for {url}')
                    return None
                if response.status == 500:
                    print(f'Possible rate limit for {url}')
                    return None
                if not response.ok:
                    print(f'Error for {url}: {response.status}')
                    return None
                return await response.json()
        except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
            # Session timeouts and connection or decoding errors would
            # otherwise propagate through asyncio.gather and discard the
            # results of every other URL. repr() is used because a timeout
            # error has an empty message.
            print(f'Request failed for {url}: {exc!r}')
            return None

async def analyze_urls(urls: list[str]) -> list[Optional[dict]]:
    """Analyze ``urls`` concurrently over one shared aiohttp session.

    Args:
        urls: Pages to analyze.

    Returns:
        One entry per input URL, in the same order as ``urls``. An entry is
        None when ``fetch_psi_async`` returned None for that URL.
    """
    # Applies to each request made through the session.
    timeout = aiohttp.ClientTimeout(total=60)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        tasks = [fetch_psi_async(session, url) for url in urls]
        return await asyncio.gather(*tasks)

urls = ['https://example.com', 'https://example.com/about']
results = asyncio.run(analyze_urls(urls))
```

`aiohttp.ClientSession` automatically handles connection pooling for better performance.

## Complete Example

```python
import os
import requests
from typing import Optional

# Read from the PSI_API_KEY environment variable; raises KeyError if unset.
API_KEY = os.environ['PSI_API_KEY']

def analyze_url(url: str) -> Optional[dict]:
    """Run a mobile PSI analysis of ``url`` and summarize the key numbers.

    Args:
        url: Page to analyze.

    Returns:
        None if the API answered with a non-2xx status. Otherwise a dict
        with 'url', the 'performance' and 'accessibility' scores on the
        0-100 scale, and the 'lcp', 'cls' and 'tbt' audit values.

    Raises:
        requests.RequestException: On network failures, including a
            timeout after 60 seconds.
        KeyError: If the response lacks an expected category or audit.
    """
    response = requests.get(
        'https://www.googleapis.com/pagespeedonline/v5/runPagespeed',
        params={
            'url': url,
            'key': API_KEY,
            'strategy': 'mobile',
            'category': ['performance', 'accessibility']
        },
        # requests waits indefinitely unless a timeout is given.
        timeout=60
    )

    if not response.ok:
        print(f'Failed to analyze {url}: {response.status_code}')
        return None

    data = response.json()
    lighthouse = data['lighthouseResult']
    audits = lighthouse['audits']

    return {
        'url': url,
        'performance': round(lighthouse['categories']['performance']['score'] * 100),
        'accessibility': round(lighthouse['categories']['accessibility']['score'] * 100),
        'lcp': audits['largest-contentful-paint']['numericValue'],
        'cls': audits['cumulative-layout-shift']['numericValue'],
        'tbt': audits['total-blocking-time']['numericValue'],
    }

if __name__ == '__main__':
    result = analyze_url('https://example.com')
    print(result)
```

## Next Steps

- [Test multiple URLs](/learn-lighthouse/pagespeed-insights-api/bulk-testing)
- [Handle rate limits](/learn-lighthouse/pagespeed-insights-api/rate-limits)

## Skip the Boilerplate

Building API clients, handling rate limits, and parsing responses is time you're not spending on actual performance improvements.

Unlighthouse handles the complexity and crawls your entire site:

```bash
npx unlighthouse --site https://your-site.com
```

<u-button icon="i-heroicons-rocket-launch" label="Try Unlighthouse" size="lg" to="/">


</u-button>
Library	Best For
`requests`	Simple scripts, familiar API
`httpx`	Sync/async flexibility, HTTP/2 support
`aiohttp`	High concurrency (100+ URLs), best connection pooling