Complete reference for the Portkey AI Gateway Python SDK with unified API access to 200+ LLMs, automatic fallbacks, caching, and full observability. Use when building Python applications that need LLM integration with production-grade reliability.
Portkey Python SDK
The Portkey Python SDK provides a unified interface to 200+ LLMs through the Portkey AI Gateway. Built on top of the OpenAI SDK for seamless compatibility, it adds production-grade features: automatic fallbacks, retries, load balancing, semantic caching, guardrails, and comprehensive observability.
Additional References:
- API Reference - Response structures, error handling
- Advanced Features - Tool calling, embeddings, audio, images
- Framework Integrations - LangChain, LlamaIndex, Strands, Google ADK
- Provider Configuration - Azure, AWS Bedrock, Vertex AI setup
Installation
pip install portkey-ai
# Or with poetry/uv
poetry add portkey-ai
uv add portkey-ai
Quick Start
import os
from portkey_ai import Portkey
client = Portkey(
api_key=os.environ["PORTKEY_API_KEY"],
virtual_key="your-openai-virtual-key"
)
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Hello!"}]
)
print(response.choices[0].message.content)
Authentication
API Key + Virtual Key (Recommended)
Virtual keys securely store provider API keys in Portkey's vault:
import os
from portkey_ai import Portkey
client = Portkey(
api_key=os.environ["PORTKEY_API_KEY"], # From app.portkey.ai
virtual_key="openai-virtual-key-xxx" # From app.portkey.ai/virtual-keys
)
Using Config IDs
Pre-configure routing, fallbacks, and caching in the dashboard:
client = Portkey(
api_key=os.environ["PORTKEY_API_KEY"],
config="pc-config-xxx" # Config ID from dashboard
)
Chat Completions
Basic Request
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Explain quantum computing briefly."}
]
)
print(response.choices[0].message.content)
print(f"Tokens used: {response.usage.total_tokens}")
Streaming
stream = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Write a short story"}],
stream=True
)
for chunk in stream:
if chunk.choices[0].delta.content:
print(chunk.choices[0].delta.content, end="", flush=True)
Async Support
import asyncio
from portkey_ai import AsyncPortkey
async def main():
client = AsyncPortkey(
api_key=os.environ["PORTKEY_API_KEY"],
virtual_key="openai-key"
)
response = await client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Hello!"}]
)
print(response.choices[0].message.content)
asyncio.run(main())
Async Streaming
async def stream_response():
client = AsyncPortkey(
api_key=os.environ["PORTKEY_API_KEY"],
virtual_key="openai-key"
)
stream = await client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Write a poem"}],
stream=True
)
async for chunk in stream:
if chunk.choices[0].delta.content:
print(chunk.choices[0].delta.content, end="", flush=True)
Gateway Features
Fallbacks
Automatic failover when a provider fails:
client = Portkey(
api_key=os.environ["PORTKEY_API_KEY"],
config={
"strategy": {"mode": "fallback"},
"targets": [
{
"virtual_key": "openai-key",
"override_params": {"model": "gpt-4o"}
},
{
"virtual_key": "anthropic-key",
"override_params": {"model": "claude-3-5-sonnet-20241022"}
}
]
}
)
# If OpenAI fails, automatically tries Anthropic
response = client.chat.completions.create(
messages=[{"role": "user", "content": "Hello!"}]
)
Load Balancing
Distribute traffic across providers:
client = Portkey(
api_key=os.environ["PORTKEY_API_KEY"],
config={
"strategy": {"mode": "loadbalance"},
"targets": [
{"virtual_key": "openai-key-1", "weight": 0.7},
{"virtual_key": "openai-key-2", "weight": 0.3}
]
}
)
Automatic Retries
client = Portkey(
api_key=os.environ["PORTKEY_API_KEY"],
config={
"retry": {
"attempts": 3,
"on_status_codes": [429, 500, 502, 503, 504]
},
"virtual_key": "openai-key"
}
)
Semantic Caching
Reduce costs and latency with intelligent caching:
client = Portkey(
api_key=os.environ["PORTKEY_API_KEY"],
config={
"cache": {
"mode": "semantic", # or "simple" for exact match
"max_age": 3600 # TTL in seconds
},
"virtual_key": "openai-key"
}
)
# Semantically similar queries can be served from the cache
response1 = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "What is the capital of France?"}]
)
response2 = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Tell me France's capital"}]
) # May return the cached response (semantic match with the first query)
Request Timeout
client = Portkey(
api_key=os.environ["PORTKEY_API_KEY"],
virtual_key="openai-key",
request_timeout=30 # 30 seconds
)
Observability
Trace IDs
Link related requests for debugging:
import uuid
client = Portkey(
api_key=os.environ["PORTKEY_API_KEY"],
virtual_key="openai-key",
trace_id=str(uuid.uuid4())
)
Custom Metadata
Add searchable metadata to requests:
client = Portkey(
api_key=os.environ["PORTKEY_API_KEY"],
virtual_key="openai-key",
metadata={
"user_id": "user-123",
"session_id": "session-456",
"environment": "production"
}
)
Per-Request Options
response = client.with_options(
trace_id="unique-trace-id",
metadata={"request_type": "summarization"}
).chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Summarize this..."}]
)
Common Patterns
Multi-turn Conversation
messages = [
{"role": "system", "content": "You are a helpful coding assistant."},
{"role": "user", "content": "What is Python?"},
{"role": "assistant", "content": "Python is a high-level programming language..."},
{"role": "user", "content": "Show me a hello world example."}
]
response = client.chat.completions.create(model="gpt-4o", messages=messages)
JSON Output
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{"role": "system", "content": "Extract as JSON with name and age fields."},
{"role": "user", "content": "John is 30 years old."}
],
response_format={"type": "json_object"}
)
# Returns: {"name": "John", "age": 30}
Production Setup with Fallbacks + Caching
def create_production_client():
return Portkey(
api_key=os.environ["PORTKEY_API_KEY"],
config={
"strategy": {"mode": "fallback"},
"targets": [
{
"virtual_key": os.environ["OPENAI_VIRTUAL_KEY"],
"override_params": {"model": "gpt-4o"},
"retry": {"attempts": 2, "on_status_codes": [429, 500]}
},
{
"virtual_key": os.environ["ANTHROPIC_VIRTUAL_KEY"],
"override_params": {"model": "claude-3-5-sonnet-20241022"}
}
],
"cache": {"mode": "semantic", "max_age": 3600}
},
trace_id="production-session",
metadata={"environment": "production"}
)
Best Practices
- Use environment variables - Never hardcode API keys
- Implement fallbacks - Always have backup providers for production
- Use streaming - Better UX for long responses
- Add tracing - Enable observability with trace IDs and metadata
- Enable caching - Reduce costs with semantic caching
- Handle errors - Implement retry logic with exponential backoff
Resources
- Dashboard: app.portkey.ai
- Documentation: docs.portkey.ai
- GitHub: github.com/portkey-ai/portkey-python-sdk
- Discord: portkey.ai/discord
You Might Also Like
Related Skills
- gog (by openclaw): Google Workspace CLI for Gmail, Calendar, Drive, Contacts, Sheets, and Docs.
- orpc-contract-first (by langgenius): Guide for implementing oRPC contract-first API patterns in Dify frontend. Triggers when creating new API contracts, adding service endpoints, integrating TanStack Query with typed contracts, or migrating legacy service calls to oRPC. Use for all API layer work in web/contract and web/service directories.
