# Google AI generateContent
Use LiteLLM to call Google AI's generateContent endpoints for text generation, multimodal interactions, and streaming responses.
## Overview
| Feature | Supported | Notes |
|---|---|---|
| Cost Tracking | ✅ | |
| Logging | ✅ | Works across all integrations |
| End-user Tracking | ✅ | |
| Streaming | ✅ | |
| Fallbacks | ✅ | Between supported models |
| Load Balancing | ✅ | Between supported models |
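Fallbacks and load balancing are configured at the Router/proxy layer rather than per call. Below is a minimal `config.yaml` sketch, assuming a hypothetical second API key (`GEMINI_API_KEY_2`) and a hypothetical fallback alias (`gemini-pro`); see the proxy setup later on this page for the basic single-model config.

```yaml
model_list:
  # Two deployments sharing one model_name are load-balanced
  - model_name: gemini-flash
    litellm_params:
      model: gemini/gemini-2.0-flash
      api_key: os.environ/GEMINI_API_KEY
  - model_name: gemini-flash
    litellm_params:
      model: gemini/gemini-2.0-flash
      api_key: os.environ/GEMINI_API_KEY_2  # hypothetical second key
  - model_name: gemini-pro
    litellm_params:
      model: gemini/gemini-1.5-pro
      api_key: os.environ/GEMINI_API_KEY

router_settings:
  # If a gemini-flash call fails, retry the request on gemini-pro
  fallbacks: [{"gemini-flash": ["gemini-pro"]}]
```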
## Usage
### LiteLLM Python SDK
#### Non-streaming example

**Basic Text Generation**

```python
import asyncio
import os

from litellm.google_genai import agenerate_content
from google.genai.types import ContentDict, PartDict

# Set API key
os.environ["GEMINI_API_KEY"] = "your-gemini-api-key"

contents = ContentDict(
    parts=[
        PartDict(text="Hello, can you tell me a short joke?")
    ],
    role="user",
)

async def main():
    response = await agenerate_content(
        contents=contents,
        model="gemini/gemini-2.0-flash",
        max_tokens=100,
    )
    print(response)

asyncio.run(main())
```
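To pull out just the generated text, you can index into the response. This is a sketch that assumes the return value mirrors Google's `GenerateContentResponse` shape; adjust if your LiteLLM version returns a different structure.

```python
# Assumes the response follows Google's GenerateContentResponse shape
text = response.candidates[0].content.parts[0].text
print(text)
```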
#### Streaming example

**Streaming Text Generation**

```python
import asyncio
import os

from litellm.google_genai import agenerate_content_stream
from google.genai.types import ContentDict, PartDict

# Set API key
os.environ["GEMINI_API_KEY"] = "your-gemini-api-key"

contents = ContentDict(
    parts=[
        PartDict(text="Write a long story about space exploration")
    ],
    role="user",
)

async def main():
    response = await agenerate_content_stream(
        contents=contents,
        model="gemini/gemini-2.0-flash",
        max_tokens=500,
    )
    # Chunks arrive incrementally as the model generates
    async for chunk in response:
        print(chunk)

asyncio.run(main())
```
#### Sync non-streaming example

**Sync Text Generation**

```python
import os

from litellm.google_genai import generate_content
from google.genai.types import ContentDict, PartDict

# Set API key
os.environ["GEMINI_API_KEY"] = "your-gemini-api-key"

contents = ContentDict(
    parts=[
        PartDict(text="Hello, can you tell me a short joke?")
    ],
    role="user",
)

response = generate_content(
    contents=contents,
    model="gemini/gemini-2.0-flash",
    max_tokens=100,
)
print(response)
```
#### Sync streaming example

**Sync Streaming Text Generation**

```python
import os

from litellm.google_genai import generate_content_stream
from google.genai.types import ContentDict, PartDict

# Set API key
os.environ["GEMINI_API_KEY"] = "your-gemini-api-key"

contents = ContentDict(
    parts=[
        PartDict(text="Write a long story about space exploration")
    ],
    role="user",
)

response = generate_content_stream(
    contents=contents,
    model="gemini/gemini-2.0-flash",
    max_tokens=500,
)
# Chunks arrive incrementally as the model generates
for chunk in response:
    print(chunk)
```
### LiteLLM Proxy Server
- Setup config.yaml

```yaml
model_list:
  - model_name: gemini-flash
    litellm_params:
      model: gemini/gemini-2.0-flash
      api_key: os.environ/GEMINI_API_KEY
```
- Start proxy

```bash
litellm --config /path/to/config.yaml
```
- Test it!
**Google GenAI SDK with LiteLLM Proxy**

```python
import os

from google.genai import Client

# Point the Google GenAI SDK at the LiteLLM proxy
os.environ["GOOGLE_GEMINI_BASE_URL"] = "http://localhost:4000"
os.environ["GEMINI_API_KEY"] = "sk-1234"  # your LiteLLM proxy API key

client = Client()

response = client.models.generate_content(
    model="gemini-flash",
    contents=[
        {
            "parts": [{"text": "Write a short story about AI"}],
            "role": "user"
        }
    ],
    config={"max_output_tokens": 100}
)
print(response.text)
```
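If you prefer not to set environment variables, the base URL can also be passed directly on the client. This is a sketch assuming the google-genai SDK's `http_options` parameter accepts a `base_url` entry.

```python
from google.genai import Client

# Point this client instance at the LiteLLM proxy (assumes http_options
# accepts a base_url; check your google-genai SDK version)
client = Client(
    api_key="sk-1234",
    http_options={"base_url": "http://localhost:4000"},
)
```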
#### Generate Content

**generateContent via LiteLLM Proxy**

```bash
curl -L -X POST 'http://localhost:4000/v1beta/models/gemini-flash:generateContent' \
-H 'content-type: application/json' \
-H 'authorization: Bearer sk-1234' \
-d '{
  "contents": [
    {
      "parts": [
        {
          "text": "Write a short story about AI"
        }
      ],
      "role": "user"
    }
  ],
  "generationConfig": {
    "maxOutputTokens": 100
  }
}'
```
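The same request can be made from Python with `requests`; this sketch mirrors the curl payload above exactly.

```python
import requests

response = requests.post(
    "http://localhost:4000/v1beta/models/gemini-flash:generateContent",
    headers={
        "content-type": "application/json",
        "authorization": "Bearer sk-1234",
    },
    json={
        "contents": [
            {"parts": [{"text": "Write a short story about AI"}], "role": "user"}
        ],
        "generationConfig": {"maxOutputTokens": 100},
    },
)
print(response.json())
```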
#### Stream Generate Content

**streamGenerateContent via LiteLLM Proxy**

```bash
curl -L -X POST 'http://localhost:4000/v1beta/models/gemini-flash:streamGenerateContent' \
-H 'content-type: application/json' \
-H 'authorization: Bearer sk-1234' \
-d '{
  "contents": [
    {
      "parts": [
        {
          "text": "Write a long story about space exploration"
        }
      ],
      "role": "user"
    }
  ],
  "generationConfig": {
    "maxOutputTokens": 500
  }
}'
```
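To consume the stream from Python, read the response incrementally. This is a sketch using `requests` with `stream=True`; the exact chunk framing (JSON array chunks vs. SSE lines) can vary by proxy version, so parse accordingly.

```python
import requests

response = requests.post(
    "http://localhost:4000/v1beta/models/gemini-flash:streamGenerateContent",
    headers={
        "content-type": "application/json",
        "authorization": "Bearer sk-1234",
    },
    json={
        "contents": [
            {
                "parts": [{"text": "Write a long story about space exploration"}],
                "role": "user",
            }
        ],
        "generationConfig": {"maxOutputTokens": 500},
    },
    stream=True,
)
# Print raw chunks as they arrive; parse according to the framing
# your proxy returns
for line in response.iter_lines():
    if line:
        print(line.decode("utf-8"))
```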