Applications
It's easy to integrate applications that leverage LLMs with Javelin. You can connect your applications and route all LLM traffic through Javelin with minimal code changes.
Leveraging the Javelin Platform
The core usage of Javelin is to define routes, and then to define what to do at each route. Rather than having your LLM applications (such as Co-Pilot apps) point individually and directly to an LLM vendor and model (such as OpenAI or Gemini), configure the provider/model endpoint to be your Javelin endpoint. This ensures that all applications that leverage AI models route their requests through the gateway. Javelin supports all the latest models and providers, so you don't have to make any changes to your application or to how requests to models are sent.
See the Javelin Configuration section for details on how to set up routes on the gateway to different models and providers.
See Python SDK for details on how you can easily embed this within your AI Apps.
Querying an LLM
Javelin may send a request to one or more models, based on the configured policies and route configurations, and then return a response.
REST API
- curl
- Python Requests
First, create a route as shown in the Create Route section.
Once you have created a route, you can query it using the following curl command:
# POST an OpenAI-format chat request to the Javelin route.
curl --header 'Content-Type: application/json' \
  --header 'Authorization: Bearer YOUR_OPENAI_API_KEY' \
  --header 'x-api-key: YOUR_JAVELIN_API_KEY' \
  --data-raw '{
"model": "gpt-3.5-turbo",
"messages": [
{"role": "user", "content": "SANFRANCISCO is located in?"}
],
"temperature": 0.8
}' \
  'https://api-dev.javelin.live/v1/query/your_route_name'
Make sure to replace your_route_name, YOUR_OPENAI_API_KEY, and YOUR_JAVELIN_API_KEY with your actual values.
First, create a route as shown in the Create Route section.
Once you have created a route, you can query it using Python requests:
import requests
import os
import dotenv

# Load environment variables via python-dotenv before reading the keys.
dotenv.load_dotenv()
javelin_api_key = os.getenv('JAVELIN_API_KEY')
openai_api_key = os.getenv('OPENAI_API_KEY')

# The route name is part of the query URL.
route_name = 'your_route_name'
url = f'https://api-dev.javelin.live/v1/query/{route_name}'

# The provider key goes in Authorization; the Javelin key goes in x-api-key.
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {openai_api_key}',
    'x-api-key': javelin_api_key
}

# Request body in OpenAI chat-completions format.
data = {
    "model": "gpt-3.5-turbo",
    "messages": [
        {"role": "user", "content": "SANFRANCISCO is located in?"}
    ],
    "temperature": 0.8
}

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.post(url, headers=headers, json=data, timeout=60)

if response.status_code == 200:
    print(response.json())
else:
    print(f"Error: {response.status_code}, {response.text}")
Make sure to replace your_route_name with your actual route name and set the JAVELIN_API_KEY and OPENAI_API_KEY environment variables.
Python
- Javelin SDK
- OpenAI
- Azure OpenAI
- LangChain
- OpenAI-Compatible Query Example
- DSPy
- Bedrock
- ...
pip install javelin-sdk
from javelin_sdk import JavelinClient, JavelinConfig, Route
import os

# Both keys are read from the environment.
javelin_api_key = os.getenv('JAVELIN_API_KEY')
llm_api_key = os.getenv("OPENAI_API_KEY")

# Create Javelin configuration
config = JavelinConfig(
    base_url="https://api-dev.javelin.live",
    javelin_api_key=javelin_api_key,
    llm_api_key=llm_api_key,
)

# Create Javelin client
client = JavelinClient(config)

# Route name to get is {routename} e.g., sampleroute1
# The payload is a system prompt, a user prompt, and a sampling temperature.
system_message = {
    "role": "system",
    "content": "Hello, you are a helpful scientific assistant.",
}
user_message = {
    "role": "user",
    "content": "What is the chemical composition of sugar?",
}
query_data = {
    "messages": [system_message, user_message],
    "temperature": 0.8,
}

# Now query the route, for async use 'await client.aquery_route("sampleroute1", query_data)'
response = client.query_route("sampleroute1", query_data)
print(response.model_dump_json(indent=2))
pip install openai
from openai import OpenAI
import os

javelin_api_key = os.environ['JAVELIN_API_KEY']
llm_api_key = os.environ["OPENAI_API_KEY"]

# Javelin Headers
javelin_headers = {
    "x-api-key": javelin_api_key,  # Javelin API key from admin
    "x-javelin-route": "sampleroute1"  # Javelin route to use
}

# Create OpenAI Client
client = OpenAI(
    api_key=llm_api_key,
    base_url="https://api-dev.javelin.live/v1/query",  # Set Javelin's API base URL for query
    default_headers=javelin_headers,
)

# Query the model
completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "Hello, you are a helpful scientific assistant"},
        {"role": "user", "content": "What is the chemical composition of sugar?"}
    ]
)
print(completion.model_dump_json(indent=2))

# Streaming Responses
stream = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "Hello, you are a helpful scientific assistant."},
        {"role": "user", "content": "What is the chemical composition of sugar?"}
    ],
    stream=True
)
for chunk in stream:
    # Skip chunks with an empty choices list, as the Azure OpenAI example does,
    # so indexing choices[0] cannot raise IndexError.
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="")
pip install openai
from openai import AzureOpenAI
import os

# Javelin Headers
javelin_api_key = os.environ['JAVELIN_API_KEY']
llm_api_key = os.environ["AZURE_OPENAI_API_KEY"]
javelin_headers = {
    "x-api-key": javelin_api_key,  # Javelin API key from admin
    "x-javelin-route": "sampleroute1"  # Javelin route to use
}

# Create the Azure OpenAI client, pointed at Javelin instead of Azure.
client = AzureOpenAI(
    api_key=llm_api_key,
    base_url="https://api-dev.javelin.live/v1/query",  # Set Javelin's API base URL for query
    default_headers=javelin_headers,
    api_version="2023-07-01-preview",
)

# Query the model
completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "Hello, you are a helpful scientific assistant."},
        {"role": "user", "content": "What is the chemical composition of sugar?"}
    ]
)
print(completion.model_dump_json(indent=2))

# Streaming Responses
stream = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "Hello, you are a helpful scientific assistant."},
        {"role": "user", "content": "What is the chemical composition of sugar?"}
    ],
    stream=True
)
for chunk in stream:
    # Only chunks with a non-empty choices list are printed.
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="")
pip install langchain
pip install langchain-openai
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import os

javelin_api_key = os.getenv('JAVELIN_API_KEY')
llm_api_key = os.getenv("OPENAI_API_KEY")

# Headers that identify the caller and select the Javelin route.
javelin_headers = {
    "x-api-key": javelin_api_key,  # Javelin API key from admin
    "x-javelin-route": "sample_route1",  # Javelin route to use
}

# Chat model pointed at Javelin; the headers are passed as extra_headers.
llm = ChatOpenAI(
    openai_api_base="https://api-dev.javelin.live/v1/query",  # Set Javelin's API base URL for query
    openai_api_key=llm_api_key,
    model_kwargs={"extra_headers": javelin_headers},
)

# Fixed system prompt plus a templated user turn.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Hello, you are a helpful scientific assistant."),
        ("user", "{input}"),
    ]
)
output_parser = StrOutputParser()

# Compose prompt -> model -> parser.
chain = prompt | llm | output_parser
print(chain.invoke({"input": "What is the chemical composition of sugar?"}))
#This example demonstrates how Javelin uses OpenAI's schema as a standardized interface for different LLM providers.
#By adopting OpenAI's widely-used request/response format, Javelin enables seamless integration with various LLM providers
#(like Anthropic, Bedrock, Mistral, etc.) while maintaining a consistent API structure. This allows developers to use the
#same code pattern regardless of the underlying model provider, with Javelin handling the necessary translations and adaptations behind the scenes.
from javelin_sdk import JavelinClient, JavelinConfig
import os
from typing import Dict, Any
import json
# Helper function to pretty print responses
def print_response(provider: str, response: Dict[str, Any]) -> None:
    """Print a banner naming the provider, then the response as indented JSON.

    Args:
        provider: Label shown in the "=== Response from ... ===" banner.
        response: JSON-serializable response payload.
    """
    # The leading "\n" puts a blank line before each banner.
    print(f"\n=== Response from {provider} ===")
    print(json.dumps(response, indent=2))
# Setup client configuration
config = JavelinConfig(
    base_url="https://api-dev.javelin.live",
    javelin_api_key=os.getenv('JAVELIN_API_KEY'),
    llm_api_key=os.getenv('OPENAI_API_KEY')
)
client = JavelinClient(config)

# Example messages in OpenAI format
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What are the three primary colors?"}
]

# 1. Query OpenAI route
try:
    openai_response = client.chat.completions.create(
        route="openai_route",  # Route configured for OpenAI
        messages=messages,
        temperature=0.7,
        max_tokens=150
    )
    print_response("OpenAI", openai_response)
except Exception as e:
    # Report the failure and carry on to the next example.
    print(f"OpenAI query failed: {str(e)}")

# Sample output, kept in a bare string literal so the script still runs.
"""
=== Response from OpenAI ===
{
  "id": "chatcmpl-123abc",
  "object": "chat.completion",
  "created": 1677858242,
  "model": "gpt-3.5-turbo",
  "usage": {
    "prompt_tokens": 42,
    "completion_tokens": 38,
    "total_tokens": 80
  },
  "choices": [
    {
      "message": {
        "role": "assistant",
        "content": "The three primary colors are red, blue, and yellow."
      },
      "finish_reason": "stop",
      "index": 0
    }
  ]
}
"""
# 2. Query Bedrock route (using same OpenAI format)
try:
    bedrock_response = client.chat.completions.create(
        route="bedrock_route",  # Route configured for Bedrock
        messages=messages,
        temperature=0.7,
        max_tokens=150
    )
    print_response("Bedrock", bedrock_response)
except Exception as e:
    # Report the failure and carry on to the next example.
    print(f"Bedrock query failed: {str(e)}")

# Sample output, kept in a bare string literal so the script still runs.
"""
=== Response from Bedrock ===
{
  "id": "bedrock-123xyz",
  "object": "chat.completion",
  "created": 1677858243,
  "model": "anthropic.claude-v2",
  "usage": {
    "prompt_tokens": 42,
    "completion_tokens": 41,
    "total_tokens": 83
  },
  "choices": [
    {
      "message": {
        "role": "assistant",
        "content": "The three primary colors are red, blue, and yellow. These colors cannot be created by mixing other colors together."
      },
      "finish_reason": "stop",
      "index": 0
    }
  ]
}
"""
# Example using text completions with Llama
try:
    llama_response = client.completions.create(
        route="bedrockllama",  # Route configured for Bedrock Llama
        prompt="Write a haiku about programming:",
        max_tokens=50,
        temperature=0.7,
        top_p=0.9,
    )
    print("=== Llama Text Completion Response ===")
    # The snippet defines no pretty_print helper, so dump the JSON directly.
    # This keeps the banner matching the sample output below.
    print(json.dumps(llama_response, indent=2))
except Exception as e:
    # Report the failure instead of aborting the script.
    print(f"Llama query failed: {str(e)}")

# Sample output, kept in a bare string literal so the script still runs.
"""
=== Llama Text Completion Response ===
{
  "id": "bedrock-comp-123xyz",
  "object": "text_completion",
  "created": 1677858244,
  "model": "meta.llama2-70b",
  "choices": [
    {
      "text": "Code flows like water\nBugs crawl through silent errors\nDebugger saves all",
      "index": 0,
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 6,
    "completion_tokens": 15,
    "total_tokens": 21
  }
}
"""
Introduction: DSPy: Goodbye Prompting, Hello Programming!
Documentation: DSPy Docs
pip install dspy-ai
import dspy
from dsp import LM
import os
import requests
# Assuming the environment variables are set correctly
javelin_api_key = os.getenv('JAVELIN_API_KEY')
llm_api_key = os.getenv("OPENAI_API_KEY")


class Javelin(LM):
    """Language-model adapter that posts chat requests to the Javelin gateway.

    Each prompt is sent as a single user message in OpenAI chat format, and
    every request/response pair is recorded in ``self.history``.
    """

    def __init__(self, model, api_key):
        """Store the model name and build the Javelin request headers.

        Args:
            model: Model name sent in the "model" field of each request.
            api_key: LLM provider key, sent as the Bearer token.
        """
        self.model = model
        self.api_key = api_key
        self.provider = "default"
        # Default generation settings. basic_request does not merge these
        # into the payload; only per-call kwargs are sent.
        self.kwargs = {
            "temperature": 1.0,
            "max_tokens": 500,
            "top_p": 1.0,
            "frequency_penalty": 0.0,
            "presence_penalty": 0.0,
            "stop": None,
            "n": 1,
            "logprobs": None,
            "logit_bias": None,
            "stream": False
        }
        self.base_url = "https://api-dev.javelin.live/v1/query/"  # Set Javelin's API base URL for query
        self.javelin_headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer { api_key }",
            "x-javelin-route": "openai",  # route name configured for OpenAI
            "x-api-key": javelin_api_key,
        }
        self.history = []

    def basic_request(self, prompt: str, **kwargs):
        """POST the prompt to Javelin and return the decoded JSON response.

        Args:
            prompt: Text sent as the single user message.
            **kwargs: Extra fields copied into the request body.

        Returns:
            The decoded JSON response body.
        """
        headers = self.javelin_headers
        # "model" and "messages" come last so kwargs cannot override them.
        data = {
            **kwargs,
            "model": self.model,
            "messages": [
                {"role": "user", "content": prompt}
            ]
        }
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.post(self.base_url, headers=headers, json=data, timeout=60)
        response = response.json()
        self.history.append({
            "prompt": prompt,
            "response": response,
            "kwargs": kwargs,
        })
        return response

    def __call__(self, prompt, only_completed=True, return_sorted=False, **kwargs):
        """Return a one-element list holding the first completion's text.

        ``only_completed`` and ``return_sorted`` are accepted but not used.
        If the response has no choices, the list holds "No response found.".
        """
        # request() is not defined in this class; presumably inherited from
        # LM and delegating to basic_request -- TODO confirm.
        response = self.request(prompt, **kwargs)
        if 'choices' in response and len(response['choices']) > 0:
            return [response['choices'][0]['message']['content']]
        return ["No response found."]
# Make the Javelin-backed model DSPy's configured language model.
javelin = Javelin(model="gpt-4-1106-preview", api_key=llm_api_key)
dspy.configure(lm=javelin)

# Define a module (ChainOfThought) and assign it a signature (return an answer, given a question).
qa = dspy.ChainOfThought('question -> answer')

question = "You have 3 baskets. The first basket has twice as many apples as the second basket. The third basket has 3 fewer apples than the first basket. If you have a total of 27 apples, how many apples are in each basket?"
response = qa(question=question)
print(response)
pip install boto3
import json
import os

import boto3

# Javelin API key, attached to every outgoing request as x-api-key.
JAVELIN_API_KEY = os.environ["JAVELIN_API_KEY"]

# Configure boto3 client
client = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-east-1",
    endpoint_url="https://api-dev.javelin.live/v1/",
)


def add_custom_headers(request, **kwargs):
    """Add the Javelin API key header to an outgoing request."""
    headers = {
        "x-api-key": f"{JAVELIN_API_KEY}"
    }
    request.headers.update(headers)


# Run the handler for every operation just before it is sent.
client.meta.events.register('before-send.*.*', add_custom_headers)

# Example using Claude model via Bedrock
response = client.invoke_model_with_response_stream(
    modelId="anthropic.claude-v2:1",
    # boto3 expects a serialized payload here, not a dict.
    body=json.dumps({
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 100,
        "messages": [
            {
                "content": "What is machine learning?",
                "role": "user"
            }
        ]
    }),
    contentType="application/json"
)
for event in response['body']:
    print(event)
# Example using Langchain
# NOTE(review): BedrockLLM is assumed to come from the langchain-aws package
# (pip install langchain-aws) -- confirm against the LangChain version in use.
from langchain_aws import BedrockLLM

# Use the boto3 client to create a BedrockLLM
llm = BedrockLLM(
    client=client,
    model_id="anthropic.claude-v2:1",
    model_kwargs={
        "max_tokens_to_sample": 256,
        "temperature": 0.7,
    }
)

# Prompt to stream a completion for.
prompt_text = "What is machine learning?"
stream_generator = llm.stream(prompt_text)
for chunk in stream_generator:
    print(chunk)
Learn more about how to set up Bedrock routes to use these examples here.
JavaScript/TypeScript
- OpenAI
- Langchain
- Bedrock
- ...
npm install openai
import OpenAI from "openai";

// Headers that authenticate with Javelin and select the route.
const javelinHeaders = {
  "x-api-key": `${process.env.JAVELIN_API_KEY}`,
  "x-javelin-route": "sample_route1",
};

// OpenAI client pointed at the Javelin query endpoint.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
  baseURL: "https://api-dev.javelin.live/v1/query",
  defaultHeaders: javelinHeaders,
});

// Send one chat completion request and log the first choice.
async function main() {
  const request = {
    messages: [{ role: "system", content: "You are a helpful assistant." }],
    model: "gpt-3.5-turbo",
  };
  const completion = await openai.chat.completions.create(request);
  console.log(completion.choices[0]);
}

main();
npm install @langchain/openai
import { ChatOpenAI } from '@langchain/openai';

// Client options: the Javelin endpoint plus the Javelin headers.
const javelinConfiguration = {
  basePath: "https://api-dev.javelin.live/v1/query",
  defaultHeaders: {
    "x-api-key": `${process.env.JAVELIN_API_KEY}`,
    "x-javelin-route": "sample_route1",
  },
};

const llm = new ChatOpenAI({
  openAIApiKey: process.env.OPENAI_API_KEY,
  configuration: javelinConfiguration,
});

// Invoke the model once and log the response.
async function main() {
  const question = "tell me a joke?";
  const response = await llm.invoke(question);
  console.log(response);
}

main();
import { BedrockRuntimeClient, InvokeModelCommand, InvokeModelWithResponseStreamCommand } from "@aws-sdk/client-bedrock-runtime";

// Configuration is read from the environment.
// NOTE(review): JAVELIN_ENDPOINT is presumably the Javelin base URL for
// Bedrock routes, e.g. https://api-dev.javelin.live/v1 -- confirm.
const {
  JAVELIN_API_KEY,
  JAVELIN_ENDPOINT,
  AWS_REGION,
  AWS_ACCESS_KEY_ID,
  AWS_SECRET_ACCESS_KEY,
} = process.env;

const customHeaders = {
  'x-api-key': JAVELIN_API_KEY
};

const client = new BedrockRuntimeClient({
  region: AWS_REGION,
  // Use the javelin endpoint for bedrock
  endpoint: JAVELIN_ENDPOINT,
  credentials: {
    accessKeyId: AWS_ACCESS_KEY_ID,
    secretAccessKey: AWS_SECRET_ACCESS_KEY,
  },
});

// Add custom headers via middleware
client.middlewareStack.add(
  (next, context) => async (args) => {
    // Merge the Javelin headers into the outgoing request's headers.
    args.request.headers = {
      ...args.request.headers,
      ...customHeaders
    };
    return next(args);
  },
  {
    step: "build"
  }
);

// Query the model
const payload = {
  anthropic_version: "bedrock-2023-05-31",
  max_tokens: 1000,
  messages: [
    {
      role: "user",
      content: "What is machine learning?",
    },
  ],
};

const command = new InvokeModelWithResponseStreamCommand({
  contentType: "application/json",
  body: JSON.stringify(payload),
  // The model identifier must be passed under the modelId key.
  modelId: "anthropic.claude-v2:1",
});

const apiResponse = await client.send(command);

// Log each streamed event as it arrives.
for await (const item of apiResponse.body) {
  console.log(item);
}
Learn more about how to set up Bedrock routes to use these examples here.
We have worked on the integrations. Please contact: support@getjavelin.io if you would like to use this feature.