Skip to content

LLM APIs

We provide some APIs to conveniently create, list, and run inference with LLMs. Under the hood they are Launch model endpoints.

Example

LLM APIs Usage
import os

from rich import print

from launch import LaunchClient
from launch.api_client.model.llm_inference_framework import (
    LLMInferenceFramework,
)
from launch.api_client.model.llm_source import LLMSource

# Build a Launch client from credentials supplied via environment variables.
client = LaunchClient(
    api_key=os.getenv("LAUNCH_API_KEY"),
    endpoint=os.getenv("LAUNCH_ENDPOINT"),
)

# List every LLM model endpoint visible to this account and show it.
existing_endpoints = client.list_llm_model_endpoints()
print(existing_endpoints)

# Create a synchronous endpoint serving FLAN-T5 XXL from Hugging Face via
# DeepSpeed, sharded across 4 GPUs with a fixed pool of exactly one worker.
endpoint_name = "test-flan-t5-xxl"
client.create_llm_model_endpoint(
    endpoint_name=endpoint_name,
    model_name="flan-t5-xxl",
    source=LLMSource.HUGGING_FACE,
    inference_framework=LLMInferenceFramework.DEEPSPEED,
    inference_framework_image_tag=os.getenv("INFERENCE_FRAMEWORK_IMAGE_TAG"),
    num_shards=4,
    min_workers=1,
    max_workers=1,
    gpus=4,
    endpoint_type="sync",
)

# NOTE: endpoint creation is not instantaneous — wait for the endpoint to
# become ready before sending requests.

# Run a single greedy (temperature=0) completion, capped at 10 new tokens.
response = client.completions_sync(
    endpoint_name,
    prompt="What is Deep Learning?",
    max_new_tokens=10,
    temperature=0,
)
print(response)