LLM APIs
We provide APIs to conveniently create, list, and run inference with LLMs. Under the hood, they are Launch model endpoints.
Example
LLM APIs Usage
import os
from rich import print
from launch import LaunchClient
from launch.api_client.model.llm_inference_framework import (
LLMInferenceFramework,
)
from launch.api_client.model.llm_source import LLMSource
# Build the client from credentials supplied through environment variables.
client = LaunchClient(
    api_key=os.environ.get("LAUNCH_API_KEY"),
    endpoint=os.environ.get("LAUNCH_ENDPOINT"),
)

# Show the LLM model endpoints returned by the client.
endpoints = client.list_llm_model_endpoints()
print(endpoints)

# Settings for a new sync endpoint: flan-t5-xxl from Hugging Face, using the
# DeepSpeed inference framework, 4 shards on 4 GPUs, exactly one worker.
# The framework image tag is read from the environment.
endpoint_name = "test-flan-t5-xxl"
endpoint_settings = dict(
    endpoint_name=endpoint_name,
    model_name="flan-t5-xxl",
    source=LLMSource.HUGGING_FACE,
    inference_framework=LLMInferenceFramework.DEEPSPEED,
    inference_framework_image_tag=os.environ.get("INFERENCE_FRAMEWORK_IMAGE_TAG"),
    num_shards=4,
    min_workers=1,
    max_workers=1,
    gpus=4,
    endpoint_type="sync",
)
client.create_llm_model_endpoint(**endpoint_settings)

# Wait for the endpoint to be ready.
# NOTE(review): nothing in this script actually waits or polls; make sure the
# endpoint is ready before running the completion request below.
output = client.completions_sync(
    endpoint_name,
    prompt="What is Deep Learning?",
    max_new_tokens=10,
    temperature=0,
)
print(output)