Completions¶
Language Models are trained to predict natural language and provide text outputs as a response to their inputs. The inputs are called prompts and outputs are referred to as completions. Completions are hence at the core of any generative AI application. Use the Python SDK to generate intelligent responses to your users' requests in either blocking or streaming fashion. SGP supports a variety of open source and commercial models under the same unified interface (see Supported Models).
Synchronous Completions¶
An example of generating completions with the SGP Python SDK is as follows:
# Typing helpers used in the model-name annotations of the examples below.
from typing import Literal, Iterable, Union
import dotenv
# EGPClient is the entry point to the SGP (Scale EGP) Python SDK.
from scale_egp.sdk.client import EGPClient
# Path to a local env file; presumably it holds the API credentials the
# client reads from the environment — confirm against your deployment setup.
ENV_FILE = ".env.local"
# override=True lets values in the file take precedence over the shell env.
dotenv.load_dotenv(ENV_FILE, override=True)
def sync_completion(
    egp_client: EGPClient,
    model: Union[
        Literal[
            "gpt-4", "gpt-4-0613", "gpt-4-32k", "gpt-4-32k-0613",
            "gpt-3.5-turbo", "gpt-3.5-turbo-0613",
            "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613",
            "text-davinci-003", "text-davinci-002", "text-curie-001",
            "text-babbage-001", "text-ada-001",
            "claude-instant-1", "claude-instant-1.1",
            "claude-2", "claude-2.0",
            "llama-7b", "llama-2-7b", "llama-2-7b-chat",
            "llama-2-13b", "llama-2-13b-chat",
            "llama-2-70b", "llama-2-70b-chat",
            "falcon-7b", "falcon-7b-instruct",
            "falcon-40b", "falcon-40b-instruct",
            "mpt-7b", "mpt-7b-instruct",
            "flan-t5-xxl",
            "mistral-7b", "mistral-7b-instruct",
            "mixtral-8x7b", "mixtral-8x7b-instruct",
            "llm-jp-13b-instruct-full", "llm-jp-13b-instruct-full-dolly",
            "zephyr-7b-alpha", "zephyr-7b-beta",
            "codellama-7b", "codellama-7b-instruct",
            "codellama-13b", "codellama-13b-instruct",
            "codellama-34b", "codellama-34b-instruct",
            "codellama-70b", "codellama-70b-instruct",
        ],
        str,
    ],
    input_prompt: str,
) -> str:
    """Generate a completion for ``input_prompt`` with a single blocking call.

    Args:
        egp_client: An initialized SGP client used to issue the request.
        model: Model to query — one of the known model-name literals, or any
            other model-name string the platform accepts.
        input_prompt: Prompt text submitted to the model.

    Returns:
        The generated completion text.
    """
    response = egp_client.completions().create(model=model, prompt=input_prompt)
    return response.completion.text
Token Streaming¶
The SGP Python SDK supports token streaming to reduce perceived latency for certain applications. When streaming, tokens will be sent as data-only server-side events.
from typing import Literal, Iterable, Union
import dotenv
from scale_egp.sdk.client import EGPClient
def stream_completion(
    egp_client: EGPClient,
    model: Union[
        Literal[
            "gpt-4", "gpt-4-0613", "gpt-4-32k", "gpt-4-32k-0613",
            "gpt-3.5-turbo", "gpt-3.5-turbo-0613",
            "gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613",
            "text-davinci-003", "text-davinci-002", "text-curie-001",
            "text-babbage-001", "text-ada-001",
            "claude-instant-1", "claude-instant-1.1",
            "claude-2", "claude-2.0",
            "llama-7b", "llama-2-7b", "llama-2-7b-chat",
            "llama-2-13b", "llama-2-13b-chat",
            "llama-2-70b", "llama-2-70b-chat",
            "falcon-7b", "falcon-7b-instruct",
            "falcon-40b", "falcon-40b-instruct",
            "mpt-7b", "mpt-7b-instruct",
            "flan-t5-xxl",
            "mistral-7b", "mistral-7b-instruct",
            "mixtral-8x7b", "mixtral-8x7b-instruct",
            "llm-jp-13b-instruct-full", "llm-jp-13b-instruct-full-dolly",
            "zephyr-7b-alpha", "zephyr-7b-beta",
            "codellama-7b", "codellama-7b-instruct",
            "codellama-13b", "codellama-13b-instruct",
            "codellama-34b", "codellama-34b-instruct",
            "codellama-70b", "codellama-70b-instruct",
        ],
        str,
    ],
    input_prompt: str,
) -> Iterable[str]:
    """Stream a completion for ``input_prompt`` token chunk by token chunk.

    Args:
        egp_client: An initialized SGP client used to issue the request.
        model: Model to query — one of the known model-name literals, or any
            other model-name string the platform accepts.
        input_prompt: Prompt text submitted to the model.

    Yields:
        Each chunk of generated text as it arrives from the server.
    """
    chunks = egp_client.completions().stream(model=model, prompt=input_prompt)
    yield from (chunk.completion.text for chunk in chunks)
if __name__ == "__main__":
    # Interactive demo: read a prompt from stdin and stream the response.
    client = EGPClient()
    user_input = input("Enter a prompt to submit for a streaming completion request:\n")
    generated_text_generator = stream_completion(
        egp_client=client,
        model="gpt-3.5-turbo",
        input_prompt=user_input,
    )
    # Plain string instead of the original placeholder-free f-string (ruff F541).
    print("AI Response:")
    for generated_text in generated_text_generator:
        # flush=True pushes each chunk to the terminal as soon as it arrives,
        # replacing the separate sys.stdout.flush() call (and the need for
        # the mid-file `import sys`).
        print(generated_text, end="", flush=True)
    # Trailing newline after the streamed response.
    print()
See the full Completion SDK reference documentation to learn more.