-
Notifications
You must be signed in to change notification settings - Fork 105
/
Copy pathEndpoints.py
executable file
·58 lines (47 loc) · 1.66 KB
/
Endpoints.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import json
import os
import time
import requests
from openai import OpenAI
class Embed:
    """Thin wrapper that requests ``NV-Embed-QA`` embeddings for a single text."""

    def __init__(self):
        """Build the OpenAI-compatible client pointed at the NVIDIA retrieval URL.

        The API key is read from the ``BUILD_NVIDIA_API_KEY`` environment
        variable.
        NOTE(review): if the variable is unset, ``None`` is handed to the
        client; how the client reacts to that is not visible here -- confirm.
        """
        nvidia_key = os.getenv("BUILD_NVIDIA_API_KEY")
        self.client = OpenAI(
            api_key=nvidia_key,
            base_url="https://ai.api.nvidia.com/v1/retrieval/nvidia",
        )

    def invoke(self, text):
        """Embed ``text`` and return the embedding of that single input.

        Args:
            text: The value to embed; it is sent as a one-element ``input`` list.

        Returns:
            The ``embedding`` attribute of the first item in the response's
            ``data`` (presumably a list of floats, since
            ``encoding_format="float"`` is requested -- confirm against the API).
        """
        # Service-specific options forwarded verbatim in the request body.
        request_options = {"input_type": "query", "truncate": "NONE"}
        response = self.client.embeddings.create(
            model="NV-Embed-QA",
            input=[text],
            encoding_format="float",
            extra_body=request_options,
        )
        # Exactly one input was sent, so only the first result is read.
        first_item = response.data[0]
        return first_item.embedding
class LLaMa_405B:
    """Minimal REST client for ``meta/llama-3.1-405b-instruct`` chat completions.

    Requests are sent with ``requests`` to NVIDIA's chat-completions endpoint,
    authenticated with the ``BUILD_NVIDIA_API_KEY`` environment variable.
    """

    # Base URL used to poll a request that was accepted (HTTP 202) but is not
    # finished yet; the ``NVCF-REQID`` response header value is appended to it.
    # NOTE(review): value taken from NVIDIA's NVCF sample code -- confirm it is
    # the correct status endpoint for integrate.api.nvidia.com.
    FETCH_URL_FORMAT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/"

    def __init__(self):
        """Store the endpoint URL and the request headers.

        Raises:
            TypeError: If ``BUILD_NVIDIA_API_KEY`` is not set, because ``None``
                cannot be concatenated onto the ``"Bearer "`` prefix.
        """
        self.url = "https://integrate.api.nvidia.com/v1/chat/completions"
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": "Bearer " + os.getenv("BUILD_NVIDIA_API_KEY"),
        }

    def invoke(self, prompt, schema=None):
        """Send ``prompt`` as a single user message and return the reply text.

        Args:
            prompt: Content of the one ``user`` message in the request.
            schema: Optional value placed under the payload's ``"nvext"`` key
                (presumably NVIDIA's structured-output extension -- confirm
                the expected shape with callers).

        Returns:
            The ``content`` of the first choice's message in the response.

        Raises:
            requests.HTTPError: If the final response has an error status.
            RuntimeError: If a 202 response carries no ``NVCF-REQID`` header,
                so there is nothing to poll.
        """
        # Kept on the instance, as before, so callers can inspect the last
        # request that was sent.
        self.payload = {
            "model": "meta/llama-3.1-405b-instruct",
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0,
            "stream": False,
            "max_tokens": 1024,
        }
        if schema is not None:
            self.payload["nvext"] = schema

        # The context manager releases the pooled connections when done.
        with requests.Session() as session:
            response = session.post(
                self.url, headers=self.headers, json=self.payload
            )
            # 202 means "accepted, still running": poll the status URL with
            # the request id until a final status comes back.
            while response.status_code == 202:
                request_id = response.headers.get("NVCF-REQID")
                if not request_id:
                    raise RuntimeError(
                        "Received HTTP 202 without an NVCF-REQID header; "
                        "cannot poll for the result."
                    )
                fetch_url = self.FETCH_URL_FORMAT + request_id
                response = session.get(fetch_url, headers=self.headers)
            # Surface HTTP errors directly instead of failing later on a
            # missing "choices" key.
            response.raise_for_status()
            response_body = response.json()
        return response_body["choices"][0]["message"]["content"]