Skip to content

Commit

Permalink
Add caching to Nova client (#3264)
Browse files: browse the repository at this point in the history
  • Loading branch information
yifanmai authored Jan 10, 2025
1 parent fa6cca3 commit f2c914c
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 4 deletions.
14 changes: 11 additions & 3 deletions src/helm/clients/bedrock_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,14 +133,22 @@ def convert_request_to_raw_request(self, request: Request) -> Dict:

def make_request(self, request: Request) -> RequestResult:
raw_request = self.convert_request_to_raw_request(request)
response = self.bedrock_client.converse(**raw_request)
cache_key = CachingClient.make_cache_key(raw_request, request)

def do_it() -> Dict[Any, Any]:
return self.bedrock_client.converse(**raw_request)

response, cached = self.cache.get(cache_key, do_it)

completions = self.convert_raw_response_to_completions(response, request)
dt = datetime.strptime(response["ResponseMetadata"]["HTTPHeaders"]["date"], "%a, %d %b %Y %H:%M:%S GMT")
# Use API reported latency rather than client measured latency
request_time = response["metrics"]["latencyMs"] / 1000

return RequestResult(
success=True,
cached=False,
request_time=(response["metrics"]["latencyMs"] / 1000),
cached=cached,
request_time=request_time,
request_datetime=int(dt.timestamp()),
completions=completions,
embedding=[],
Expand Down
2 changes: 1 addition & 1 deletion src/helm/common/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ class RequestResult:
"""Whether the request was actually cached"""

request_time: Optional[float] = None
"""How long did the request take?"""
"""How long the request took in seconds"""

request_datetime: Optional[int] = None
"""When was the request sent?
Expand Down

0 comments on commit f2c914c

Please sign in to comment.