Skip to content

Commit

Permalink
Merge branch 'release/0.7.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
GabeLambda committed Nov 18, 2019
2 parents ea9ab1e + 4cee36c commit f50ac14
Show file tree
Hide file tree
Showing 8 changed files with 243 additions and 57 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.6.0
current_version = 0.7.0
commit = False
tag = False

Expand Down
61 changes: 61 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,65 @@

Caching for microcosm microservices.

Provides a generic caching client, as well as some common decorators / patterns to manage result caching

In general, caching is useful because:
* lookups are costly (either because they incur a network or computational cost)
* data may rarely change, relative to how often it's read

[![CircleCI](https://circleci.com/gh/globality-corp/microcosm-caching.svg?style=svg&circle-token=4d985d6947b5d753c6f3b779a2475f389e7c0ef1)](https://circleci.com/gh/globality-corp/microcosm-caching)

## Usage ##
This library exposes a `resource_cache` component in its entry points, automatically configuring a caching client
for general use for direct cache manipulation.

Common patterns have emerged from typical usages of this component, which we generalize into a caching
strategy via several decorators.

### Decorators ###
`cached`:
```python
from typing import Type
from marshmallow import Schema

def cached(component, schema: Type[Schema], cache_prefix: str, ttl: int = DEFAULT_TTL):
pass

# Example usage
return cached(component, ExampleSchema, "prefix")(component.func)
```
This performs a basic "get and set" for a result from a decorated function.

`invalidates` / `invalidates_batch`
```python
from typing import List

def invalidates(component, invalidations: List[Invalidation], cache_prefix, lock_ttl=DEFAULT_LOCK_TTL):
pass
# example usage
return invalidates(component, cache_prefix=CACHE_PREFIX, invalidations=[
Invalidation(
ExampleSchema,
arguments=["example_id"],
)
])(component.func)
```
This will allow for the invalidation of a set of keys based on the *input* params of the function, such that they match
how a given resource is cached.

This may be useful if you want to invalidate a resource based on the creation of another resource (e.g. the creation of
another associated event with that resource).

This does add a limitation that those given params need to provide some link to the related resource. This may be
difficult in some cases, as that parameter may not exist.

An additional detail to the above is the actual invalidation strategy. Instead of directly deleting the cache key,
invalidation will render a given resource as uncacheable for a period of time, during which caching reads won't do anything.

This is done because we commonly cache in conjunction with another service such as a database, meaning that a
given request may not be done until that other service has completed (imagine a case where we invalidate, try to commit,
and find that that operation takes longer than we expect). If we were to simply delete, that would allow an interleaved
read to re-cache the now-stale data, if it happened to read right between the cache delete and a database commit.

So, as an important caveat: if that secondary operation starts taking longer than the lock TTL, you may end up
caching stale data.
29 changes: 29 additions & 0 deletions microcosm_caching/build_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
Store build information.
"""
from dataclasses import dataclass
from typing import Optional

from microcosm.api import defaults, typed


@dataclass
class BuildInfo:
    """
    Build metadata for the running service, bound to the object graph.

    Both values come from configuration (see `configure_build_info`) and
    may be None when no build information was supplied (e.g. local runs).
    """
    # CI build number, as a string; None when not configured
    build_num: Optional[str]
    # Git commit SHA of the build; used to version cache keys downstream
    sha1: Optional[str]


@defaults(
    build_num=typed(str, default_value=None),
    sha1=typed(str, default_value=None),
)
def configure_build_info(graph):
    """
    Build a `BuildInfo` component from the graph's `build_info` config section.

    Both values default to None when not supplied by configuration.
    """
    config = graph.config.build_info
    return BuildInfo(
        build_num=config.build_num,
        sha1=config.sha1,
    )
41 changes: 31 additions & 10 deletions microcosm_caching/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@
Type,
)

from marshmallow import Schema
from marshmallow import EXCLUDE, Schema
from microcosm.errors import NotBoundError
from pymemcache.exceptions import MemcacheError

from microcosm_caching.base import CacheBase
from microcosm_caching.build_info import BuildInfo


DEFAULT_TTL = 60 * 60 # Cache for an hour by default
Expand All @@ -29,6 +30,11 @@ def get_metrics(graph):
return None


def get_build_version(graph) -> Optional[str]:
    """
    Return the build's git SHA from the graph's build_info component,
    or None when no build information was configured.
    """
    info: "BuildInfo" = graph.build_info
    return info.sha1


@dataclass
class Invalidation:
schema: Type[Schema]
Expand Down Expand Up @@ -58,18 +64,24 @@ def from_kwargs(self, kwargs) -> Dict[str, Any]:
return invalidation_kwargs


def cache_key(cache_prefix, schema, args, kwargs, version: Optional[str] = None) -> str:
    """
    Derive a stable cache key from the prefix, schema name, call arguments,
    and an optional version, hashed with SHA-1.

    Keyword arguments are sorted so the key is independent of call-site
    ordering; the version participates in the key so that bumping it
    effectively invalidates all previously cached entries.
    """
    components = (schema.__name__,) + args
    components += tuple(sorted(kwargs.items()))
    return sha1(f"{cache_prefix}:{version}:{components}".encode("utf-8")).hexdigest()


def cached(component, schema: Type[Schema], cache_prefix: str, ttl: int = DEFAULT_TTL):
def cached(
component,
schema: Type[Schema],
cache_prefix: Optional[str] = None,
ttl: int = DEFAULT_TTL,
schema_version: Optional[str] = None,
):
"""
Caches the result of a decorated component function, given that both the underlying
function and the component itself adhere to a given structure.
Expand All @@ -90,8 +102,10 @@ def cached(component, schema: Type[Schema], cache_prefix: str, ttl: int = DEFAUL
:param component: A microcosm-based component
:param schema: The schema corresponding to the response type of the component
:param cache_prefix: Namespace to use for cache keys
:param cache_prefix: Namespace to use for cache keys. Defaults to the name attached to the graph
:param ttl: How long to cache the underlying resource
:param schema_version: The version of this schema. Used as part of the cache key. If not supplied,
will default to the build version, if supplied
:return: the resource (i.e. loaded schema instance)
"""
logger: Logger = getattr(component, "logger")
Expand All @@ -100,6 +114,9 @@ def cached(component, schema: Type[Schema], cache_prefix: str, ttl: int = DEFAUL
metrics = get_metrics(graph)
resource_cache: CacheBase = graph.resource_cache

version = schema_version or get_build_version(graph)
cache_prefix = cache_prefix or graph.metadata.name

def retrieve_from_cache(key: str):
start_time = perf_counter()

Expand Down Expand Up @@ -145,7 +162,7 @@ def cache(*args, **kwargs) -> Schema:
return func(*args, **kwargs)

try:
key = cache_key(cache_prefix, schema, args, kwargs)
key = cache_key(cache_prefix, schema, args, kwargs, version)
cached_resource = retrieve_from_cache(key)
if not cached_resource:
resource = func(*args, **kwargs)
Expand All @@ -155,7 +172,7 @@ def cache(*args, **kwargs) -> Schema:
# NB: We're caching the serialized format of the resource, meaning
# we need to do a (wasteful) load here to enable it to be dumped correctly
# later on in the flow. This could probably be made more efficient
return schema().load(cached_resource, unknown="exclude")
return schema().load(cached_resource, unknown=EXCLUDE)
except (MemcacheError, ConnectionRefusedError) as error:
logger.warning("Unable to retrieve/save cache data", extra=dict(error=error))
return func(*args, **kwargs)
Expand All @@ -167,8 +184,9 @@ def cache(*args, **kwargs) -> Schema:
def invalidates(
component,
invalidations: List[Invalidation],
cache_prefix,
lock_ttl=DEFAULT_LOCK_TTL
cache_prefix: Optional[str] = None,
lock_ttl=DEFAULT_LOCK_TTL,
schema_version: Optional[str] = None,
):
"""
Invalidates a set of prescribed keys, based on a combination of:
Expand All @@ -185,6 +203,9 @@ def invalidates(
metrics = get_metrics(graph)
resource_cache: CacheBase = graph.resource_cache

version = schema_version or get_build_version(graph)
cache_prefix = cache_prefix or graph.metadata.name

def delete_from_cache(values) -> None:
"""
"Delete" from cache by locking writes to a key for a designated
Expand Down Expand Up @@ -219,7 +240,7 @@ def cache(*args, **kwargs) -> Schema:
invalidation_kwargs = invalidation.from_kwargs(kwargs)

# NB: We assume that we don't cache via args
key = cache_key(cache_prefix, invalidation.schema, (), invalidation_kwargs)
key = cache_key(cache_prefix, invalidation.schema, (), invalidation_kwargs, version)
values[key] = None

result = func(*args, **kwargs)
Expand Down
23 changes: 23 additions & 0 deletions microcosm_caching/tests/test_build_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
Unit tests for build_info
"""
from hamcrest import assert_that, is_
from microcosm.api import create_object_graph, load_from_dict


class TestBuildInfo:
    """
    Verify that the build_info component binds values from configuration.
    """

    # NB: previously named `test_cached` — a copy-paste misnomer, since this
    # test exercises build_info, not caching.
    def test_build_info(self):
        graph = create_object_graph(
            "test",
            testing=True,
            loader=load_from_dict(dict(
                build_info=dict(sha1="asdf1234", build_num="5"),
            )),
        )
        graph.use(
            "build_info",
        )

        # Both configured values should round-trip through the component
        assert_that(graph.build_info.sha1, is_("asdf1234"))
        assert_that(graph.build_info.build_num, is_("5"))
Loading

0 comments on commit f50ac14

Please sign in to comment.