Skip to content

Commit

Permalink
Merge branch 'release/0.7.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
GabeLambda committed Nov 18, 2019
2 parents ea9ab1e + 4cee36c commit f50ac14
Show file tree
Hide file tree
Showing 8 changed files with 243 additions and 57 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.6.0
current_version = 0.7.0
commit = False
tag = False

Expand Down
61 changes: 61 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,65 @@

Caching for microcosm microservices.

Provides a generic caching client, as well as some common decorators / patterns to manage result caching

In general, caching is useful because:
* lookups are costly (either because they incur a network or computational cost)
* data may rarely change, relative to how often it's read

[![CircleCI](https://circleci.com/gh/globality-corp/microcosm-caching.svg?style=svg&circle-token=4d985d6947b5d753c6f3b779a2475f389e7c0ef1)](https://circleci.com/gh/globality-corp/microcosm-caching)

## Usage ##
This library exposes a `resource_cache` component in its entry points, automatically configuring a caching client
for general use for direct cache manipulation.

Common patterns have emerged from typical usages of this component, which we generalize into a caching
strategy via several decorators.

### Decorators ###
`cached`:
```python
from typing import Type
from marshmallow import Schema

def cached(component, schema: Type[Schema], cache_prefix: str, ttl: int = DEFAULT_TTL):
pass

# Example usage
return cached(component, ExampleSchema, "prefix")(component.func)
```
This performs a basic "get and set" for a result from a decorated function.

`invalidates` / `invalidates_batch`
```python
from typing import List

def invalidates(component, invalidations: List[Invalidation], cache_prefix, lock_ttl=DEFAULT_LOCK_TTL):
pass
# example usage
return invalidates(component, cache_prefix=CACHE_PREFIX, invalidations=[
Invalidation(
ExampleSchema,
arguments=["example_id"],
)
])(component.func)
```
This will allow for the invalidation of a set of keys based on the *input* params of the function, such that they match
how a given resource is cached.

This may be useful if you want to invalidate a resource based on the creation of another resource (e.g. the creation of
another associated event with that resource).

This does add a limitation that those given params need to provide some link to the related resource. This may be
difficult in some cases, as that parameter may not exist.

An additional detail to the above is the actual invalidation strategy. Instead of directly deleting the cache key,
invalidation will render a given resource as uncacheable for a period of time, during which caching reads won't do anything.

This is done because we commonly cache in conjunction with another service such as a database, meaning that a
given request may not be done until that other service has completed (imagine a case where we invalidate, try to commit,
and find that that operation takes longer than we expect). If we were to simply delete, that would allow an interleaved
read to re-cache the now-stale data, if it happened to read right between the cache delete and a database commit.

So, as an important caveat: if that secondary operation starts taking longer than the lock TTL, you may end up
caching stale data.
29 changes: 29 additions & 0 deletions microcosm_caching/build_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
Store build information.
"""
from dataclasses import dataclass
from typing import Optional

from microcosm.api import defaults, typed


@dataclass
class BuildInfo:
    """
    Build metadata for the running service, bound to the object graph.

    Both values come from configuration (see `configure_build_info`) and
    may be None when no build information was supplied (e.g. local runs).
    """
    # CI build number, as a string; None when not configured
    build_num: Optional[str]
    # Git commit SHA of the build; used to version cache keys downstream
    sha1: Optional[str]


@defaults(
    build_num=typed(str, default_value=None),
    sha1=typed(str, default_value=None),
)
def configure_build_info(graph):
    """
    Build a `BuildInfo` component from the graph's `build_info` config section.

    Both values default to None when not supplied by configuration.
    """
    config = graph.config.build_info
    return BuildInfo(
        build_num=config.build_num,
        sha1=config.sha1,
    )
41 changes: 31 additions & 10 deletions microcosm_caching/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@
Type,
)

from marshmallow import Schema
from marshmallow import EXCLUDE, Schema
from microcosm.errors import NotBoundError
from pymemcache.exceptions import MemcacheError

from microcosm_caching.base import CacheBase
from microcosm_caching.build_info import BuildInfo


DEFAULT_TTL = 60 * 60 # Cache for an hour by default
Expand All @@ -29,6 +30,11 @@ def get_metrics(graph):
return None


def get_build_version(graph) -> Optional[str]:
    """
    Return the build's git SHA from the graph's build_info component,
    or None when no build information was configured.
    """
    info: "BuildInfo" = graph.build_info
    return info.sha1


@dataclass
class Invalidation:
schema: Type[Schema]
Expand Down Expand Up @@ -58,18 +64,24 @@ def from_kwargs(self, kwargs) -> Dict[str, Any]:
return invalidation_kwargs


def cache_key(cache_prefix, schema, args, kwargs, version: Optional[str] = None) -> str:
    """
    Derive a stable cache key from the prefix, schema name, call arguments,
    and an optional version, hashed with SHA-1.

    Keyword arguments are sorted so the key is independent of call-site
    ordering; the version participates in the key so that bumping it
    effectively invalidates all previously cached entries.
    """
    components = (schema.__name__,) + args
    components += tuple(sorted(kwargs.items()))
    return sha1(f"{cache_prefix}:{version}:{components}".encode("utf-8")).hexdigest()


def cached(component, schema: Type[Schema], cache_prefix: str, ttl: int = DEFAULT_TTL):
def cached(
component,
schema: Type[Schema],
cache_prefix: Optional[str] = None,
ttl: int = DEFAULT_TTL,
schema_version: Optional[str] = None,
):
"""
Caches the result of a decorated component function, given that both the underlying
function and the component itself adhere to a given structure.
Expand All @@ -90,8 +102,10 @@ def cached(component, schema: Type[Schema], cache_prefix: str, ttl: int = DEFAUL
:param component: A microcosm-based component
:param schema: The schema corresponding to the response type of the component
:param cache_prefix: Namespace to use for cache keys
:param cache_prefix: Namespace to use for cache keys. Defaults to the name attached to the graph
:param ttl: How long to cache the underlying resource
:param schema_version: The version of this schema. Used as part of the cache key. If not supplied,
will default to the build version, if supplied
:return: the resource (i.e. loaded schema instance)
"""
logger: Logger = getattr(component, "logger")
Expand All @@ -100,6 +114,9 @@ def cached(component, schema: Type[Schema], cache_prefix: str, ttl: int = DEFAUL
metrics = get_metrics(graph)
resource_cache: CacheBase = graph.resource_cache

version = schema_version or get_build_version(graph)
cache_prefix = cache_prefix or graph.metadata.name

def retrieve_from_cache(key: str):
start_time = perf_counter()

Expand Down Expand Up @@ -145,7 +162,7 @@ def cache(*args, **kwargs) -> Schema:
return func(*args, **kwargs)

try:
key = cache_key(cache_prefix, schema, args, kwargs)
key = cache_key(cache_prefix, schema, args, kwargs, version)
cached_resource = retrieve_from_cache(key)
if not cached_resource:
resource = func(*args, **kwargs)
Expand All @@ -155,7 +172,7 @@ def cache(*args, **kwargs) -> Schema:
# NB: We're caching the serialized format of the resource, meaning
# we need to do a (wasteful) load here to enable it to be dumped correctly
# later on in the flow. This could probably be made more efficient
return schema().load(cached_resource, unknown="exclude")
return schema().load(cached_resource, unknown=EXCLUDE)
except (MemcacheError, ConnectionRefusedError) as error:
logger.warning("Unable to retrieve/save cache data", extra=dict(error=error))
return func(*args, **kwargs)
Expand All @@ -167,8 +184,9 @@ def cache(*args, **kwargs) -> Schema:
def invalidates(
component,
invalidations: List[Invalidation],
cache_prefix,
lock_ttl=DEFAULT_LOCK_TTL
cache_prefix: Optional[str] = None,
lock_ttl=DEFAULT_LOCK_TTL,
schema_version: Optional[str] = None,
):
"""
Invalidates a set of prescribed keys, based on a combination of:
Expand All @@ -185,6 +203,9 @@ def invalidates(
metrics = get_metrics(graph)
resource_cache: CacheBase = graph.resource_cache

version = schema_version or get_build_version(graph)
cache_prefix = cache_prefix or graph.metadata.name

def delete_from_cache(values) -> None:
"""
"Delete" from cache by locking writes to a key for a designated
Expand Down Expand Up @@ -219,7 +240,7 @@ def cache(*args, **kwargs) -> Schema:
invalidation_kwargs = invalidation.from_kwargs(kwargs)

# NB: We assume that we don't cache via args
key = cache_key(cache_prefix, invalidation.schema, (), invalidation_kwargs)
key = cache_key(cache_prefix, invalidation.schema, (), invalidation_kwargs, version)
values[key] = None

result = func(*args, **kwargs)
Expand Down
23 changes: 23 additions & 0 deletions microcosm_caching/tests/test_build_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
Unit tests for build_info
"""
from hamcrest import assert_that, is_
from microcosm.api import create_object_graph, load_from_dict


class TestBuildInfo:
    """
    Verify that the build_info component binds values from configuration.
    """

    # NB: previously named `test_cached` — a copy-paste misnomer, since this
    # test exercises build_info, not caching.
    def test_build_info(self):
        graph = create_object_graph(
            "test",
            testing=True,
            loader=load_from_dict(dict(
                build_info=dict(sha1="asdf1234", build_num="5"),
            )),
        )
        graph.use(
            "build_info",
        )

        # Both configured values should round-trip through the component
        assert_that(graph.build_info.sha1, is_("asdf1234"))
        assert_that(graph.build_info.build_num, is_("5"))
Loading

0 comments on commit f50ac14

Please sign in to comment.