-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcache_manager.py
92 lines (73 loc) · 2.62 KB
/
cache_manager.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
"""
Cache management utilities for Keywords4CV.
"""
import json
import os
import xxhash
import psutil
from typing import Dict, Any, Optional
from cachetools import LRUCache
# Module-level defaults.

# Number of cache entries assumed when the configuration has no
# "caching" -> "cache_size" value.
DEFAULT_CACHE_SIZE = 5_000

# Salt used for hashing when neither the K4CV_CACHE_SALT environment variable
# nor the configuration provides one.
DEFAULT_CACHE_SALT = "default_secret_salt"

# Version tag for the cache. NOTE(review): not referenced elsewhere in this
# module; presumably consumed by importers -- confirm against callers.
CACHE_VERSION = "1.0.0"
def get_cache_salt(config: Dict[str, Any]) -> str:
    """
    Retrieve the cache salt used for hashing operations.

    Lookup order:
        1. The ``K4CV_CACHE_SALT`` environment variable, when it is set
           (an empty string counts as set).
        2. ``config["caching"]["cache_salt"]``, when present.
        3. ``DEFAULT_CACHE_SALT``.

    The configuration is only inspected when the environment variable is
    absent, so a malformed config cannot break an environment override.
    A ``caching`` entry that is missing, ``None`` or otherwise empty is
    treated as an empty section instead of raising ``AttributeError``.

    Args:
        config: The configuration dictionary

    Returns:
        str: The cache salt to use for hashing operations
    """
    env_salt = os.environ.get("K4CV_CACHE_SALT")
    if env_salt is not None:
        # The environment always wins; no need to touch the config at all.
        return env_salt

    # `or {}` covers both a missing section and one that is present but None.
    caching = config.get("caching") or {}
    return caching.get("cache_salt", DEFAULT_CACHE_SALT)
def calculate_optimal_cache_size(config: Dict[str, Any]) -> int:
    """
    Calculate the cache size from configuration and currently available memory.

    The configured size is ``config["caching"]["cache_size"]`` (falling back
    to ``DEFAULT_CACHE_SIZE``). When
    ``config["hardware_limits"]["memory_scaling_factor"]`` (default ``0.3``)
    is a positive number, a memory-derived size is computed as the available
    memory in MiB divided by that factor, truncated to an integer, and the
    smaller of the two sizes is returned.

    A factor that is falsy (``0``, ``None``) or negative disables the memory
    adjustment; a negative factor would otherwise produce a negative cache
    size. ``caching`` / ``hardware_limits`` entries that are missing, ``None``
    or empty are treated as empty sections.

    Args:
        config: The configuration dictionary

    Returns:
        int: The calculated optimal cache size
    """
    # `or {}` covers both a missing section and one that is present but None.
    caching = config.get("caching") or {}
    hardware_limits = config.get("hardware_limits") or {}

    # Fall back to the module default only when no size is configured.
    if "cache_size" in caching:
        base_cache_size = caching["cache_size"]
    else:
        base_cache_size = DEFAULT_CACHE_SIZE
    scaling_factor = hardware_limits.get("memory_scaling_factor", 0.3)

    # Only a positive factor yields a usable memory-derived size.
    if not scaling_factor or scaling_factor <= 0:
        return base_cache_size

    # psutil reports available memory in bytes; convert to MiB first.
    available_mb = psutil.virtual_memory().available / (1024 * 1024)
    dynamic_size = int(available_mb / scaling_factor)

    # The configured size acts as an upper bound on the memory-derived size.
    return min(base_cache_size, dynamic_size)
class ConfigHasher:
    """
    Produces salted hashes over selected configuration sections.

    The hash changes whenever the salt or the content of any included
    section changes. NOTE(review): presumably used to invalidate cached
    data when relevant settings change -- the callers are not visible here.
    """

    @staticmethod
    def hash_config(
        config: Dict[str, Any], salt: str, sections: Optional[list] = None
    ) -> str:
        """
        Hash the chosen sections of a configuration together with a salt.

        Sections named in ``sections`` but absent from ``config`` are
        skipped. The selected sections are serialized as JSON with sorted
        keys, prefixed with ``"<salt>_"``, UTF-8 encoded and hashed with
        ``xxhash.xxh3_64``.

        Args:
            config: Configuration dictionary
            salt: Salt value for the hash
            sections: Names of the sections to include; when ``None`` a
                built-in list of sections is used

        Returns:
            str: Hexadecimal hash of the configuration
        """
        section_names = sections
        if section_names is None:
            # Default set of sections that take part in the hash.
            section_names = (
                "stop_words",
                "stop_words_add",
                "stop_words_exclude",
                "text_processing",
                "caching",
                "validation",
                "keyword_categories",
            )

        # Keep only the sections that actually exist in the configuration.
        selected = {
            name: config.get(name) for name in section_names if name in config
        }

        # sort_keys makes the serialization independent of key order.
        serialized = json.dumps(selected, sort_keys=True)
        payload = f"{salt}_{serialized}".encode("utf-8")
        return xxhash.xxh3_64(payload).hexdigest()