Skip to content

Commit

Permalink
Merge pull request #102 from codelion/fix-args
Browse files Browse the repository at this point in the history
Fix args
  • Loading branch information
codelion authored Nov 25, 2024
2 parents 572a0c5 + 90bef2e commit 451f4bf
Show file tree
Hide file tree
Showing 3 changed files with 294 additions and 3 deletions.
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ response = client.chat.completions.create(
| Plugin | Slug | Description |
| ----------------------- | ------------------ | ---------------------------------------------------------------------------------------------- |
| Router | `router` | Uses the [optillm-bert-uncased](https://huggingface.co/codelion/optillm-bert-uncased) model to route requests to different approaches based on the user prompt |
| Chain-of-Code | `coc` | Implements a chain of code approach that combines CoT with code execution and LLM based code simulation |
| Memory | `memory` | Implements a short term memory layer, enables you to use unbounded context length with any LLM |
| Privacy | `privacy` | Anonymize PII data in request and deanonymize it back to original value in response |
| Read URLs | `readurls` | Reads all URLs found in the request, fetches the content at the URL and adds it to the context |
Expand Down Expand Up @@ -290,6 +291,20 @@ Authorization: Bearer your_secret_api_key
```
## SOTA results on benchmarks with optillm

### coc-claude-3-5-sonnet-20241022 on AIME 2024 pass@1 (Nov 2024)

| Model | Score |
|-------|-----:|
| o1-mini | 56.67 |
| coc-claude-3-5-sonnet-20241022 | 46.67 |
| coc-gemini/gemini-exp-1121 | 46.67 |
| o1-preview | 40.00 |
| f1-preview | 40.00 |
| gemini-exp-1114 | 36.67 |
| claude-3-5-sonnet-20241022 | 20.00 |
| gemini-1.5-pro-002 | 20.00 |
| gemini-1.5-flash-002 | 16.67 |

### readurls&memory-gpt-4o-mini on Google FRAMES Benchmark (Oct 2024)
| Model | Accuracy |
| ----- | -------- |
Expand Down Expand Up @@ -324,6 +339,7 @@ called patchflows. We saw huge performance gains across all the supported patchf

## References

- [Chain of Code: Reasoning with a Language Model-Augmented Code Emulator](https://arxiv.org/abs/2312.04474) - [Implementation](https://github.com/codelion/optillm/blob/main/optillm/plugins/coc_plugin.py)
- [Entropy Based Sampling and Parallel CoT Decoding](https://github.com/xjdr-alt/entropix) - [Implementation](https://github.com/codelion/optillm/blob/main/optillm/entropy_decoding.py)
- [Fact, Fetch, and Reason: A Unified Evaluation of Retrieval-Augmented Generation](https://arxiv.org/abs/2409.12941) - [Evaluation script](https://github.com/codelion/optillm/blob/main/scripts/eval_frames_benchmark.py)
- [Writing in the Margins: Better Inference Pattern for Long Context Retrieval](https://www.arxiv.org/abs/2408.14906) - [Inspired the implementation of the memory plugin](https://github.com/codelion/optillm/blob/main/optillm/plugins/memory_plugin.py)
Expand Down
10 changes: 7 additions & 3 deletions optillm.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,10 @@ def proxy():
model = data.get('model', server_config['model'])

optillm_approach = data.get('optillm_approach', server_config['approach'])
logger.debug(data)
server_config['mcts_depth'] = data.get('mcts_depth', server_config['mcts_depth'])
server_config['mcts_exploration' ] = data.get('mcts_exploration', server_config['mcts_exploration'])
server_config['mcts_simulations'] = data.get('mcts_simulations', server_config['mcts_simulations'])

system_prompt, initial_query, message_optillm_approach = parse_conversation(messages)

Expand Down Expand Up @@ -522,7 +526,7 @@ def parse_args():
# Define arguments and their corresponding environment variables
args_env = [
("--optillm-api-key", "OPTILLM_API_KEY", str, "", "Optional API key for client authentication to optillm"),
("--approach", "OPTILLM_APPROACH", str, "auto", "Inference approach to use", known_approaches),
("--approach", "OPTILLM_APPROACH", str, "auto", "Inference approach to use", known_approaches + list(plugin_approaches.keys())),
("--mcts-simulations", "OPTILLM_SIMULATIONS", int, 2, "Number of MCTS simulations"),
("--mcts-exploration", "OPTILLM_EXPLORATION", float, 0.2, "Exploration weight for MCTS"),
("--mcts-depth", "OPTILLM_DEPTH", int, 1, "Simulation depth for MCTS"),
Expand Down Expand Up @@ -571,10 +575,10 @@ def parse_args():

def main():
global server_config
args = parse_args()

# Call this function at the start of main()
load_plugins()
args = parse_args()

# Update server_config with all argument values
server_config.update(vars(args))

Expand Down
271 changes: 271 additions & 0 deletions optillm/plugins/coc_plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
import re
import logging
from typing import Tuple, Dict, Any, List
import ast
import traceback
import math
import importlib
import json

# Module-level logger for this plugin.
logger = logging.getLogger(__name__)

# Plugin identifier: requests that select the "coc" approach are routed here.
SLUG = "coc"

# Maximum number of direct-execution attempts (including LLM-provided fixes)
# before falling back to LLM simulation of the code.
MAX_FIX_ATTEMPTS = 3

# Modules the generated code is allowed to use. A module object means it is
# already imported in this module; a string value marks a module that should
# be imported inside the execution context instead.
ALLOWED_MODULES = {
    'math': math,
    'numpy': 'numpy',  # String indicates module should be imported in execution context
}

# System-prompt suffix used for the initial code generation request: asks the
# LLM for a complete, executable program that stores its result in 'answer'.
CHAIN_OF_CODE_PROMPT = '''
Write Python code to solve this problem. The code should:
1. Break down the problem into clear computational steps
2. Use standard Python features and math operations
3. Store the final result in a variable named 'answer'
4. Include error handling where appropriate
5. Be complete and executable
Format your response using:
```python
[Your complete Python program here]
```
'''

# Prompt used when execution fails: supplies the broken code and the error
# text, and asks for a complete corrected program in a single code block.
CODE_FIX_PROMPT = '''
The following Python code failed to execute. Fix the code to make it work.
Original code:
```python
{code}
```
Error encountered:
{error}
Please provide a complete, fixed version of the code that:
1. Addresses the error message
2. Maintains the same logic and approach
3. Stores the final result in 'answer'
4. Is complete and executable
Return only the fixed code in a code block:
```python
[Your fixed code here]
```
'''

# Prompt used as a last resort: asks the LLM to mentally trace the code and
# return only the final value of the 'answer' variable.
SIMULATION_PROMPT = '''
The following Python code could not be executed directly. Analyze the code and determine what the answer would be.
Pay special attention to:
1. The core computational logic, ignoring any visualization or display code
2. The key mathematical operations that determine the final answer
3. Any logic that affects the 'answer' variable
Code to analyze:
```python
{code}
```
Runtime error encountered:
{error}
Return ONLY the final value that would be in the 'answer' variable. Return just the value, no explanations.
'''

def extract_code_blocks(text: str) -> List[str]:
    """Return the stripped contents of every ```python fenced block in *text*.

    Blocks are returned in order of appearance; an empty list means no
    fenced python block was found.
    """
    fenced = re.findall(r'```python\s*(.*?)\s*```', text, re.DOTALL)
    blocks = []
    for raw in fenced:
        blocks.append(raw.strip())
    log = logging.getLogger(__name__)
    log.info(f"Extracted {len(blocks)} code blocks")
    for idx, snippet in enumerate(blocks, start=1):
        log.info(f"Code block {idx}:\n{snippet}")
    return blocks

def sanitize_code(code: str) -> str:
    """Prepare code for execution by adding imports and a safety wrapper.

    Strips visualization-related lines (matplotlib/plotting calls), prepends
    an import for every entry in ALLOWED_MODULES, and wraps the remaining
    code in a `safe_execute()` helper whose result is stored back into a
    module-level `answer` variable.

    Args:
        code: Raw Python source produced by the LLM.

    Returns:
        A self-contained program string suitable for `exec`.
    """
    # Add standard imports
    imports = "\n".join(f"import {mod}" for mod in ALLOWED_MODULES)

    # Remove or modify problematic visualization code
    lines = code.split('\n')
    safe_lines = []
    for line in lines:
        # Skip matplotlib-related imports and plotting commands
        if any(x in line.lower() for x in ['matplotlib', 'plt.', '.plot(', '.show(', 'figure', 'subplot']):
            continue
        # Keep the line if it's not visualization-related
        safe_lines.append(line)

    safe_code = '\n'.join(safe_lines)

    # BUG FIX: the original embedded safe_code.replace('\n', '\n    ') inside
    # an f-string expression. Backslashes in f-string expressions are a
    # SyntaxError before Python 3.12 (PEP 701), so importing this module
    # crashed on 3.8-3.11. Compute the indented body first; output unchanged.
    indented_code = safe_code.replace('\n', '\n    ')

    # Add safety wrapper
    wrapper = f"""
{imports}
def safe_execute():
    import numpy as np  # Always allow numpy
    {indented_code}
    return answer if 'answer' in locals() else None
result = safe_execute()
answer = result
"""
    return wrapper

def execute_code(code: str) -> Tuple[Any, str]:
    """Execute *code* with exec() and return ``(answer, error)``.

    Exactly one element of the pair is None: on success the value of the
    generated program's ``answer`` variable is returned with a None error;
    on failure the answer is None and the error string describes why.

    SECURITY NOTE(review): exec() on LLM-generated code runs unsandboxed in
    this process with full builtins — confirm this trust model is intended.
    """
    log = logging.getLogger(__name__)
    log.info("Attempting to execute code")
    log.info(f"Code:\n{code}")

    # Fresh globals dict so runs do not leak state into each other.
    env = {}
    try:
        exec(code, env)
    except Exception as exc:
        message = str(exc)
        log.error(f"Execution failed: {message}")
        return None, message

    if 'answer' not in env:
        missing = "Code executed but did not produce an answer variable"
        log.warning(missing)
        return None, missing

    result = env['answer']
    log.info(f"Execution successful. Answer: {result}")
    return result, None

def generate_fixed_code(original_code: str, error: str, client, model: str) -> Tuple[str, int]:
    """Ask the LLM for a corrected version of *original_code*.

    Returns ``(code, completion_tokens)``; ``code`` is None when the reply
    contained no fenced python block.
    """
    log = logging.getLogger(__name__)
    log.info("Requesting code fix from LLM")
    log.info(f"Original error: {error}")

    system_message = CODE_FIX_PROMPT.format(code=original_code, error=error)
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": "Fix the code to make it work."},
        ],
        temperature=0.2,
    )

    tokens_used = response.usage.completion_tokens
    blocks = extract_code_blocks(response.choices[0].message.content)
    if not blocks:
        log.warning("No code block found in LLM response")
        return None, tokens_used

    log.info("Received fixed code from LLM")
    return blocks[0], tokens_used

def simulate_execution(code: str, error: str, client, model: str) -> Tuple[Any, int]:
    """Ask the LLM to simulate the code and report the final 'answer' value.

    Args:
        code: The program that could not be executed directly.
        error: The runtime error text from the last execution attempt.
        client: Chat-completions client used to query the model.
        model: Model identifier to query.

    Returns:
        ``(answer, completion_tokens)``; ``answer`` is None when the reply
        could not be processed at all.
    """
    logger.info("Attempting code simulation with LLM")

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": SIMULATION_PROMPT.format(
                code=code, error=error)},
            {"role": "user", "content": "Simulate this code and return the final answer value."}
        ],
        temperature=0.2
    )

    try:
        result = response.choices[0].message.content.strip()
        # Try to convert the reply to a native Python value; fall back to the
        # raw string when it is not a literal. BUG FIX: the original used a
        # bare `except:`, which also swallowed KeyboardInterrupt/SystemExit;
        # catch only what ast.literal_eval raises for non-literal input.
        try:
            answer = ast.literal_eval(result)
        except (ValueError, SyntaxError, TypeError):
            answer = result
        logger.info(f"Simulation successful. Result: {answer}")
        return answer, response.usage.completion_tokens
    except Exception as e:
        logger.error(f"Failed to parse simulation result: {str(e)}")
        return None, response.usage.completion_tokens

def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str, int]:
    """Main Chain of Code execution function.

    Generates code for the query, then tries two strategies in order:
    (1) execute the code directly, asking the LLM to repair it on failure,
    up to MAX_FIX_ATTEMPTS executions; (2) if all executions fail, ask the
    LLM to simulate the final code and report the answer.

    Returns:
        ``(answer_text, total_completion_tokens)``.
    """
    logger.info("Starting Chain of Code execution")
    logger.info(f"Query: {initial_query}")

    # Initial code generation
    initial_messages = [
        {"role": "system", "content": system_prompt + "\n" + CHAIN_OF_CODE_PROMPT},
        {"role": "user", "content": initial_query}
    ]
    response = client.chat.completions.create(
        model=model,
        messages=initial_messages,
        temperature=0.7
    )
    total_tokens = response.usage.completion_tokens

    # Extract initial code; if the reply had no code, return it verbatim.
    blocks = extract_code_blocks(response.choices[0].message.content)
    if not blocks:
        logger.warning("No code blocks found in response")
        return response.choices[0].message.content, total_tokens

    current_code = blocks[0]
    last_error = None

    # Strategy 1: direct execution, with LLM-driven fixes between attempts.
    for attempt in range(1, MAX_FIX_ATTEMPTS + 1):
        logger.info(f"Execution attempt {attempt}/{MAX_FIX_ATTEMPTS}")

        answer, error = execute_code(current_code)
        if error is None:
            logger.info(f"Successful execution on attempt {attempt}")
            return str(answer), total_tokens

        last_error = error

        # Final attempt exhausted: move on to simulation.
        if attempt == MAX_FIX_ATTEMPTS:
            logger.warning(f"Failed after {attempt} fix attempts")
            break

        logger.info(f"Requesting code fix, attempt {attempt}")
        fixed_code, fix_tokens = generate_fixed_code(current_code, error, client, model)
        total_tokens += fix_tokens
        if not fixed_code:
            logger.error("Failed to get fixed code from LLM")
            break
        current_code = fixed_code

    # Strategy 2: If all execution attempts failed, try simulation
    logger.info("All execution attempts failed, trying simulation")
    simulated_answer, sim_tokens = simulate_execution(current_code, last_error, client, model)
    total_tokens += sim_tokens

    if simulated_answer is not None:
        logger.info("Successfully got answer from simulation")
        return str(simulated_answer), total_tokens

    # Everything failed: surface the last execution error to the caller.
    logger.warning("All strategies failed")
    return f"Error: Could not solve problem after all attempts. Last error: {last_error}", total_tokens

0 comments on commit 451f4bf

Please sign in to comment.