Changes from all commits (31 commits, all by vincentkoc, Nov 25, 2025):

fbce5b0  Update hierarchical_reflective_optimizer.py
a97014e  Update few_shot_bayesian_optimizer.py
e6c6301  Update evaluation_ops.py
3f28b5c  Update _llm_calls.py
7599d75  Update _llm_calls.py
0436631  Update base_optimizer.py
6b0a766  Update base_optimizer.py
281fc8c  Update base_optimizer.py
cebd3c0  Update base_optimizer.py
2001b83  Update gepa_optimizer.py
feeb635  Update gepa_optimizer.py
0c9ef00  Update _llm_calls.py
58853ab  Update evaluation_ops.py
030a9ee  Update hierarchical_reflective_optimizer.py
69940f9  Update gepa_optimizer.py
4b80f08  Update few_shot_bayesian_optimizer.py
b800388  Update evaluation_ops.py
45044dd  Update evaluation_ops.py
c50bbfc  Update evaluation_ops.py
4118c66  Update few_shot_bayesian_optimizer.py
e1f2296  Update gepa_optimizer.py
7d6b9ef  Update base_optimizer.py
8fa2a37  Update base_optimizer.py
4ec3355  Update _llm_calls.py
3b8dbbd  Update optimizable_agent.py
29fa52b  chore: update
fafc851  Update sdks/opik_optimizer/src/opik_optimizer/_llm_calls.py
c17d6be  Update sdks/opik_optimizer/src/opik_optimizer/base_optimizer.py
4294a83  Update sdks/opik_optimizer/src/opik_optimizer/optimizable_agent.py
56a5e22  fix
88e4c98  Update base_optimizer.py
94 changes: 88 additions & 6 deletions sdks/opik_optimizer/src/opik_optimizer/_llm_calls.py
@@ -1,7 +1,9 @@
from typing import Any
from collections.abc import Sequence
from pydantic import BaseModel, ValidationError as PydanticValidationError
import json
import logging
import os
import sys
from types import FrameType

@@ -39,6 +41,57 @@ def _increment_llm_counter_if_needed() -> None:
        frame = frame.f_back


def _get_optimizer_short_name_from_stack() -> str | None:
    """
    Walk up the call stack to find the optimizer and return its short name.
    """
    try:
        from .base_optimizer import BaseOptimizer
    except Exception:
        return None

    try:
        frame: FrameType | None = sys._getframe()
    except ValueError:
        return None

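    # Frames expose their locals, so the nearest enclosing optimizer method is
    # detected via its conventional `self` binding (sys._getframe is CPython-specific).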
    while frame is not None:
        optimizer_candidate = frame.f_locals.get("self")
        if isinstance(optimizer_candidate, BaseOptimizer):
            return optimizer_candidate._get_optimizer_short_name()
        frame = frame.f_back

    return None


def _tag_trace_from_stack(
    phase: str | None = "Prompt Optimization",
    extra_tags: Sequence[str] | None = None,
) -> None:
    """
    Walk up the call stack and tag the current trace using the optimizer helper.
    """
    try:
        from .base_optimizer import BaseOptimizer
    except Exception:
        return

    try:
        frame: FrameType | None = sys._getframe()
    except ValueError:
        return

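    # Tagging is best-effort: the broad except/pass below ensures a tagging
    # failure never breaks the LLM call that triggered it.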
    while frame is not None:
        optimizer_candidate = frame.f_locals.get("self")
        if isinstance(optimizer_candidate, BaseOptimizer):
            try:
                optimizer_candidate._tag_trace(phase=phase, extra_tags=extra_tags)
            except Exception:
                pass
            break
        frame = frame.f_back


def _build_call_time_params(
    temperature: float | None = None,
    max_tokens: int | None = None,
@@ -104,8 +157,27 @@ def _prepare_model_params(
    # Merge optimizer's model_parameters with call-time overrides
    merged_params = {**model_parameters, **call_time_params}

    # Add Opik monitoring wrapper
    final_params = opik_litellm_monitor.try_add_opik_monitoring_to_params(merged_params)
    # Add Opik monitoring wrapper unless tracing is disabled or API key is missing.
    # FIXME: Tracing without a valid API key is not supported by Opik; we skip attaching
    # the monitor to avoid 401/402s. To enable tracing locally, set OPIK_API_KEY (and
    # OPIK_BASE_URL/OPIK_WORKSPACE if using a non-default host). Otherwise set
    # OPIK_TRACK_DISABLE=true to silence tracing.
    tracing_disabled = os.getenv("OPIK_TRACK_DISABLE", "false").lower() in (
        "true",
        "1",
        "yes",
        "on",
    )
    api_key_present = bool(os.getenv("OPIK_API_KEY"))
    if tracing_disabled or not api_key_present:
        final_params = merged_params.copy()
    else:
        try:
            final_params = opik_litellm_monitor.try_add_opik_monitoring_to_params(
                merged_params
            )
        except Exception:
            final_params = merged_params.copy()
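    # Illustrative environment setups (examples only, not part of this diff):
    #   OPIK_API_KEY=<key>       -> Opik monitor attached to the LiteLLM params
    #   OPIK_TRACK_DISABLE=true  -> monitor skipped even if a key is present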

    # Add reasoning metadata if applicable
    if is_reasoning and "metadata" in final_params:
@@ -122,10 +194,18 @@

    # Add tags if optimization_id is available
    if optimization_id:
        opik_metadata["tags"] = [
            optimization_id,
            "Prompt Optimization",
        ]
        tags = []
        # Add optimizer short name if available
        optimizer_short_name = _get_optimizer_short_name_from_stack()
        if optimizer_short_name:
            tags.append(optimizer_short_name)
        tags.extend(
            [
                optimization_id,
                "Prompt Optimization",
            ]
        )
        opik_metadata["tags"] = tags
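        # Illustrative result (short-name format depends on the optimizer):
        # ["<optimizer short name>", "<optimization_id>", "Prompt Optimization"]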

    # Add structured output support
    if response_model is not None:
@@ -257,6 +337,7 @@ def call_model(
    Otherwise, returns the raw string response.
    """
    _increment_llm_counter_if_needed()
    _tag_trace_from_stack()

    # Build dict of call-time LiteLLM parameter overrides (non-None only)
    call_time_params = _build_call_time_params(
@@ -334,6 +415,7 @@ async def call_model_async(
    Otherwise, returns the raw string response.
    """
    _increment_llm_counter_if_needed()
    _tag_trace_from_stack()

    # Build dict of call-time LiteLLM parameter overrides (non-None only)
    call_time_params = _build_call_time_params(
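
For orientation, here is a minimal, self-contained sketch of the stack-walk pattern the new _llm_calls.py helpers rely on. The toy classes and names are illustrative only, not the real opik_optimizer API:

import sys
from types import FrameType


class BaseOptimizer:
    def _get_optimizer_short_name(self) -> str:
        return type(self).__name__


def find_optimizer_on_stack() -> str | None:
    # Walk caller frames; the first `self` bound to a BaseOptimizer wins.
    frame: FrameType | None = sys._getframe()
    while frame is not None:
        candidate = frame.f_locals.get("self")
        if isinstance(candidate, BaseOptimizer):
            return candidate._get_optimizer_short_name()
        frame = frame.f_back
    return None


class ToyOptimizer(BaseOptimizer):
    def run(self) -> str | None:
        # helper() receives no reference to `self`, yet still finds the optimizer.
        return helper()


def helper() -> str | None:
    return find_optimizer_on_stack()


print(ToyOptimizer().run())  # -> "ToyOptimizer"

This is why call_model and call_model_async can tag traces without the optimizer being threaded through every call site.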
@@ -6,7 +6,6 @@
from ....api_objects import chat_prompt
from ....mcp_utils.mcp_workflow import MCPExecutionConfig
import opik
from opik import opik_context
import copy

if TYPE_CHECKING: # pragma: no cover - typing only
@@ -63,6 +62,7 @@ def evaluate_prompt(
)
    try:
        agent = optimizer._instantiate_agent(new_prompt)
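        # Setting the phase once on the agent replaces the per-item
        # opik_context.update_current_trace(...) tagging removed below.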
        optimizer._set_agent_trace_phase(agent, "Evaluation")  # type: ignore[attr-defined]
    except Exception:
        return 0.0

@@ -73,16 +73,6 @@ def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:

        if mcp_execution_config is None:
            model_output = agent.invoke(messages)

            # Add tags to trace for optimization tracking
            if (
                hasattr(optimizer, "current_optimization_id")
                and optimizer.current_optimization_id
            ):
                opik_context.update_current_trace(
                    tags=[optimizer.current_optimization_id, "Evaluation"]
                )

return {"llm_output": model_output}

        coordinator = mcp_execution_config.coordinator
@@ -121,15 +111,6 @@ def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
        else:
            final_response = raw_model_output

        # Add tags to trace for optimization tracking
        if (
            hasattr(optimizer, "current_optimization_id")
            and optimizer.current_optimization_id
        ):
            opik_context.update_current_trace(
                tags=[optimizer.current_optimization_id, "Evaluation"]
            )

return {"llm_output": final_response.strip()}

    score = task_evaluator.evaluate(
@@ -13,7 +13,7 @@
import optuna.pruners

import opik
from opik import Dataset, opik_context
from opik import Dataset
from pydantic import BaseModel

from ... import base_optimizer, _llm_calls, helpers
@@ -847,6 +847,7 @@ def _build_task_from_messages(
        new_prompt = prompt.copy()
        new_prompt.set_messages(messages)
        agent = self._instantiate_agent(new_prompt)
        self._set_agent_trace_phase(agent, "Evaluation")  # type: ignore[attr-defined]

        def llm_task(dataset_item: dict[str, Any]) -> dict[str, Any]:
            """
@@ -868,12 +869,6 @@ def llm_task(dataset_item: dict[str, Any]) -> dict[str, Any]:

            result = agent.invoke(messages, seed=self.seed)

            # Add tags to trace for optimization tracking
            if self.current_optimization_id:
                opik_context.update_current_trace(
                    tags=[self.current_optimization_id, "Evaluation"]
                )

return {"llm_output": result}

        return llm_task
@@ -4,7 +4,7 @@
from collections.abc import Callable

import opik
from opik import Dataset, opik_context
from opik import Dataset
from opik.evaluation import evaluator as opik_evaluator
from opik.evaluation.metrics.score_result import ScoreResult

@@ -622,14 +622,11 @@ def _tie_break(idx: int) -> tuple[float, float, int]:
        final_llm_agent = self._create_agent_for_prompt(
            final_prompt, project_name=analysis_project_name
        )
        self._set_agent_trace_phase(final_llm_agent, "Evaluation")  # type: ignore[attr-defined]

        def final_llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
            messages = final_prompt.get_messages(dataset_item)
            raw = final_llm_agent.invoke(messages)
            if self.current_optimization_id:
                opik_context.update_current_trace(
                    tags=[self.current_optimization_id, "Evaluation"]
                )
            return {"llm_output": raw.strip()}

        metric_class = _create_metric_class(metric)
@@ -670,6 +667,7 @@ def final_llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
        agent = self._create_agent_for_prompt(
            analysis_prompt, project_name=analysis_project_name
        )
        self._set_agent_trace_phase(agent, "Evaluation")  # type: ignore[attr-defined]
        for item in train_items:
            messages = analysis_prompt.get_messages(item)
            output_text = agent.invoke(messages).strip()
@@ -893,17 +891,12 @@ def _evaluate_prompt_logged(
        prompt.model_kwargs = self.model_parameters

        agent = self._create_agent_for_prompt(prompt)
        self._set_agent_trace_phase(agent, "Evaluation")  # type: ignore[attr-defined]

        def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
            messages = prompt.get_messages(dataset_item)
            raw = agent.invoke(messages)

            # Add tags to trace for optimization tracking
            if self.current_optimization_id:
                opik_context.update_current_trace(
                    tags=[self.current_optimization_id, "Evaluation"]
                )

return {"llm_output": raw.strip()}

        configuration_updates = helpers.drop_none({"gepa": extra_metadata})
@@ -2,7 +2,6 @@
from typing import cast

import opik
from opik import opik_context
from opik.evaluation.evaluation_result import EvaluationResult
from opik.evaluation import evaluator as opik_evaluator

@@ -159,6 +158,7 @@ def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
            messages = new_prompt.get_messages(dataset_item)
            new_prompt.set_messages(messages)
            agent = self._instantiate_agent(prompt=new_prompt)
            self._set_agent_trace_phase(agent, "Evaluation")  # type: ignore[attr-defined]

            try:
                logger.debug(
@@ -177,12 +177,6 @@ def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:

            cleaned_model_output = raw_model_output.strip()

            # Add tags to trace for optimization tracking
            if self.current_optimization_id:
                opik_context.update_current_trace(
                    tags=[self.current_optimization_id, "Evaluation"]
                )

            result = {
                "llm_output": cleaned_model_output,
            }
@@ -10,7 +10,6 @@
import random

import opik
from opik import opik_context

from ....api_objects import chat_prompt
from .... import task_evaluator, helpers
@@ -104,6 +103,7 @@ def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
        messages = new_prompt.get_messages(dataset_item)
        new_prompt.set_messages(messages)
        agent = optimizer._instantiate_agent(new_prompt)
        optimizer._set_agent_trace_phase(agent, "Evaluation")  # type: ignore[attr-defined]

        if mcp_config is not None:
            coordinator = mcp_config.coordinator
@@ -174,15 +174,6 @@ def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:

        cleaned_model_output = raw_model_output.strip()

        # Add tags to trace for optimization tracking
        if optimizer.current_optimization_id:
            opik_context.update_current_trace(
                tags=[
                    optimizer.current_optimization_id,
                    "Evaluation",
                ]
            )

        result = {
            "llm_output": cleaned_model_output,
        }