Changes from all commits (31 commits, all by vincentkoc, Nov 25, 2025):

fbce5b0  Update hierarchical_reflective_optimizer.py
a97014e  Update few_shot_bayesian_optimizer.py
e6c6301  Update evaluation_ops.py
3f28b5c  Update _llm_calls.py
7599d75  Update _llm_calls.py
0436631  Update base_optimizer.py
6b0a766  Update base_optimizer.py
281fc8c  Update base_optimizer.py
cebd3c0  Update base_optimizer.py
2001b83  Update gepa_optimizer.py
feeb635  Update gepa_optimizer.py
0c9ef00  Update _llm_calls.py
58853ab  Update evaluation_ops.py
030a9ee  Update hierarchical_reflective_optimizer.py
69940f9  Update gepa_optimizer.py
4b80f08  Update few_shot_bayesian_optimizer.py
b800388  Update evaluation_ops.py
45044dd  Update evaluation_ops.py
c50bbfc  Update evaluation_ops.py
4118c66  Update few_shot_bayesian_optimizer.py
e1f2296  Update gepa_optimizer.py
7d6b9ef  Update base_optimizer.py
8fa2a37  Update base_optimizer.py
4ec3355  Update _llm_calls.py
3b8dbbd  Update optimizable_agent.py
29fa52b  chore: update
fafc851  Update sdks/opik_optimizer/src/opik_optimizer/_llm_calls.py
c17d6be  Update sdks/opik_optimizer/src/opik_optimizer/base_optimizer.py
4294a83  Update sdks/opik_optimizer/src/opik_optimizer/optimizable_agent.py
56a5e22  fix
88e4c98  Update base_optimizer.py
94 changes: 88 additions & 6 deletions sdks/opik_optimizer/src/opik_optimizer/_llm_calls.py
@@ -1,7 +1,9 @@
from typing import Any
from collections.abc import Sequence
from pydantic import BaseModel, ValidationError as PydanticValidationError
import json
import logging
import os
import sys
from types import FrameType

@@ -39,6 +41,57 @@ def _increment_llm_counter_if_needed() -> None:
        frame = frame.f_back


def _get_optimizer_short_name_from_stack() -> str | None:
    """
    Walk up the call stack to find the optimizer and return its short name.
    """
    try:
        from .base_optimizer import BaseOptimizer
    except Exception:
        return None

    try:
        frame: FrameType | None = sys._getframe()
    except ValueError:
        return None

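    # Frames expose their locals, so the nearest enclosing optimizer method is
    # detected via its conventional `self` binding (sys._getframe is CPython-specific).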
    while frame is not None:
        optimizer_candidate = frame.f_locals.get("self")
        if isinstance(optimizer_candidate, BaseOptimizer):
            return optimizer_candidate._get_optimizer_short_name()
        frame = frame.f_back

    return None


def _tag_trace_from_stack(
    phase: str | None = "Prompt Optimization",
    extra_tags: Sequence[str] | None = None,
) -> None:
    """
    Walk up the call stack and tag the current trace using the optimizer helper.
    """
    try:
        from .base_optimizer import BaseOptimizer
    except Exception:
        return

    try:
        frame: FrameType | None = sys._getframe()
    except ValueError:
        return

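    # Tagging is best-effort: the broad except/pass below ensures a tagging
    # failure never breaks the LLM call that triggered it.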
    while frame is not None:
        optimizer_candidate = frame.f_locals.get("self")
        if isinstance(optimizer_candidate, BaseOptimizer):
            try:
                optimizer_candidate._tag_trace(phase=phase, extra_tags=extra_tags)
            except Exception:
                pass
            break
        frame = frame.f_back


def _build_call_time_params(
    temperature: float | None = None,
    max_tokens: int | None = None,
@@ -104,8 +157,27 @@ def _prepare_model_params(
    # Merge optimizer's model_parameters with call-time overrides
    merged_params = {**model_parameters, **call_time_params}

    # Add Opik monitoring wrapper
    final_params = opik_litellm_monitor.try_add_opik_monitoring_to_params(merged_params)
    # Add Opik monitoring wrapper unless tracing is disabled or API key is missing.
    # FIXME: Tracing without a valid API key is not supported by Opik; we skip attaching
    # the monitor to avoid 401/402s. To enable tracing locally, set OPIK_API_KEY (and
    # OPIK_BASE_URL/OPIK_WORKSPACE if using a non-default host). Otherwise set
    # OPIK_TRACK_DISABLE=true to silence tracing.
    tracing_disabled = os.getenv("OPIK_TRACK_DISABLE", "false").lower() in (
        "true",
        "1",
        "yes",
        "on",
    )
    api_key_present = bool(os.getenv("OPIK_API_KEY"))
    if tracing_disabled or not api_key_present:
        final_params = merged_params.copy()
    else:
        try:
            final_params = opik_litellm_monitor.try_add_opik_monitoring_to_params(
                merged_params
            )
        except Exception:
            final_params = merged_params.copy()
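    # Illustrative environment setups (examples only, not part of this diff):
    #   OPIK_API_KEY=<key>       -> Opik monitor attached to the LiteLLM params
    #   OPIK_TRACK_DISABLE=true  -> monitor skipped even if a key is present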

    # Add reasoning metadata if applicable
    if is_reasoning and "metadata" in final_params:
@@ -122,10 +194,18 @@

    # Add tags if optimization_id is available
    if optimization_id:
        opik_metadata["tags"] = [
            optimization_id,
            "Prompt Optimization",
        ]
        tags = []
        # Add optimizer short name if available
        optimizer_short_name = _get_optimizer_short_name_from_stack()
        if optimizer_short_name:
            tags.append(optimizer_short_name)
        tags.extend(
            [
                optimization_id,
                "Prompt Optimization",
            ]
        )
        opik_metadata["tags"] = tags
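        # Illustrative result (short-name format depends on the optimizer):
        # ["<optimizer short name>", "<optimization_id>", "Prompt Optimization"]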

    # Add structured output support
    if response_model is not None:
@@ -257,6 +337,7 @@ def call_model(
    Otherwise, returns the raw string response.
    """
    _increment_llm_counter_if_needed()
    _tag_trace_from_stack()

    # Build dict of call-time LiteLLM parameter overrides (non-None only)
    call_time_params = _build_call_time_params(
@@ -334,6 +415,7 @@ async def call_model_async(
    Otherwise, returns the raw string response.
    """
    _increment_llm_counter_if_needed()
    _tag_trace_from_stack()

    # Build dict of call-time LiteLLM parameter overrides (non-None only)
    call_time_params = _build_call_time_params(
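
For orientation, here is a minimal, self-contained sketch of the stack-walk pattern the new _llm_calls.py helpers rely on. The toy classes and names are illustrative only, not the real opik_optimizer API:

import sys
from types import FrameType


class BaseOptimizer:
    def _get_optimizer_short_name(self) -> str:
        return type(self).__name__


def find_optimizer_on_stack() -> str | None:
    # Walk caller frames; the first `self` bound to a BaseOptimizer wins.
    frame: FrameType | None = sys._getframe()
    while frame is not None:
        candidate = frame.f_locals.get("self")
        if isinstance(candidate, BaseOptimizer):
            return candidate._get_optimizer_short_name()
        frame = frame.f_back
    return None


class ToyOptimizer(BaseOptimizer):
    def run(self) -> str | None:
        # helper() receives no reference to `self`, yet still finds the optimizer.
        return helper()


def helper() -> str | None:
    return find_optimizer_on_stack()


print(ToyOptimizer().run())  # -> "ToyOptimizer"

This is why call_model and call_model_async can tag traces without the optimizer being threaded through every call site.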
@@ -6,7 +6,6 @@
from ....api_objects import chat_prompt
from ....mcp_utils.mcp_workflow import MCPExecutionConfig
import opik
from opik import opik_context
import copy

if TYPE_CHECKING: # pragma: no cover - typing only
@@ -63,6 +62,7 @@ def evaluate_prompt(
)
    try:
        agent = optimizer._instantiate_agent(new_prompt)
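        # Setting the phase once on the agent replaces the per-item
        # opik_context.update_current_trace(...) tagging removed below.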
        optimizer._set_agent_trace_phase(agent, "Evaluation")  # type: ignore[attr-defined]
    except Exception:
        return 0.0

@@ -73,16 +73,6 @@ def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:

        if mcp_execution_config is None:
            model_output = agent.invoke(messages)

            # Add tags to trace for optimization tracking
            if (
                hasattr(optimizer, "current_optimization_id")
                and optimizer.current_optimization_id
            ):
                opik_context.update_current_trace(
                    tags=[optimizer.current_optimization_id, "Evaluation"]
                )

return {"llm_output": model_output}

        coordinator = mcp_execution_config.coordinator
@@ -121,15 +111,6 @@ def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
        else:
            final_response = raw_model_output

        # Add tags to trace for optimization tracking
        if (
            hasattr(optimizer, "current_optimization_id")
            and optimizer.current_optimization_id
        ):
            opik_context.update_current_trace(
                tags=[optimizer.current_optimization_id, "Evaluation"]
            )

return {"llm_output": final_response.strip()}

    score = task_evaluator.evaluate(
@@ -13,7 +13,7 @@
import optuna.pruners

import opik
from opik import Dataset, opik_context
from opik import Dataset
from pydantic import BaseModel

from ... import base_optimizer, _llm_calls, helpers
@@ -847,6 +847,7 @@ def _build_task_from_messages(
        new_prompt = prompt.copy()
        new_prompt.set_messages(messages)
        agent = self._instantiate_agent(new_prompt)
        self._set_agent_trace_phase(agent, "Evaluation")  # type: ignore[attr-defined]

        def llm_task(dataset_item: dict[str, Any]) -> dict[str, Any]:
            """
@@ -868,12 +869,6 @@ def llm_task(dataset_item: dict[str, Any]) -> dict[str, Any]:

            result = agent.invoke(messages, seed=self.seed)

            # Add tags to trace for optimization tracking
            if self.current_optimization_id:
                opik_context.update_current_trace(
                    tags=[self.current_optimization_id, "Evaluation"]
                )

return {"llm_output": result}

        return llm_task
@@ -4,7 +4,7 @@
from collections.abc import Callable

import opik
from opik import Dataset, opik_context
from opik import Dataset
from opik.evaluation import evaluator as opik_evaluator
from opik.evaluation.metrics.score_result import ScoreResult

@@ -622,14 +622,11 @@ def _tie_break(idx: int) -> tuple[float, float, int]:
        final_llm_agent = self._create_agent_for_prompt(
            final_prompt, project_name=analysis_project_name
        )
        self._set_agent_trace_phase(final_llm_agent, "Evaluation")  # type: ignore[attr-defined]

        def final_llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
            messages = final_prompt.get_messages(dataset_item)
            raw = final_llm_agent.invoke(messages)
            if self.current_optimization_id:
                opik_context.update_current_trace(
                    tags=[self.current_optimization_id, "Evaluation"]
                )
            return {"llm_output": raw.strip()}

        metric_class = _create_metric_class(metric)
@@ -670,6 +667,7 @@ def final_llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
        agent = self._create_agent_for_prompt(
            analysis_prompt, project_name=analysis_project_name
        )
        self._set_agent_trace_phase(agent, "Evaluation")  # type: ignore[attr-defined]
        for item in train_items:
            messages = analysis_prompt.get_messages(item)
            output_text = agent.invoke(messages).strip()
@@ -893,17 +891,12 @@ def _evaluate_prompt_logged(
        prompt.model_kwargs = self.model_parameters

        agent = self._create_agent_for_prompt(prompt)
        self._set_agent_trace_phase(agent, "Evaluation")  # type: ignore[attr-defined]

        def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
            messages = prompt.get_messages(dataset_item)
            raw = agent.invoke(messages)

            # Add tags to trace for optimization tracking
            if self.current_optimization_id:
                opik_context.update_current_trace(
                    tags=[self.current_optimization_id, "Evaluation"]
                )

return {"llm_output": raw.strip()}

        configuration_updates = helpers.drop_none({"gepa": extra_metadata})
@@ -2,7 +2,6 @@
from typing import cast

import opik
from opik import opik_context
from opik.evaluation.evaluation_result import EvaluationResult
from opik.evaluation import evaluator as opik_evaluator

@@ -159,6 +158,7 @@ def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
            messages = new_prompt.get_messages(dataset_item)
            new_prompt.set_messages(messages)
            agent = self._instantiate_agent(prompt=new_prompt)
            self._set_agent_trace_phase(agent, "Evaluation")  # type: ignore[attr-defined]

            try:
                logger.debug(
@@ -177,12 +177,6 @@ def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:

            cleaned_model_output = raw_model_output.strip()

            # Add tags to trace for optimization tracking
            if self.current_optimization_id:
                opik_context.update_current_trace(
                    tags=[self.current_optimization_id, "Evaluation"]
                )

            result = {
                "llm_output": cleaned_model_output,
            }
@@ -10,7 +10,6 @@
import random

import opik
from opik import opik_context

from ....api_objects import chat_prompt
from .... import task_evaluator, helpers
@@ -104,6 +103,7 @@ def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:
        messages = new_prompt.get_messages(dataset_item)
        new_prompt.set_messages(messages)
        agent = optimizer._instantiate_agent(new_prompt)
        optimizer._set_agent_trace_phase(agent, "Evaluation")  # type: ignore[attr-defined]

        if mcp_config is not None:
            coordinator = mcp_config.coordinator
@@ -174,15 +174,6 @@ def llm_task(dataset_item: dict[str, Any]) -> dict[str, str]:

        cleaned_model_output = raw_model_output.strip()

        # Add tags to trace for optimization tracking
        if optimizer.current_optimization_id:
            opik_context.update_current_trace(
                tags=[
                    optimizer.current_optimization_id,
                    "Evaluation",
                ]
            )

        result = {
            "llm_output": cleaned_model_output,
        }