diff --git a/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/buildResponseMetadata.ts b/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/buildResponseMetadata.ts index adf5de7171523..f74f5c3a0e45e 100644 --- a/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/buildResponseMetadata.ts +++ b/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/buildResponseMetadata.ts @@ -1,6 +1,7 @@ import type { EngineResponse } from 'n8n-workflow'; import { buildSteps } from '@utils/agent-execution'; +import type { TokenUsageData } from '@utils/agent-execution/AgentTokenTracker'; import type { RequestResponseMetadata } from '../types'; @@ -12,18 +13,46 @@ import type { RequestResponseMetadata } from '../types'; * * @param response - The optional engine response from previous tool execution * @param itemIndex - The current item index being processed - * @returns Metadata object with previousRequests and iterationCount + * @param currentTokens - Optional token usage from the current iteration + * @returns Metadata object with previousRequests, iterationCount, and accumulated tokens * */ export function buildResponseMetadata( response: EngineResponse | undefined, itemIndex: number, + currentTokens?: TokenUsageData, ): RequestResponseMetadata { const currentIterationCount = response?.metadata?.iterationCount ?? 0; + const previousRequests = buildSteps(response, itemIndex); + + // Merge current iteration tokens with previous accumulated tokens + let accumulatedTokens: TokenUsageData | undefined; + if (currentTokens || response?.metadata?.accumulatedTokens) { + const prev = response?.metadata?.accumulatedTokens ?? { + promptTokens: 0, + completionTokens: 0, + totalTokens: 0, + isEstimate: false, + }; + const curr = currentTokens ?? 
{ + promptTokens: 0, + completionTokens: 0, + totalTokens: 0, + isEstimate: false, + }; + + accumulatedTokens = { + promptTokens: prev.promptTokens + curr.promptTokens, + completionTokens: prev.completionTokens + curr.completionTokens, + totalTokens: prev.totalTokens + curr.totalTokens, + isEstimate: prev.isEstimate || curr.isEstimate, + }; + } return { - previousRequests: buildSteps(response, itemIndex), itemIndex, + previousRequests, iterationCount: currentIterationCount + 1, + accumulatedTokens, }; } diff --git a/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/prepareItemContext.ts b/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/prepareItemContext.ts index 8ff0e5577c12e..f2e95a46d387b 100644 --- a/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/prepareItemContext.ts +++ b/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/prepareItemContext.ts @@ -4,6 +4,7 @@ import { NodeOperationError } from 'n8n-workflow'; import type { IExecuteFunctions, ISupplyDataFunctions, EngineResponse } from 'n8n-workflow'; import { buildSteps, type ToolCallData } from '@utils/agent-execution'; +import { AgentTokenTracker } from '@utils/agent-execution/AgentTokenTracker'; import { getPromptInputByType } from '@utils/helpers'; import { getOptionalOutputParser } from '@utils/output_parsers/N8nOutputParser'; import type { N8nOutputParser } from '@utils/output_parsers/N8nOutputParser'; @@ -22,6 +23,7 @@ export type ItemContext = { prompt: ChatPromptTemplate; options: AgentOptions; outputParser: N8nOutputParser | undefined; + tokenTracker: AgentTokenTracker; }; /** @@ -66,6 +68,9 @@ export async function prepareItemContext( }); const prompt: ChatPromptTemplate = preparePrompt(messages); + // Create token tracker for this item + const tokenTracker = new AgentTokenTracker(); + return { itemIndex, input, @@ -74,5 +79,6 @@ export async function prepareItemContext( prompt, options, outputParser, + tokenTracker, }; } diff --git a/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/runAgent.ts b/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/runAgent.ts index 6a6f9a46a9bd1..9cbc6bf2286d7 100644 --- a/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/runAgent.ts +++ b/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/runAgent.ts @@ -14,6 +14,7 @@ import { createEngineRequests, saveToMemory, } from '@utils/agent-execution'; +import { modelPricingService } from '@utils/modelPricing'; import { SYSTEM_MESSAGE } from '../../prompt'; import type { AgentResult, RequestResponseMetadata } from '../types'; @@ -21,6 +22,51 @@ import { buildResponseMetadata } from './buildResponseMetadata'; import type { ItemContext } from './prepareItemContext'; type RunAgentResult = AgentResult | EngineRequest; + +/** + * Helper function to add cost calculation to token usage + */ +async function addCostToTokenUsage( + tokenUsage: { + promptTokens: number; + completionTokens: number; + totalTokens: number; + isEstimate: boolean; + }, + model: BaseChatModel, +): Promise { + // Get model name from the model instance + // Try multiple properties as different LLM providers use different naming + const modelAny = model as any; + + const modelNameRaw = + modelAny.modelName || // Most common (OpenAI, Anthropic, etc.) 
+ modelAny.model || // Alternative property + modelAny.name || // Some providers + modelAny._modelName || // Private property fallback + modelAny.caller?.model || // Nested in caller + modelAny.client?.modelName || // Nested in client + modelAny._modelType?.() || // Method fallback + 'unknown'; + + // Ensure modelName is a string + const modelName = + typeof modelNameRaw === 'string' ? modelNameRaw : String(modelNameRaw || 'unknown'); + + // Calculate cost + const estimatedCost = await modelPricingService.calculateCost( + modelName, + tokenUsage.promptTokens, + tokenUsage.completionTokens, + ); + + return { + ...tokenUsage, + estimatedCost, + modelName, + }; +} + /** * Runs the agent for a single item, choosing between streaming or non-streaming execution. * Handles both regular execution and execution after tool calls. @@ -50,7 +96,10 @@ export async function runAgent( formatting_instructions: 'IMPORTANT: For your response to user, you MUST use the `format_final_json_response` tool with your complete answer formatted according to the required schema. Do not attempt to format the JSON manually - always use this tool. Your response will be rejected if it is not properly formatted through this tool. Only use this tool once you are ready to provide your final answer.', }; - const executeOptions = { signal: ctx.getExecutionCancelSignal() }; + const executeOptions = { + signal: ctx.getExecutionCancelSignal(), + callbacks: [itemContext.tokenTracker], + }; // Check if streaming is actually available const isStreamingAvailable = 'isStreaming' in ctx ? ctx.isStreaming?.() : undefined; @@ -85,22 +134,42 @@ export async function runAgent( // If result contains tool calls, build the request object like the normal flow if (result.toolCalls && result.toolCalls.length > 0) { const actions = await createEngineRequests(result.toolCalls, itemIndex, tools); + const currentTokens = itemContext.tokenTracker.getAccumulatedTokens(); return { actions, - metadata: buildResponseMetadata(response, itemIndex), + metadata: buildResponseMetadata(response, itemIndex, currentTokens), }; } + // Add token usage to final streaming result + const currentTokens = itemContext.tokenTracker.getAccumulatedTokens(); + // Merge with previous accumulated tokens if this is a continuation + if (response?.metadata?.accumulatedTokens) { + const mergedTokens = { + promptTokens: response.metadata.accumulatedTokens.promptTokens + currentTokens.promptTokens, + completionTokens: + response.metadata.accumulatedTokens.completionTokens + currentTokens.completionTokens, + totalTokens: response.metadata.accumulatedTokens.totalTokens + currentTokens.totalTokens, + isEstimate: response.metadata.accumulatedTokens.isEstimate || currentTokens.isEstimate, + }; + result.tokenUsage = await addCostToTokenUsage(mergedTokens, model); + } else if (currentTokens.totalTokens > 0) { + result.tokenUsage = await addCostToTokenUsage(currentTokens, model); + } + return result; } else { // Handle regular execution const chatHistory = await loadMemory(memory, model, options.maxTokensFromMemory); - const modelResponse = await executor.invoke({ - ...invokeParams, - chat_history: chatHistory, - }); + const modelResponse = await executor.invoke( + { + ...invokeParams, + chat_history: chatHistory, + }, + executeOptions, + ); if ('returnValues' in modelResponse) { // Save conversation to memory including any tool call context @@ -126,15 +195,32 @@ export async function runAgent( if (options.returnIntermediateSteps && steps.length > 0) { result.intermediateSteps = steps; } 
+ // Add token usage to final result + const currentTokens = itemContext.tokenTracker.getAccumulatedTokens(); + // Merge with previous accumulated tokens if this is a continuation + if (response?.metadata?.accumulatedTokens) { + const mergedTokens = { + promptTokens: + response.metadata.accumulatedTokens.promptTokens + currentTokens.promptTokens, + completionTokens: + response.metadata.accumulatedTokens.completionTokens + currentTokens.completionTokens, + totalTokens: response.metadata.accumulatedTokens.totalTokens + currentTokens.totalTokens, + isEstimate: response.metadata.accumulatedTokens.isEstimate || currentTokens.isEstimate, + }; + result.tokenUsage = await addCostToTokenUsage(mergedTokens, model); + } else if (currentTokens.totalTokens > 0) { + result.tokenUsage = await addCostToTokenUsage(currentTokens, model); + } return result; } // If response contains tool calls, we need to return this in the right format const actions = await createEngineRequests(modelResponse, itemIndex, tools); + const currentTokens = itemContext.tokenTracker.getAccumulatedTokens(); return { actions, - metadata: buildResponseMetadata(response, itemIndex), + metadata: buildResponseMetadata(response, itemIndex, currentTokens), }; } } diff --git a/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/tests/runAgent.test.ts b/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/tests/runAgent.test.ts index 9585ef245e14e..2eafb4b5e0581 100644 --- a/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/tests/runAgent.test.ts +++ b/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/tests/runAgent.test.ts @@ -5,8 +5,9 @@ import type { Tool } from 'langchain/tools'; import type { IExecuteFunctions, INode, EngineResponse } from 'n8n-workflow'; import * as agentExecution from '@utils/agent-execution'; +import { AgentTokenTracker } from '@utils/agent-execution/AgentTokenTracker'; -import type { RequestResponseMetadata } from '../../types'; +import type { RequestResponseMetadata, AgentResult } from '../../types'; import type { ItemContext } from '../prepareItemContext'; import { runAgent } from '../runAgent'; @@ -59,6 +60,7 @@ describe('runAgent - iteration count tracking', () => { returnIntermediateSteps: false, }, outputParser: undefined, + tokenTracker: new AgentTokenTracker(), }; jest.spyOn(agentExecution, 'loadMemory').mockResolvedValue([]); @@ -114,6 +116,7 @@ describe('runAgent - iteration count tracking', () => { returnIntermediateSteps: false, }, outputParser: undefined, + tokenTracker: new AgentTokenTracker(), }; const response: EngineResponse = { @@ -172,6 +175,7 @@ describe('runAgent - iteration count tracking', () => { enableStreaming: true, }, outputParser: undefined, + tokenTracker: new AgentTokenTracker(), }; const mockContext = mock({ @@ -234,6 +238,7 @@ describe('runAgent - iteration count tracking', () => { returnIntermediateSteps: false, }, outputParser: undefined, + tokenTracker: new AgentTokenTracker(), }; // Mock the agent to return a final result (no tool calls) @@ -248,4 +253,124 @@ describe('runAgent - iteration count tracking', () => { expect(result).not.toHaveProperty('actions'); expect(result).not.toHaveProperty('metadata'); }); + + it('should include token usage in final result when tokens are tracked', async () => { + const mockExecutor = mock({ + invoke: jest.fn().mockResolvedValue({ + returnValues: { + output: 'Final answer', + }, + }), + }); + const mockModel = mock(); + const 
mockTokenTracker = new AgentTokenTracker(); + + // Manually simulate token tracking + jest.spyOn(mockTokenTracker, 'getAccumulatedTokens').mockReturnValue({ + promptTokens: 100, + completionTokens: 50, + totalTokens: 150, + isEstimate: false, + }); + + const itemContext: ItemContext = { + itemIndex: 0, + input: 'test input', + steps: [], + tools: [], + prompt: mock(), + options: { + maxIterations: 10, + returnIntermediateSteps: false, + }, + outputParser: undefined, + tokenTracker: mockTokenTracker, + }; + + jest.spyOn(agentExecution, 'loadMemory').mockResolvedValue([]); + jest.spyOn(agentExecution, 'saveToMemory').mockResolvedValue(); + mockContext.getExecutionCancelSignal.mockReturnValue(new AbortController().signal); + + const result = await runAgent(mockContext, mockExecutor, itemContext, mockModel, undefined); + + expect(result).toHaveProperty('output'); + expect(result).toHaveProperty('tokenUsage'); + expect((result as AgentResult).tokenUsage).toMatchObject({ + promptTokens: 100, + completionTokens: 50, + totalTokens: 150, + isEstimate: false, + }); + }); + + it('should merge token usage with previous accumulated tokens', async () => { + const mockExecutor = mock({ + invoke: jest.fn().mockResolvedValue({ + returnValues: { + output: 'Final answer', + }, + }), + }); + const mockModel = mock(); + const mockTokenTracker = new AgentTokenTracker(); + + // Simulate current iteration tokens + jest.spyOn(mockTokenTracker, 'getAccumulatedTokens').mockReturnValue({ + promptTokens: 50, + completionTokens: 30, + totalTokens: 80, + isEstimate: false, + }); + + const itemContext: ItemContext = { + itemIndex: 0, + input: 'test input', + steps: [], + tools: [], + prompt: mock(), + options: { + maxIterations: 10, + returnIntermediateSteps: false, + }, + outputParser: undefined, + tokenTracker: mockTokenTracker, + }; + + // Simulate previous accumulated tokens from earlier iterations + const response: EngineResponse = { + actionResponses: [], + metadata: { + itemIndex: 0, + previousRequests: [], + iterationCount: 2, + accumulatedTokens: { + promptTokens: 100, + completionTokens: 60, + totalTokens: 160, + isEstimate: false, + }, + }, + }; + + jest.spyOn(agentExecution, 'loadMemory').mockResolvedValue([]); + jest.spyOn(agentExecution, 'saveToMemory').mockResolvedValue(); + mockContext.getExecutionCancelSignal.mockReturnValue(new AbortController().signal); + + const result = await runAgent( + mockContext, + mockExecutor, + itemContext, + mockModel, + undefined, + response, + ); + + expect(result).toHaveProperty('tokenUsage'); + expect((result as AgentResult).tokenUsage).toMatchObject({ + promptTokens: 150, // 100 + 50 + completionTokens: 90, // 60 + 30 + totalTokens: 240, // 160 + 80 + isEstimate: false, + }); + }); }); diff --git a/packages/@n8n/nodes-langchain/utils/agent-execution/AgentTokenTracker.ts b/packages/@n8n/nodes-langchain/utils/agent-execution/AgentTokenTracker.ts new file mode 100644 index 0000000000000..d4f5f63a31517 --- /dev/null +++ b/packages/@n8n/nodes-langchain/utils/agent-execution/AgentTokenTracker.ts @@ -0,0 +1,66 @@ +import { BaseCallbackHandler } from '@langchain/core/callbacks/base'; +import type { LLMResult } from '@langchain/core/outputs'; + +export type TokenUsageData = { + promptTokens: number; + completionTokens: number; + totalTokens: number; + isEstimate: boolean; +}; + +export class AgentTokenTracker extends BaseCallbackHandler { + name = 'AgentTokenTracker'; + + private tokens: TokenUsageData[] = []; + + async handleLLMEnd(output: LLMResult): Promise { + // Extract 
token usage from LLMResult + // Some providers use 'tokenUsage', others use 'estimatedTokenUsage' + const tokenUsage = (output?.llmOutput?.tokenUsage || output?.llmOutput?.estimatedTokenUsage) as + | { + completionTokens?: number; + promptTokens?: number; + totalTokens?: number; + } + | undefined; + + if (tokenUsage && tokenUsage.completionTokens && tokenUsage.promptTokens) { + // Actual token data from API + const isEstimate = !!output?.llmOutput?.estimatedTokenUsage && !output?.llmOutput?.tokenUsage; + this.tokens.push({ + promptTokens: tokenUsage.promptTokens, + completionTokens: tokenUsage.completionTokens, + totalTokens: + tokenUsage.totalTokens ?? tokenUsage.promptTokens + tokenUsage.completionTokens, + isEstimate, + }); + } + } + + getAccumulatedTokens(): TokenUsageData { + if (this.tokens.length === 0) { + return { + promptTokens: 0, + completionTokens: 0, + totalTokens: 0, + isEstimate: false, + }; + } + + const aggregated = this.tokens.reduce( + (acc, curr) => ({ + promptTokens: acc.promptTokens + curr.promptTokens, + completionTokens: acc.completionTokens + curr.completionTokens, + totalTokens: acc.totalTokens + curr.totalTokens, + isEstimate: acc.isEstimate || curr.isEstimate, + }), + { promptTokens: 0, completionTokens: 0, totalTokens: 0, isEstimate: false }, + ); + + return aggregated; + } + + reset(): void { + this.tokens = []; + } +} diff --git a/packages/@n8n/nodes-langchain/utils/agent-execution/test/AgentTokenTracker.test.ts b/packages/@n8n/nodes-langchain/utils/agent-execution/test/AgentTokenTracker.test.ts new file mode 100644 index 0000000000000..98f57ed1f7fb2 --- /dev/null +++ b/packages/@n8n/nodes-langchain/utils/agent-execution/test/AgentTokenTracker.test.ts @@ -0,0 +1,165 @@ +import type { LLMResult } from '@langchain/core/outputs'; + +import { AgentTokenTracker } from '../AgentTokenTracker'; + +describe('AgentTokenTracker', () => { + let tracker: AgentTokenTracker; + + beforeEach(() => { + tracker = new AgentTokenTracker(); + }); + + describe('handleLLMEnd', () => { + it('should capture token usage from LLM output', async () => { + const mockOutput: LLMResult = { + generations: [], + llmOutput: { + tokenUsage: { + promptTokens: 10, + completionTokens: 20, + totalTokens: 30, + }, + }, + }; + + await tracker.handleLLMEnd(mockOutput); + + const tokens = tracker.getAccumulatedTokens(); + expect(tokens).toEqual({ + promptTokens: 10, + completionTokens: 20, + totalTokens: 30, + isEstimate: false, + }); + }); + + it('should calculate totalTokens if not provided', async () => { + const mockOutput: LLMResult = { + generations: [], + llmOutput: { + tokenUsage: { + promptTokens: 15, + completionTokens: 25, + }, + }, + }; + + await tracker.handleLLMEnd(mockOutput); + + const tokens = tracker.getAccumulatedTokens(); + expect(tokens.totalTokens).toBe(40); + }); + + it('should accumulate tokens across multiple LLM calls', async () => { + const mockOutput1: LLMResult = { + generations: [], + llmOutput: { + tokenUsage: { + promptTokens: 10, + completionTokens: 20, + totalTokens: 30, + }, + }, + }; + + const mockOutput2: LLMResult = { + generations: [], + llmOutput: { + tokenUsage: { + promptTokens: 5, + completionTokens: 15, + totalTokens: 20, + }, + }, + }; + + await tracker.handleLLMEnd(mockOutput1); + await tracker.handleLLMEnd(mockOutput2); + + const tokens = tracker.getAccumulatedTokens(); + expect(tokens).toEqual({ + promptTokens: 15, + completionTokens: 35, + totalTokens: 50, + isEstimate: false, + }); + }); + + it('should ignore LLM calls without token usage', async () => 
{ + const mockOutputWithoutTokens: LLMResult = { + generations: [], + llmOutput: {}, + }; + + await tracker.handleLLMEnd(mockOutputWithoutTokens); + + const tokens = tracker.getAccumulatedTokens(); + expect(tokens).toEqual({ + promptTokens: 0, + completionTokens: 0, + totalTokens: 0, + isEstimate: false, + }); + }); + + it('should ignore incomplete token usage data', async () => { + const mockOutputIncomplete: LLMResult = { + generations: [], + llmOutput: { + tokenUsage: { + promptTokens: 10, + // Missing completionTokens + }, + }, + }; + + await tracker.handleLLMEnd(mockOutputIncomplete); + + const tokens = tracker.getAccumulatedTokens(); + expect(tokens).toEqual({ + promptTokens: 0, + completionTokens: 0, + totalTokens: 0, + isEstimate: false, + }); + }); + }); + + describe('getAccumulatedTokens', () => { + it('should return zero tokens when no LLM calls have been made', () => { + const tokens = tracker.getAccumulatedTokens(); + expect(tokens).toEqual({ + promptTokens: 0, + completionTokens: 0, + totalTokens: 0, + isEstimate: false, + }); + }); + }); + + describe('reset', () => { + it('should clear all accumulated tokens', async () => { + const mockOutput: LLMResult = { + generations: [], + llmOutput: { + tokenUsage: { + promptTokens: 10, + completionTokens: 20, + totalTokens: 30, + }, + }, + }; + + await tracker.handleLLMEnd(mockOutput); + tracker.reset(); + + const tokens = tracker.getAccumulatedTokens(); + expect(tokens).toEqual({ + promptTokens: 0, + completionTokens: 0, + totalTokens: 0, + isEstimate: false, + }); + }); + }); +}); diff --git a/packages/@n8n/nodes-langchain/utils/agent-execution/types.ts b/packages/@n8n/nodes-langchain/utils/agent-execution/types.ts index ac24f3ed16090..88bb9a48b0b69 100644 --- a/packages/@n8n/nodes-langchain/utils/agent-execution/types.ts +++ b/packages/@n8n/nodes-langchain/utils/agent-execution/types.ts @@ -46,6 +46,17 @@ export type AgentResult = { toolCalls?: ToolCallRequest[]; /** Intermediate steps showing the agent's reasoning */ intermediateSteps?: ToolCallData[]; + /** Token usage information from LLM calls */ + tokenUsage?: { + promptTokens: number; + completionTokens: number; + totalTokens: number; + isEstimate: boolean; + /** Estimated cost in USD (null if pricing unavailable) */ + estimatedCost?: number | null; + /** Model name used for cost calculation */ + modelName?: string; + }; }; /** @@ -58,4 +69,11 @@ export type RequestResponseMetadata = { previousRequests?: ToolCallData[]; /** Current iteration count (for max iterations enforcement) */ iterationCount?: number; + /** Accumulated token usage across all LLM calls */ + accumulatedTokens?: { + promptTokens: number; + completionTokens: number; + totalTokens: number; + isEstimate: boolean; + }; }; diff --git a/packages/@n8n/nodes-langchain/utils/modelPricing.ts b/packages/@n8n/nodes-langchain/utils/modelPricing.ts new file mode 100644 index 0000000000000..b2f72fbe0e146 --- /dev/null +++ b/packages/@n8n/nodes-langchain/utils/modelPricing.ts @@ -0,0 +1,222 @@ +/** + * Model Pricing Service + * Fetches and caches pricing data from LiteLLM's pricing database via CDN + * Includes fallback pricing for most common models + */ + +type ModelPricing = { + input_cost_per_token?: number; + output_cost_per_token?: number; + input_cost_per_million_tokens?: number; + output_cost_per_million_tokens?: number; +}; + +type PricingDatabase = Record; + +/** + * Fallback pricing for most common models (updated as of January 2025) + * Prices are per million tokens + */ +const FALLBACK_PRICING: 
PricingDatabase = {
+	// OpenAI Models
+	'gpt-4': {
+		input_cost_per_million_tokens: 30,
+		output_cost_per_million_tokens: 60,
+	},
+	'gpt-4-turbo': {
+		input_cost_per_million_tokens: 10,
+		output_cost_per_million_tokens: 30,
+	},
+	'gpt-4o': {
+		input_cost_per_million_tokens: 2.5,
+		output_cost_per_million_tokens: 10,
+	},
+	'gpt-4o-mini': {
+		input_cost_per_million_tokens: 0.15,
+		output_cost_per_million_tokens: 0.6,
+	},
+	'gpt-3.5-turbo': {
+		input_cost_per_million_tokens: 0.5,
+		output_cost_per_million_tokens: 1.5,
+	},
+	// Anthropic Models
+	'claude-3-opus-20240229': {
+		input_cost_per_million_tokens: 15,
+		output_cost_per_million_tokens: 75,
+	},
+	'claude-3-sonnet-20240229': {
+		input_cost_per_million_tokens: 3,
+		output_cost_per_million_tokens: 15,
+	},
+	'claude-3-haiku-20240307': {
+		input_cost_per_million_tokens: 0.25,
+		output_cost_per_million_tokens: 1.25,
+	},
+	'claude-3-5-sonnet-20241022': {
+		input_cost_per_million_tokens: 3,
+		output_cost_per_million_tokens: 15,
+	},
+	// Google Models
+	'gemini-pro': {
+		input_cost_per_million_tokens: 0.5,
+		output_cost_per_million_tokens: 1.5,
+	},
+	'gemini-1.5-pro': {
+		input_cost_per_million_tokens: 1.25,
+		output_cost_per_million_tokens: 5,
+	},
+	'gemini-1.5-flash': {
+		input_cost_per_million_tokens: 0.075,
+		output_cost_per_million_tokens: 0.3,
+	},
+};
+
+class ModelPricingService {
+	private static instance: ModelPricingService;
+	private pricingData: PricingDatabase | null = null;
+	private fetchPromise: Promise<void> | null = null;
+	private lastFetchTime: number = 0;
+	private readonly CACHE_DURATION = 24 * 60 * 60 * 1000; // 24 hours
+	private readonly PRICING_URL =
+		'https://cdn.jsdelivr.net/gh/BerriAI/litellm@main/model_prices_and_context_window.json';
+
+	private constructor() {}
+
+	static getInstance(): ModelPricingService {
+		if (!ModelPricingService.instance) {
+			ModelPricingService.instance = new ModelPricingService();
+		}
+		return ModelPricingService.instance;
+	}
+
+	/**
+	 * Fetches pricing data from LiteLLM via CDN
+	 * Uses cache if data is fresh (< 24 hours old)
+	 */
+	private async fetchPricingData(): Promise<void> {
+		// Return existing promise if fetch is in progress
+		if (this.fetchPromise) {
+			return await this.fetchPromise;
+		}
+
+		// Use cached data if it's fresh
+		const now = Date.now();
+		if (this.pricingData && now - this.lastFetchTime < this.CACHE_DURATION) {
+			return;
+		}
+
+		// Fetch new data
+		this.fetchPromise = (async () => {
+			try {
+				const response = await fetch(this.PRICING_URL);
+				if (!response.ok) {
+					throw new Error(`HTTP error! status: ${response.status}`);
+				}
+				this.pricingData = await response.json();
+				this.lastFetchTime = Date.now();
+			} catch (error) {
+				// If fetch fails, use fallback data
+				if (!this.pricingData) {
+					this.pricingData = FALLBACK_PRICING;
+					console.warn('Failed to fetch model pricing data, using fallback:', error);
+				}
+			} finally {
+				this.fetchPromise = null;
+			}
+		})();
+
+		return await this.fetchPromise;
+	}
+
+	/**
+	 * Get pricing for a specific model
+	 */
+	private async getModelPricing(modelName: string): Promise<ModelPricing | null> {
+		await this.fetchPricingData();
+
+		// Validate modelName is a string
+		if (typeof modelName !== 'string' || !modelName) {
+			return null;
+		}
+
+		if (!this.pricingData) {
+			// Use fallback if fetch completely failed
+			return await Promise.resolve(FALLBACK_PRICING[modelName] || null);
+		}
+
+		// Try exact match first
+		if (this.pricingData[modelName]) {
+			return this.pricingData[modelName];
+		}
+
+		// Try common variations
+		const variations = [
+			modelName.toLowerCase(),
+			`openai/${modelName}`,
+			`anthropic/${modelName}`,
+			`google/${modelName}`,
+			`azure/${modelName}`,
+		];
+
+		for (const variant of variations) {
+			if (this.pricingData[variant]) {
+				return this.pricingData[variant];
+			}
+		}
+
+		// Check fallback as last resort
+		return await Promise.resolve(FALLBACK_PRICING[modelName] || null);
+	}
+
+	/**
+	 * Calculate cost for given token usage
+	 * @param modelName The model identifier
+	 * @param promptTokens Number of prompt/input tokens
+	 * @param completionTokens Number of completion/output tokens
+	 * @returns Estimated cost in USD, or null if pricing unavailable
+	 */
+	async calculateCost(
+		modelName: string,
+		promptTokens: number,
+		completionTokens: number,
+	): Promise<number | null> {
+		const pricing = await this.getModelPricing(modelName);
+
+		if (!pricing) {
+			return null;
+		}
+
+		// LiteLLM can store pricing in two formats:
+		// 1. Per-token pricing (input_cost_per_token, output_cost_per_token)
+		// 2. Per-million tokens (input_cost_per_million_tokens, output_cost_per_million_tokens)
+
+		let inputCost = 0;
+		let outputCost = 0;
+
+		if (pricing.input_cost_per_token !== undefined) {
+			inputCost = promptTokens * pricing.input_cost_per_token;
+		} else if (pricing.input_cost_per_million_tokens !== undefined) {
+			inputCost = (promptTokens / 1_000_000) * pricing.input_cost_per_million_tokens;
+		}
+
+		if (pricing.output_cost_per_token !== undefined) {
+			outputCost = completionTokens * pricing.output_cost_per_token;
+		} else if (pricing.output_cost_per_million_tokens !== undefined) {
+			outputCost = (completionTokens / 1_000_000) * pricing.output_cost_per_million_tokens;
+		}
+
+		return inputCost + outputCost;
+	}
+
+	/**
+	 * Force refresh pricing data (useful for testing)
+	 */
+	async refresh(): Promise<void> {
+		this.pricingData = null;
+		this.lastFetchTime = 0;
+		await this.fetchPricingData();
+	}
+}
+
+// Export singleton instance
+export const modelPricingService = ModelPricingService.getInstance();
diff --git a/packages/@n8n/nodes-langchain/utils/test/modelPricing.test.ts b/packages/@n8n/nodes-langchain/utils/test/modelPricing.test.ts
new file mode 100644
index 0000000000000..d98c23cc05475
--- /dev/null
+++ b/packages/@n8n/nodes-langchain/utils/test/modelPricing.test.ts
@@ -0,0 +1,231 @@
+import { modelPricingService } from '../modelPricing';
+
+describe('ModelPricingService', () => {
+	beforeEach(() => {
+		// Reset the singleton instance state before each test
+		// @ts-expect-error - accessing private property for testing
+		modelPricingService.pricingData = null;
+		// @ts-expect-error - accessing private property for testing
+		modelPricingService.lastFetchTime = 0;
+	});
+
+	describe('calculateCost', () => {
+		describe('fallback pricing', () => {
+			it('should calculate cost for GPT-4o-mini', async () => {
+				const cost = await modelPricingService.calculateCost('gpt-4o-mini', 1000, 500);
+
+				// GPT-4o-mini: $0.15 per million input tokens, $0.60 per million output tokens
+				// (1000 / 1_000_000) * 0.15 + (500 / 1_000_000) * 0.60 = 0.00015 + 0.0003 = 0.00045
+				expect(cost).toBe(0.00045);
+			});
+
+			it('should calculate cost for GPT-4', async () => {
+				const cost = await modelPricingService.calculateCost('gpt-4', 1000, 500);
+
+				// GPT-4: $30 per million input tokens, $60 per million output tokens
+				// (1000 / 1_000_000) * 30 + (500 / 1_000_000) * 60 = 0.03 + 0.03 = 0.06
+				expect(cost).toBeCloseTo(0.06, 10);
+			});
+
+			it('should calculate cost for GPT-4 Turbo', async () => {
+				const cost = await modelPricingService.calculateCost('gpt-4-turbo', 1000, 500);
+
+				// GPT-4 Turbo: $10 per million input tokens, $30 per million output tokens
+				// (1000 / 1_000_000) * 10 + (500 / 1_000_000) * 30 = 0.01 + 0.015 = 0.025
+				expect(cost).toBe(0.025);
+			});
+
+			it('should calculate cost for GPT-4o', async () => {
+				const cost = await modelPricingService.calculateCost('gpt-4o', 1000, 500);
+
+				// GPT-4o: $2.5 per million input tokens, $10 per million output tokens
+				// (1000 / 1_000_000) * 2.5 + (500 / 1_000_000) * 10 = 0.0025 + 0.005 = 0.0075
+				expect(cost).toBe(0.0075);
+			});
+
+			it('should calculate cost for GPT-3.5 Turbo', async () => {
+				const cost = await modelPricingService.calculateCost('gpt-3.5-turbo', 1000, 500);
+
+				// GPT-3.5 Turbo: $0.5 per million input tokens, $1.5 per million output tokens
+				// (1000 / 1_000_000) * 0.5 + (500 / 1_000_000) * 1.5 = 0.0005 + 0.00075 = 0.00125
+				expect(cost).toBe(0.00125);
+			});
+
+			it('should calculate cost for Claude 3 Opus', async () => {
+				const cost = await 
modelPricingService.calculateCost('claude-3-opus-20240229', 1000, 500); + + // Claude 3 Opus: $15 per million input tokens, $75 per million output tokens + // (1000 / 1_000_000) * 15 + (500 / 1_000_000) * 75 = 0.015 + 0.0375 = 0.0525 + expect(cost).toBe(0.0525); + }); + + it('should calculate cost for Claude 3 Sonnet', async () => { + const cost = await modelPricingService.calculateCost('claude-3-sonnet-20240229', 1000, 500); + + // Claude 3 Sonnet: $3 per million input tokens, $15 per million output tokens + // (1000 / 1_000_000) * 3 + (500 / 1_000_000) * 15 = 0.003 + 0.0075 = 0.0105 + expect(cost).toBeCloseTo(0.0105, 10); + }); + + it('should calculate cost for Claude 3 Haiku', async () => { + const cost = await modelPricingService.calculateCost('claude-3-haiku-20240307', 1000, 500); + + // Claude 3 Haiku: $0.25 per million input tokens, $1.25 per million output tokens + // (1000 / 1_000_000) * 0.25 + (500 / 1_000_000) * 1.25 = 0.00025 + 0.000625 = 0.000875 + expect(cost).toBe(0.000875); + }); + + it('should calculate cost for Claude 3.5 Sonnet', async () => { + const cost = await modelPricingService.calculateCost( + 'claude-3-5-sonnet-20241022', + 1000, + 500, + ); + + // Claude 3.5 Sonnet: $3 per million input tokens, $15 per million output tokens + // (1000 / 1_000_000) * 3 + (500 / 1_000_000) * 15 = 0.003 + 0.0075 = 0.0105 + expect(cost).toBeCloseTo(0.0105, 10); + }); + + it('should calculate cost for Gemini Pro', async () => { + const cost = await modelPricingService.calculateCost('gemini-pro', 1000, 500); + + // Gemini Pro: $0.5 per million input tokens, $1.5 per million output tokens + // (1000 / 1_000_000) * 0.5 + (500 / 1_000_000) * 1.5 = 0.0005 + 0.00075 = 0.00125 + expect(cost).toBe(0.00125); + }); + + it('should calculate cost for Gemini 1.5 Pro', async () => { + const cost = await modelPricingService.calculateCost('gemini-1.5-pro', 1000, 500); + + // Gemini 1.5 Pro: $1.25 per million input tokens, $5 per million output tokens + // (1000 / 1_000_000) * 1.25 + (500 / 1_000_000) * 5 = 0.00125 + 0.0025 = 0.00375 + expect(cost).toBe(0.00375); + }); + + it('should calculate cost for Gemini 1.5 Flash', async () => { + const cost = await modelPricingService.calculateCost('gemini-1.5-flash', 1000, 500); + + // Gemini 1.5 Flash: $0.075 per million input tokens, $0.3 per million output tokens + // (1000 / 1_000_000) * 0.075 + (500 / 1_000_000) * 0.3 = 0.000075 + 0.00015 = 0.000225 + expect(cost).toBe(0.000225); + }); + }); + + describe('unknown models', () => { + it('should return null for unknown model', async () => { + const cost = await modelPricingService.calculateCost('unknown-model-xyz', 1000, 500); + expect(cost).toBeNull(); + }); + + it('should return null for empty model name', async () => { + const cost = await modelPricingService.calculateCost('', 1000, 500); + expect(cost).toBeNull(); + }); + }); + + describe('edge cases', () => { + it('should handle zero tokens', async () => { + const cost = await modelPricingService.calculateCost('gpt-4o-mini', 0, 0); + expect(cost).toBe(0); + }); + + it('should handle only input tokens', async () => { + const cost = await modelPricingService.calculateCost('gpt-4o-mini', 1000, 0); + + // Only input cost: (1000 / 1_000_000) * 0.15 = 0.00015 + expect(cost).toBe(0.00015); + }); + + it('should handle only output tokens', async () => { + const cost = await modelPricingService.calculateCost('gpt-4o-mini', 0, 500); + + // Only output cost: (500 / 1_000_000) * 0.60 = 0.0003 + expect(cost).toBe(0.0003); + }); + + it('should handle large token 
counts', async () => { + const cost = await modelPricingService.calculateCost('gpt-4o-mini', 1_000_000, 500_000); + + // (1_000_000 / 1_000_000) * 0.15 + (500_000 / 1_000_000) * 0.60 = 0.15 + 0.30 = 0.45 + expect(cost).toBeCloseTo(0.45, 10); + }); + }); + + describe('realistic scenarios', () => { + it('should calculate cost for short conversation', async () => { + // Typical short prompt + response + const cost = await modelPricingService.calculateCost('gpt-4o-mini', 50, 100); + + // (50 / 1_000_000) * 0.15 + (100 / 1_000_000) * 0.60 = 0.0000075 + 0.00006 = 0.0000675 + expect(cost).toBeCloseTo(0.0000675, 10); + }); + + it('should calculate cost for medium conversation', async () => { + // Medium-length conversation + const cost = await modelPricingService.calculateCost('gpt-4o-mini', 500, 300); + + // (500 / 1_000_000) * 0.15 + (300 / 1_000_000) * 0.60 = 0.000075 + 0.00018 = 0.000255 + expect(cost).toBeCloseTo(0.000255, 10); + }); + + it('should calculate cost for long document analysis', async () => { + // Long document processing + const cost = await modelPricingService.calculateCost('gpt-4o', 10000, 2000); + + // (10000 / 1_000_000) * 2.5 + (2000 / 1_000_000) * 10 = 0.025 + 0.02 = 0.045 + expect(cost).toBe(0.045); + }); + + it('should calculate cost for multi-turn agent conversation', async () => { + // Multi-turn conversation with tool calls + const cost = await modelPricingService.calculateCost( + 'claude-3-5-sonnet-20241022', + 2500, + 1500, + ); + + // (2500 / 1_000_000) * 3 + (1500 / 1_000_000) * 15 = 0.0075 + 0.0225 = 0.03 + expect(cost).toBe(0.03); + }); + }); + + describe('cost comparison', () => { + it('should show GPT-4 is more expensive than GPT-4o-mini', async () => { + const costGpt4 = await modelPricingService.calculateCost('gpt-4', 1000, 500); + const costGpt4oMini = await modelPricingService.calculateCost('gpt-4o-mini', 1000, 500); + + expect(costGpt4).toBeGreaterThan(costGpt4oMini!); + }); + + it('should show Claude 3 Opus is more expensive than Claude 3 Haiku', async () => { + const costOpus = await modelPricingService.calculateCost( + 'claude-3-opus-20240229', + 1000, + 500, + ); + const costHaiku = await modelPricingService.calculateCost( + 'claude-3-haiku-20240307', + 1000, + 500, + ); + + expect(costOpus).toBeGreaterThan(costHaiku!); + }); + + it('should show Gemini 1.5 Pro is more expensive than Gemini 1.5 Flash', async () => { + const costPro = await modelPricingService.calculateCost('gemini-1.5-pro', 1000, 500); + const costFlash = await modelPricingService.calculateCost('gemini-1.5-flash', 1000, 500); + + expect(costPro).toBeGreaterThan(costFlash!); + }); + }); + }); + + describe('refresh', () => { + it('should allow manual refresh of pricing data', async () => { + // This test mainly ensures the method exists and doesn't throw + await expect(modelPricingService.refresh()).resolves.not.toThrow(); + }); + }); +});
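
Reviewer sketch, not part of the change above: how the new pieces are intended to compose end to end. The APIs are the ones added in this diff; the example() wrapper and the fabricated LLMResult are illustrative only.

import type { LLMResult } from '@langchain/core/outputs';

import { AgentTokenTracker } from '@utils/agent-execution/AgentTokenTracker';
import { modelPricingService } from '@utils/modelPricing';

async function example(): Promise<void> {
	const tracker = new AgentTokenTracker();

	// runAgent attaches the tracker via `callbacks: [itemContext.tokenTracker]`;
	// here a fabricated LLMResult stands in for one model call.
	const llmResult: LLMResult = {
		generations: [],
		llmOutput: { tokenUsage: { promptTokens: 120, completionTokens: 80, totalTokens: 200 } },
	};
	await tracker.handleLLMEnd(llmResult);

	const usage = tracker.getAccumulatedTokens();
	// { promptTokens: 120, completionTokens: 80, totalTokens: 200, isEstimate: false }

	const estimatedCost = await modelPricingService.calculateCost(
		'gpt-4o-mini',
		usage.promptTokens,
		usage.completionTokens,
	);
	// With the fallback table: (120 / 1_000_000) * 0.15 + (80 / 1_000_000) * 0.6 = 0.000066
	console.log({ ...usage, estimatedCost });
}

void example();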
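The cross-iteration accounting in buildResponseMetadata and runAgent reduces to a single merge rule over TokenUsageData. A minimal standalone sketch follows; mergeTokenUsage is a hypothetical helper (the diff inlines this logic), and the numbers mirror the "should merge token usage" test above.

import type { TokenUsageData } from '@utils/agent-execution/AgentTokenTracker';

function mergeTokenUsage(prev: TokenUsageData, curr: TokenUsageData): TokenUsageData {
	return {
		promptTokens: prev.promptTokens + curr.promptTokens,
		completionTokens: prev.completionTokens + curr.completionTokens,
		totalTokens: prev.totalTokens + curr.totalTokens,
		// One estimated reading marks the whole total as an estimate.
		isEstimate: prev.isEstimate || curr.isEstimate,
	};
}

// Previous iterations reported 100/60/160 and the current one 50/30/80,
// so the final tokenUsage surfaces 150/90/240, as asserted in runAgent.test.ts.
const merged = mergeTokenUsage(
	{ promptTokens: 100, completionTokens: 60, totalTokens: 160, isEstimate: false },
	{ promptTokens: 50, completionTokens: 30, totalTokens: 80, isEstimate: false },
);
console.log(merged);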
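calculateCost accepts both pricing field formats found in the LiteLLM database (per-token and per-million-tokens). A self-contained sketch of that branch; costFor is a hypothetical reimplementation, not an export of modelPricing.ts.

type ModelPricing = {
	input_cost_per_token?: number;
	output_cost_per_token?: number;
	input_cost_per_million_tokens?: number;
	output_cost_per_million_tokens?: number;
};

function costFor(pricing: ModelPricing, promptTokens: number, completionTokens: number): number {
	const inputCost =
		pricing.input_cost_per_token !== undefined
			? promptTokens * pricing.input_cost_per_token
			: (promptTokens / 1_000_000) * (pricing.input_cost_per_million_tokens ?? 0);
	const outputCost =
		pricing.output_cost_per_token !== undefined
			? completionTokens * pricing.output_cost_per_token
			: (completionTokens / 1_000_000) * (pricing.output_cost_per_million_tokens ?? 0);
	return inputCost + outputCost;
}

// Both notations describe the same gpt-4o-mini price; each call prints roughly 0.00045.
console.log(costFor({ input_cost_per_token: 0.00000015, output_cost_per_token: 0.0000006 }, 1000, 500));
console.log(costFor({ input_cost_per_million_tokens: 0.15, output_cost_per_million_tokens: 0.6 }, 1000, 500));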