12 changes: 11 additions & 1 deletion app/lib/.server/llm/api-key.ts
@@ -5,5 +5,15 @@ export function getAPIKey(cloudflareEnv: Env) {
* The `cloudflareEnv` is only used when deployed or when previewing locally.
* In development the environment variables are available through `env`.
*/
return env.ANTHROPIC_API_KEY || cloudflareEnv.ANTHROPIC_API_KEY;
const apiKey =
env.NVIDIA_API_KEY ||
cloudflareEnv.NVIDIA_API_KEY ||
env.ANTHROPIC_API_KEY ||
cloudflareEnv.ANTHROPIC_API_KEY;

if (!apiKey) {
throw new Error('Missing NVIDIA_API_KEY environment variable');
Comment on lines +8 to +15

P1: Require NVIDIA API key instead of falling back to Anthropic

The new streaming implementation targets NVIDIA’s OpenAI-compatible endpoint, yet getAPIKey still returns ANTHROPIC_API_KEY when no NVIDIA key is set. In environments that only configured the old Anthropic key (the previous default), the call will proceed with that key and fail at runtime with an opaque 401 instead of failing fast with a clear configuration error. Now that Env declares NVIDIA_API_KEY as required, this function should throw when that key is missing rather than silently accepting an incompatible one.

}

return apiKey;
}
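
The review comment above suggests a stricter version. A minimal sketch, assuming the same env/cloudflareEnv sources as the diff: drop the Anthropic fallback entirely so a missing NVIDIA key fails fast.

// Sketch only: NVIDIA-only key resolution, as the review comment suggests.
export function getAPIKey(cloudflareEnv: Env) {
  const apiKey = env.NVIDIA_API_KEY || cloudflareEnv.NVIDIA_API_KEY;

  if (!apiKey) {
    // Fail fast with a clear configuration error instead of letting an
    // incompatible Anthropic key surface as an opaque 401 at request time.
    throw new Error('Missing NVIDIA_API_KEY environment variable');
  }

  return apiKey;
}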
2 changes: 1 addition & 1 deletion app/lib/.server/llm/constants.ts
@@ -1,4 +1,4 @@
// see https://docs.anthropic.com/en/docs/about-claude/models
// Maximum completion tokens for the configured model.
export const MAX_TOKENS = 8192;

// limits the number of model responses that can be returned in a single request
27 changes: 21 additions & 6 deletions app/lib/.server/llm/model.ts
@@ -1,9 +1,24 @@
import { createAnthropic } from '@ai-sdk/anthropic';
import OpenAI from 'openai';

export function getAnthropicModel(apiKey: string) {
const anthropic = createAnthropic({
apiKey,
});
const BASE_URL = 'https://integrate.api.nvidia.com/v1';
const MODEL_NAME = 'moonshotai/kimi-k2-instruct-0905';

return anthropic('claude-3-5-sonnet-20240620');
let cachedClient: { apiKey: string; client: OpenAI } | undefined;

export function getOpenAIClient(apiKey: string) {
if (!cachedClient || cachedClient.apiKey !== apiKey) {
cachedClient = {
apiKey,
client: new OpenAI({
apiKey,
baseURL: BASE_URL,
}),
};
}

return cachedClient.client;
}

export function getModelName() {
return MODEL_NAME;
}
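
The caching in getOpenAIClient means repeated calls with the same key reuse one client, while a changed key rebuilds it. A small illustrative sketch; the keys are hypothetical:

// Hypothetical usage of the caching behavior above.
const first = getOpenAIClient('key-a');
const second = getOpenAIClient('key-a'); // same cached instance: first === second
const third = getOpenAIClient('key-b'); // different key: cache is replaced, third !== second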
204 changes: 190 additions & 14 deletions app/lib/.server/llm/stream-text.ts
@@ -1,6 +1,11 @@
import { streamText as _streamText, convertToCoreMessages } from 'ai';
import { formatStreamPart } from 'ai';
import type {
ChatCompletionChunk,
ChatCompletionMessageParam,
ChatCompletionToolChoiceOption,
} from 'openai/resources/chat/completions';
import { getAPIKey } from '~/lib/.server/llm/api-key';
import { getAnthropicModel } from '~/lib/.server/llm/model';
import { getModelName, getOpenAIClient } from '~/lib/.server/llm/model';
import { MAX_TOKENS } from './constants';
import { getSystemPrompt } from './prompts';

@@ -19,17 +24,188 @@ interface Message {

export type Messages = Message[];

export type StreamingOptions = Omit<Parameters<typeof _streamText>[0], 'model'>;

export function streamText(messages: Messages, env: Env, options?: StreamingOptions) {
return _streamText({
model: getAnthropicModel(getAPIKey(env)),
system: getSystemPrompt(),
maxTokens: MAX_TOKENS,
headers: {
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15',
},
messages: convertToCoreMessages(messages),
...options,
type FinishReason =
| 'stop'
| 'length'
| 'content-filter'
| 'tool-calls'
| 'error'
| 'other'
| 'unknown';

export interface StreamingOptions {
temperature?: number;
topP?: number;
toolChoice?: ChatCompletionToolChoiceOption;
onFinish?: (result: { text: string; finishReason: FinishReason }) => void | Promise<void>;
}

const encoder = new TextEncoder();

export async function streamText(messages: Messages, env: Env, options: StreamingOptions = {}) {
const client = getOpenAIClient(getAPIKey(env));

const openAIMessages: ChatCompletionMessageParam[] = [
{ role: 'system', content: getSystemPrompt() },
...convertMessages(messages),
];

const response = await client.chat.completions.create({
model: getModelName(),
messages: openAIMessages,
max_tokens: MAX_TOKENS,
temperature: options.temperature ?? 0,
top_p: options.topP ?? 0.9,
stream: true,
stream_options: { include_usage: true },
tool_choice: options.toolChoice,
});

return new OpenAIStreamTextResult(response, options.onFinish);
}

class OpenAIStreamTextResult {
private consumed = false;

constructor(
private readonly response: AsyncIterable<ChatCompletionChunk>,
private readonly onFinish?: StreamingOptions['onFinish'],
) {}

toAIStream() {
if (this.consumed) {
throw new Error('Stream has already been consumed.');
}

this.consumed = true;

return new ReadableStream<Uint8Array>({
start: async (controller) => {
let aggregatedText = '';
let finishReason: string | null = null;
let usage: { promptTokens: number; completionTokens: number } | null = null;

try {
for await (const chunk of this.response) {
const choice = chunk.choices[0];

if (!choice) {
continue;
}

const textDelta = extractDeltaText(choice.delta);

if (textDelta.length > 0) {
aggregatedText += textDelta;
controller.enqueue(encoder.encode(formatStreamPart('text', textDelta)));
}

if (choice.finish_reason) {
finishReason = choice.finish_reason;
}

if (chunk.usage) {
usage = {
promptTokens: chunk.usage.prompt_tokens ?? 0,
completionTokens: chunk.usage.completion_tokens ?? 0,
};
}
}

const normalizedFinishReason = normalizeFinishReason(finishReason);
const usagePayload =
usage ?? {
promptTokens: 0,
completionTokens: 0,
};

controller.enqueue(
encoder.encode(
formatStreamPart('finish_message', {
finishReason: normalizedFinishReason,
usage: usagePayload,
}),
),
);

if (this.onFinish) {
await this.onFinish({
text: aggregatedText,
finishReason: normalizedFinishReason,
});
}

controller.close();
} catch (error) {
const message = error instanceof Error ? error.message : 'Unknown error';
controller.enqueue(encoder.encode(formatStreamPart('error', JSON.stringify(message))));
controller.error(error);
}
},
});
}
}

function convertMessages(messages: Messages): ChatCompletionMessageParam[] {
return messages.map((message) => ({
role: message.role,
content: message.content,
}));
}

function extractDeltaText(
delta: ChatCompletionChunk['choices'][number]['delta'],
): string {
const content = delta?.content;

if (!content) {
return '';
}

if (typeof content === 'string') {
return content;
}

if (Array.isArray(content)) {
return (content as Array<string | Record<string, unknown>>)
.map((part) => {
if (typeof part === 'string') {
return part;
}

if (isTextPart(part)) {
return part.text;
}

return '';
})
.join('');
}

return '';
}

function normalizeFinishReason(finishReason: string | null): FinishReason {
switch (finishReason) {
case 'stop':
return 'stop';
case 'length':
return 'length';
case 'content_filter':
return 'content-filter';
case 'tool_calls':
case 'function_call':
return 'tool-calls';
case 'error':
return 'error';
case null:
case undefined:
return 'unknown';
default:
return 'other';
}
}

function isTextPart(part: Record<string, unknown>): part is { text: string } {
return typeof part.text === 'string';
}
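
For context, a hedged sketch of how a route handler might consume this result. The handler shape, import path, and headers are assumptions for illustration, not part of this diff.

// Sketch only: wiring streamText into a streaming HTTP response.
import { streamText, type Messages } from '~/lib/.server/llm/stream-text';

export async function action({ request, context }: { request: Request; context: { cloudflare: { env: Env } } }) {
  const { messages } = (await request.json()) as { messages: Messages };

  const result = await streamText(messages, context.cloudflare.env, {
    onFinish: ({ finishReason }) => console.log('stream finished:', finishReason),
  });

  // toAIStream() may only be called once; a second call throws.
  return new Response(result.toAIStream(), {
    headers: { 'Content-Type': 'text/plain; charset=utf-8' },
  });
}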
2 changes: 1 addition & 1 deletion app/lib/fetch.ts
@@ -1,7 +1,7 @@
type CommonRequest = Omit<RequestInit, 'body'> & { body?: URLSearchParams };

export async function request(url: string, init?: CommonRequest) {
if (import.meta.env.DEV) {
if (process.env.NODE_ENV !== 'production' && typeof window === 'undefined') {
const nodeFetch = await import('node-fetch');
const https = await import('node:https');

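One caveat worth flagging here, in the same spirit as the logger review comment below: process.env.NODE_ENV is evaluated before the typeof window check, so unless the bundler statically replaces it, this condition can itself throw in a browser. A hedged sketch of an order-safe variant:

// Sketch only: test for a browser first so `process` is never touched there.
if (typeof window === 'undefined' && process.env.NODE_ENV !== 'production') {
  // dev-server-only path
}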
2 changes: 1 addition & 1 deletion app/lib/hooks/useMessageParser.ts
@@ -45,7 +45,7 @@ export function useMessageParser() {
const parseMessages = useCallback((messages: Message[], isLoading: boolean) => {
let reset = false;

if (import.meta.env.DEV && !isLoading) {
if (process.env.NODE_ENV !== 'production' && !isLoading) {
reset = true;
messageParser.reset();
}
2 changes: 1 addition & 1 deletion app/lib/stores/theme.ts
@@ -13,7 +13,7 @@ export const DEFAULT_THEME = 'light';
export const themeStore = atom<Theme>(initStore());

function initStore() {
if (!import.meta.env.SSR) {
if (typeof window !== 'undefined') {
const persistedTheme = localStorage.getItem(kTheme) as Theme | undefined;
const themeAttribute = document.querySelector('html')?.getAttribute('data-theme');

13 changes: 11 additions & 2 deletions app/utils/logger.ts
@@ -11,7 +11,16 @@ interface Logger {
setLevel: (level: DebugLevel) => void;
}

let currentLevel: DebugLevel = import.meta.env.VITE_LOG_LEVEL ?? import.meta.env.DEV ? 'debug' : 'info';
const DEBUG_LEVELS: DebugLevel[] = ['trace', 'debug', 'info', 'warn', 'error'];
const isProduction = process.env.NODE_ENV === 'production';
const envDebugLevel =
(process.env.NEXT_PUBLIC_LOG_LEVEL as DebugLevel | undefined) ??
(process.env.LOG_LEVEL as DebugLevel | undefined);
Comment on lines +14 to +18

P1: Guard logger against missing process.env in browsers

This module now reads process.env directly to determine the log level. Unlike the previous import.meta.env constants, process is not available in the browser or in the default Cloudflare Worker runtime, so importing this file will throw ReferenceError: process is not defined before any logging can occur. Because the logger is imported by many client-side stores and hooks, this would crash the UI in non‑Node environments. Consider reverting to compile‑time env injection or guarding the access behind a feature check.

const resolvedEnvLevel = envDebugLevel && DEBUG_LEVELS.includes(envDebugLevel as DebugLevel)
? (envDebugLevel as DebugLevel)
: undefined;

let currentLevel: DebugLevel = resolvedEnvLevel ?? (isProduction ? 'info' : 'debug');

const isWorker = 'HTMLRewriter' in globalThis;
const supportsColor = !isWorker;
@@ -37,7 +46,7 @@ export function createScopedLogger(scope: string): Logger {
}

function setLevel(level: DebugLevel) {
if ((level === 'trace' || level === 'debug') && import.meta.env.PROD) {
if ((level === 'trace' || level === 'debug') && isProduction) {
return;
}

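A hedged sketch of the guard the reviewer suggests, so the module can be imported in runtimes where process is undefined; the readEnv helper is hypothetical:

// Sketch only: read process.env defensively so non-Node runtimes fall back
// to defaults instead of throwing ReferenceError at import time.
function readEnv(name: string): string | undefined {
  if (typeof process !== 'undefined' && typeof process.env === 'object') {
    return process.env[name];
  }

  return undefined;
}

const isProduction = readEnv('NODE_ENV') === 'production';
const envDebugLevel = (readEnv('NEXT_PUBLIC_LOG_LEVEL') ?? readEnv('LOG_LEVEL')) as DebugLevel | undefined;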
2 changes: 1 addition & 1 deletion package.json
@@ -23,7 +23,6 @@
"node": ">=18.18.0"
},
"dependencies": {
"@ai-sdk/anthropic": "^0.0.39",
"@codemirror/autocomplete": "^6.17.0",
"@codemirror/commands": "^6.6.0",
"@codemirror/lang-cpp": "^6.0.2",
@@ -74,6 +73,7 @@
"remix-island": "^0.2.0",
"remix-utils": "^7.6.0",
"shiki": "^1.9.1",
"openai": "^4.56.0",
"unist-util-visit": "^5.0.0"
},
"devDependencies": {