sst · tylergannon · Nov 26, 2025 · Nov 26, 2025
diff --git a/flake.lock b/flake.lock
diff --git a/packages/opencode/src/util/token.ts b/packages/opencode/src/util/token.ts
@@ -1,7 +1,37 @@
 export namespace Token {
-  const CHARS_PER_TOKEN = 4
+  // Tokens-per-character weights by category (1 / chars-per-token), derived from typical BPE tokenizer behavior on code
 
-  export function estimate(input: string) {
-    return Math.max(0, Math.round((input || "").length / CHARS_PER_TOKEN))
+  // Digits tokenize poorly - often split into individual digits or small groups
+  const DIGITS_RATIO = 1 / 1.9
+
+  // Punctuation and symbols (brackets, operators, etc.) - most are single tokens,
+  // though some pairs merge (e.g., ->, !=, ::)
+  const PUNCTUATION_RATIO = 1 / 1.2
+
+  // Whitespace - leading indentation often merges (4 spaces → 1 token),
+  // but isolated spaces typically don't
+  const WHITESPACE_RATIO = 1 / 2.5
+
+  // Letters and other characters - keywords compress well, identifiers less so
+  const DEFAULT_RATIO = 1 / 3.5
+
+  // Adjustment multiplier for tuning estimates up (>1) or down (<1).
+  // Set via the OPENCODE_TOKEN_FACTOR environment variable; unset, unparseable, or 0 falls back to 1.0.
+  const FACTOR = parseFloat(process.env.OPENCODE_TOKEN_FACTOR || "1.0") || 1.0
+
+  export function estimate(input: string): number {
+    let count = 0
+    for (const char of input || "") {
+      if (/\p{N}/u.test(char)) {
+        count += DIGITS_RATIO
+      } else if (/\p{P}|\p{S}/u.test(char)) {
+        count += PUNCTUATION_RATIO
+      } else if (/\s/.test(char)) {
+        count += WHITESPACE_RATIO
+      } else {
+        count += DEFAULT_RATIO
+      }
+    }
+    return Math.trunc(count * FACTOR)
   }
 }
diff --git a/packages/web/src/content/docs/config.mdx b/packages/web/src/content/docs/config.mdx
@@ -424,3 +424,18 @@ These are useful for:
 - Keeping sensitive data like API keys in separate files.
 - Including large instruction files without cluttering your config.
 - Sharing common configuration snippets across multiple config files.
+
+### Token Estimation
+
+Opencode estimates the token count of tool results with a heuristic that weights
+each character by category. The estimate carries a small margin of error that
+varies between languages and types of content.
+
+Use the `OPENCODE_TOKEN_FACTOR` environment variable (default `1.0`) to tune token estimation.
+Set it to a value greater than 1.0 to increase estimates (more conservative)
+or less than 1.0 to decrease estimates (allows more content).
+
+```bash
+export OPENCODE_TOKEN_FACTOR=1.1 # Estimate token counts 10% more conservatively
+opencode run "Hello world"
+```