Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 33 additions & 3 deletions packages/opencode/src/util/token.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,37 @@
/**
 * Cheap, tokenizer-free token count estimation.
 *
 * Every Unicode code point of the input contributes a fractional token weight
 * chosen by its character category; the weights are summed, scaled by an
 * optional tuning factor, and truncated to an integer.
 */
export namespace Token {
  // Per-character token weights. Each one is the reciprocal of a typical
  // characters-per-token figure for BPE tokenizers on code, i.e. the expected
  // number of tokens a single character of that category contributes.

  // Digits tokenize poorly - often split into individual digits or small groups
  const DIGITS_RATIO = 1 / 1.9

  // Punctuation and symbols (brackets, operators, etc.) - most are single tokens,
  // though some pairs merge (e.g., ->, !=, ::)
  const PUNCTUATION_RATIO = 1 / 1.2

  // Whitespace - leading indentation often merges (4 spaces → 1 token),
  // but isolated spaces typically don't
  const WHITESPACE_RATIO = 1 / 2.5

  // Letters and other characters - keywords compress well, identifiers less so
  const DEFAULT_RATIO = 1 / 3.5

  // Character classifiers, hoisted so they are compiled once instead of once per
  // character. None of them is global/sticky, so `.test()` carries no state
  // between calls.
  const DIGIT = /\p{N}/u
  const PUNCTUATION_OR_SYMBOL = /[\p{P}\p{S}]/u
  const WHITESPACE = /\s/

  // Adjustment multiplier for tuning estimates up (>1) or down (<1).
  // Set via OPENCODE_TOKEN_FACTOR environment variable, read once at load time.
  // Anything that is not a finite, positive number (unset, empty, non-numeric,
  // zero, negative, Infinity) falls back to 1.0 so that estimates can never
  // become negative or NaN.
  const FACTOR = (() => {
    const parsed = Number.parseFloat(process.env.OPENCODE_TOKEN_FACTOR ?? "")
    return Number.isFinite(parsed) && parsed > 0 ? parsed : 1.0
  })()

  /**
   * Estimates how many tokens `input` would occupy.
   *
   * @param input - Text to measure. A falsy value is treated as the empty string.
   * @returns A non-negative integer estimate; `0` for empty input.
   */
  export function estimate(input: string): number {
    let count = 0
    // for...of walks code points, so astral characters (e.g. emoji) are
    // classified once rather than as two surrogate halves.
    for (const char of input || "") {
      if (DIGIT.test(char)) {
        count += DIGITS_RATIO
      } else if (PUNCTUATION_OR_SYMBOL.test(char)) {
        count += PUNCTUATION_RATIO
      } else if (WHITESPACE.test(char)) {
        count += WHITESPACE_RATIO
      } else {
        count += DEFAULT_RATIO
      }
    }
    return Math.trunc(count * FACTOR)
  }
}
15 changes: 15 additions & 0 deletions packages/web/src/content/docs/config.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -424,3 +424,18 @@ These are useful for:
- Keeping sensitive data like API keys in separate files.
- Including large instruction files without cluttering your config.
- Sharing common configuration snippets across multiple config files.

### Token Estimation

Opencode estimates token counts for tool results using a weighted per-character
heuristic rather than a real tokenizer. This introduces a small margin of error
that varies between languages and types of content.

You can use the `OPENCODE_TOKEN_FACTOR` environment variable to tune token estimation.
Set it to a value greater than 1.0 to increase estimates (more conservative)
or to a value less than 1.0 to decrease estimates (allow more content).
If the variable is unset or is not a valid number, the factor defaults to 1.0.

```bash
export OPENCODE_TOKEN_FACTOR=1.1 # Estimate token counts 10% more conservatively
opencode run "Hello world"
```