Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 25 additions & 26 deletions unsloth/save.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,14 +884,7 @@ def install_llama_cpp_old(version = -10):
pass

# Check if successful
if not (
os.path.exists("llama.cpp/llama-quantize.exe") or
os.path.exists("llama.cpp/llama-quantize") or
os.path.exists("llama.cpp/quantize.exe") or
os.path.exists("llama.cpp/quantize") or
os.path.exists("llama.cpp/build/bin/llama-quantize") or
os.path.exists("llama.cpp/build/bin/quantize")
):
if not (get_local_llamacpp_quantize_location()):
raise RuntimeError(
"Unsloth: The file 'llama.cpp/llama-quantize' or `llama.cpp/quantize` does not exist.\n"\
"We've also double checked the building directory under 'llama.cpp/build/bin/'.\n"\
Expand Down Expand Up @@ -947,6 +940,27 @@ def get_executable(executables):
return None
pass

def get_local_llamacpp_quantize_location():
    """Locate the llama.cpp quantize binary in a local installation.

    Candidate paths are probed relative to the current working directory
    (each one is built with a leading "."), in a fixed priority order:
    the top level of ``llama.cpp`` first, then ``llama.cpp/build/bin``,
    then ``llama.cpp/build/bin/Release``.

    Returns:
        The first candidate path that exists as a regular file, or
        ``None`` when no candidate is found.
    """
    # Careful: llama.cpp/quantize changed to llama.cpp/llama-quantize
    # and llama.cpp/main changed to llama.cpp/llama-cli
    # See https://github.com/ggerganov/llama.cpp/pull/7809
    possible_paths = [
        os.path.join(".", "llama.cpp", "quantize.exe"),
        os.path.join(".", "llama.cpp", "quantize"),
        os.path.join(".", "llama.cpp", "llama-quantize.exe"),
        os.path.join(".", "llama.cpp", "llama-quantize"),
        os.path.join(".", "llama.cpp", "build", "bin", "llama-quantize"),
        os.path.join(".", "llama.cpp", "build", "bin", "quantize"),
        os.path.join(".", "llama.cpp", "build", "bin", "Release", "llama-quantize.exe"),
        os.path.join(".", "llama.cpp", "build", "bin", "Release", "quantize.exe"),
    ]

    for path in possible_paths:
        # Use isfile rather than exists: a directory that happens to share
        # a candidate's name is not a runnable binary and must not shadow
        # a real executable further down the list.
        if os.path.isfile(path):
            return path
    return None


def save_to_gguf(
model_type : str,
Expand Down Expand Up @@ -1073,23 +1087,8 @@ def save_to_gguf(
install_llama_cpp_blocking()
pass

# Careful llama.cpp/quantize changed to llama.cpp/llama-quantize
# and llama.cpp/main changed to llama.cpp/llama-cli
# See https://github.com/ggerganov/llama.cpp/pull/7809
quantize_location = None
if os.path.exists("llama.cpp/quantize.exe"):
quantize_location = "llama.cpp/quantize.exe"
elif os.path.exists("llama.cpp/quantize"):
quantize_location = "llama.cpp/quantize"
elif os.path.exists("llama.cpp/llama-quantize.exe"):
quantize_location = "llama.cpp/llama-quantize.exe"
elif os.path.exists("llama.cpp/llama-quantize"):
quantize_location = "llama.cpp/llama-quantize"
elif os.path.exists("llama.cpp/build/bin/llama-quantize"):
quantize_location = "llama.cpp/build/bin/llama-quantize"
elif os.path.exists("llama.cpp/build/bin/quantize"):
quantize_location = "llama.cpp/build/bin/quantize"
else:
quantize_location = get_local_llamacpp_quantize_location()
if quantize_location is None:
raise RuntimeError(
"Unsloth: The file 'llama.cpp/llama-quantize' or `llama.cpp/quantize` does not exist.\n"\
"We've also double checked the building directory under 'llama.cpp/build/bin/'.\n"\
Expand Down Expand Up @@ -1239,7 +1238,7 @@ def save_to_gguf(
print(f"Unsloth: [2] Converting GGUF 16bit into {quant_method}. This might take 20 minutes...")
final_location = str((Path(model_directory) / f"unsloth.{quant_method.upper()}.gguf").absolute())

command = f"./{quantize_location} {full_precision_location} "\
command = f"{quantize_location} {full_precision_location} "\
f"{final_location} {quant_method} {n_cpus}"

try_execute([command,], force_complete = True)
Expand Down