diff --git a/justlm_llama.hpp b/justlm_llama.hpp
index 2be0c3c..d3ca43b 100644
--- a/justlm_llama.hpp
+++ b/justlm_llama.hpp
@@ -150,7 +150,7 @@ class LLaMAInference final : public Inference {
         llama_token_data_array candidates_p = {candidates.data(), candidates.size(), false};
         // Sample repeat penalty
         auto n_repeat_last = std::min(state->tokens.size(), params.n_repeat_last);
-        llama_sample_repetition_penalty(state->ctx, &candidates_p, params.n_repeat_last?(state->tokens.data()+state->tokens.size()-n_repeat_last):nullptr, n_repeat_last, params.repeat_penalty);
+        llama_sample_repetition_penalties(state->ctx, &candidates_p, params.n_repeat_last?(state->tokens.data()+state->tokens.size()-n_repeat_last):nullptr, n_repeat_last, params.repeat_penalty, 0.0f, 0.0f); // 0.0f keeps the additive frequency/presence penalties disabled
         // Grammar sampling
         if (state->grammar) {
             llama_sample_grammar(state->ctx, &candidates_p, state->grammar);
@@ -212,7 +212,7 @@ public:
         state->tokens.resize(old_token_count+state->prompt.size());
 
         // Run tokenizer
-        const auto token_count = llama_tokenize(state->model, prompt.c_str(), prompt.size(), state->tokens.data()+old_token_count, state->tokens.size()-old_token_count, was_empty);
+        const auto token_count = llama_tokenize(state->model, prompt.c_str(), prompt.size(), state->tokens.data()+old_token_count, state->tokens.size()-old_token_count, was_empty, false);
         state->tokens.resize(old_token_count+token_count);
 
         // Make sure token limit isn't being hit
@@ -243,13 +243,13 @@ public:
             LM_COTHROW(e.what(), "");
         }
 
-        if (id == llama_token_eos(state->ctx)) {
+        if (id == llama_token_eos(state->model)) {
             if (eos_count++ == params.n_eos_ignores) {
                 abort = true;
                 continue;
             }
             state->tokens.push_back(0);
-            llama_tokenize(state->model, "\n", 1, &state->tokens.back(), 1, false);
+            llama_tokenize(state->model, "\n", 1, &state->tokens.back(), 1, false, false);
             id = state->tokens.back();
         } else {
             // Add token
diff --git a/llama.cpp-mainline b/llama.cpp-mainline
index 019ba1d..a75fa57 160000
--- a/llama.cpp-mainline
+++ b/llama.cpp-mainline
@@ -1 +1 @@
-Subproject commit 019ba1dcd0c7775a5ac0f7442634a330eb0173cc
+Subproject commit a75fa576abba9d37f463580c379e4bbf1e1ad03c
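Note on the sampling hunk above: upstream merged llama_sample_repetition_penalty and the OpenAI-style frequency/presence penalties into a single llama_sample_repetition_penalties call. The two new trailing parameters are additive on the logits, so their neutral value is 0.0f, not 1.0f; the hunk passes 0.0f for both to preserve the old repeat-penalty-only behavior. A minimal sketch of the call, assuming the llama.h signature at the pinned submodule commit (the helper name is illustrative):

    #include <vector>
    #include "llama.h"

    // Apply only the classic multiplicative repeat penalty via the merged API.
    // penalty_repeat: multiplicative, neutral at 1.0f.
    // penalty_freq / penalty_present: additive, neutral at 0.0f.
    static void apply_repeat_penalty(llama_context * ctx,
                                     std::vector<llama_token_data> & candidates,
                                     const std::vector<llama_token> & last_tokens,
                                     float repeat_penalty) {
        llama_token_data_array candidates_p = {candidates.data(), candidates.size(), false};
        llama_sample_repetition_penalties(ctx, &candidates_p,
                                          last_tokens.data(), last_tokens.size(),
                                          repeat_penalty,
                                          /*penalty_freq=*/0.0f,
                                          /*penalty_present=*/0.0f);
    }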
diff --git a/llama.cpp.cmake b/llama.cpp.cmake
index b6f9361..8a88f93 100644
--- a/llama.cpp.cmake
+++ b/llama.cpp.cmake
@@ -274,33 +274,27 @@ function(include_ggml DIRECTORY SUFFIX WITH_LLAMA)
         endif()
     endif()
 
-    set(GGML_SOURCES_QUANT_K )
     set(GGML_METAL_SOURCES )
-    if (LLAMA_K_QUANTS)
-        set(GGML_SOURCES_QUANT_K
-            ${DIRECTORY}/k_quants.h
-            ${DIRECTORY}/k_quants.c)
-        if (LLAMA_METAL)
-            find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
-            find_library(METAL_FRAMEWORK Metal REQUIRED)
-            find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
-            find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED)
+    if (LLAMA_METAL)
+        find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
+        find_library(METAL_FRAMEWORK Metal REQUIRED)
+        find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
+        find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED)
 
-            set(GGML_METAL_SOURCES ${DIRECTORY}/ggml-metal.m ${DIRECTORY}/ggml-metal.h)
-            # get full path to the file
-            #add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
+        set(GGML_METAL_SOURCES ${DIRECTORY}/ggml-metal.m ${DIRECTORY}/ggml-metal.h)
+        # get full path to the file
+        #add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
 
-            # copy ggml-metal.metal to bin directory
-            configure_file(${DIRECTORY}/ggml-metal.metal bin/ggml-metal.metal COPYONLY)
+        # copy ggml-metal.metal to bin directory
+        configure_file(${DIRECTORY}/ggml-metal.metal bin/ggml-metal.metal COPYONLY)
 
-            set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
-                ${FOUNDATION_LIBRARY}
-                ${METAL_FRAMEWORK}
-                ${METALKIT_FRAMEWORK}
-                ${METALPERFORMANCE_FRAMEWORK}
-            )
-        endif()
+        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
+            ${FOUNDATION_LIBRARY}
+            ${METAL_FRAMEWORK}
+            ${METALKIT_FRAMEWORK}
+            ${METALPERFORMANCE_FRAMEWORK}
+        )
     endif()
 
     set(GGML_SOURCES
         ${DIRECTORY}/ggml.c
@@ -308,7 +302,10 @@ function(include_ggml DIRECTORY SUFFIX WITH_LLAMA)
         ${DIRECTORY}/ggml.h
         ${DIRECTORY}/ggml-alloc.c
         ${DIRECTORY}/ggml-alloc.h
-        ${GGML_SOURCES_QUANT_K}
+        ${DIRECTORY}/ggml-quants.c
+        ${DIRECTORY}/ggml-quants.h
+        ${DIRECTORY}/ggml-backend.c
+        ${DIRECTORY}/ggml-backend.h
        ${GGML_SOURCES_CUDA}
        ${GGML_METAL_SOURCES}
        ${GGML_OPENCL_SOURCES})
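For reference, the extra trailing argument added to both llama_tokenize calls is the new special flag (whether special tokens in the input are parsed as such); passing false keeps the previous behavior. A minimal sketch of the resize-tokenize-shrink pattern used in the prompt hunk, assuming the llama.h signature at the pinned submodule commit (the helper name and the worst-case size bound are illustrative):

    #include <string>
    #include <vector>
    #include "llama.h"

    // Tokenize `text` and append the result to `tokens`, mirroring the
    // resize-tokenize-shrink pattern from the patch above.
    static void append_tokens(const llama_model * model, const std::string & text,
                              std::vector<llama_token> & tokens, bool add_bos) {
        const size_t old_count = tokens.size();
        // Upper bound: every byte becomes its own token, plus an optional BOS.
        tokens.resize(old_count + text.size() + (add_bos ? 1 : 0));
        const int n = llama_tokenize(model, text.c_str(), text.size(),
                                     tokens.data() + old_count,
                                     tokens.size() - old_count,
                                     add_bos, /*special=*/false);
        // A negative return signals the buffer was too small; on success,
        // keep only the tokens actually written.
        tokens.resize(old_count + (n > 0 ? n : 0));
    }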