mirror of
https://gitlab.com/niansa/libjustlm.git
synced 2025-03-06 20:49:17 +01:00
Updated llama.cpp-mainline
This commit is contained in:
parent
fc5e4f5aa1
commit
ef5df1dc31
3 changed files with 25 additions and 28 deletions
|
@ -150,7 +150,7 @@ class LLaMAInference final : public Inference {
|
||||||
llama_token_data_array candidates_p = {candidates.data(), candidates.size(), false};
|
llama_token_data_array candidates_p = {candidates.data(), candidates.size(), false};
|
||||||
// Sample repeat penalty
|
// Sample repeat penalty
|
||||||
auto n_repeat_last = std::min<size_t>(state->tokens.size(), params.n_repeat_last);
|
auto n_repeat_last = std::min<size_t>(state->tokens.size(), params.n_repeat_last);
|
||||||
llama_sample_repetition_penalty(state->ctx, &candidates_p, params.n_repeat_last?(state->tokens.data()+state->tokens.size()-n_repeat_last):nullptr, n_repeat_last, params.repeat_penalty);
|
llama_sample_repetition_penalties(state->ctx, &candidates_p, params.n_repeat_last?(state->tokens.data()+state->tokens.size()-n_repeat_last):nullptr, n_repeat_last, params.repeat_penalty, 1.0f, 1.0f); // Might be wrong
|
||||||
// Grammar sampling
|
// Grammar sampling
|
||||||
if (state->grammar) {
|
if (state->grammar) {
|
||||||
llama_sample_grammar(state->ctx, &candidates_p, state->grammar);
|
llama_sample_grammar(state->ctx, &candidates_p, state->grammar);
|
||||||
|
@ -212,7 +212,7 @@ public:
|
||||||
state->tokens.resize(old_token_count+state->prompt.size());
|
state->tokens.resize(old_token_count+state->prompt.size());
|
||||||
|
|
||||||
// Run tokenizer
|
// Run tokenizer
|
||||||
const auto token_count = llama_tokenize(state->model, prompt.c_str(), prompt.size(), state->tokens.data()+old_token_count, state->tokens.size()-old_token_count, was_empty);
|
const auto token_count = llama_tokenize(state->model, prompt.c_str(), prompt.size(), state->tokens.data()+old_token_count, state->tokens.size()-old_token_count, was_empty, false);
|
||||||
state->tokens.resize(old_token_count+token_count);
|
state->tokens.resize(old_token_count+token_count);
|
||||||
|
|
||||||
// Make sure token limit isn't being hit
|
// Make sure token limit isn't being hit
|
||||||
|
@ -243,13 +243,13 @@ public:
|
||||||
LM_COTHROW(e.what(), "");
|
LM_COTHROW(e.what(), "");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (id == llama_token_eos(state->ctx)) {
|
if (id == llama_token_eos(state->model)) {
|
||||||
if (eos_count++ == params.n_eos_ignores) {
|
if (eos_count++ == params.n_eos_ignores) {
|
||||||
abort = true;
|
abort = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
state->tokens.push_back(0);
|
state->tokens.push_back(0);
|
||||||
llama_tokenize(state->model, "\n", 1, &state->tokens.back(), 1, false);
|
llama_tokenize(state->model, "\n", 1, &state->tokens.back(), 1, false, false);
|
||||||
id = state->tokens.back();
|
id = state->tokens.back();
|
||||||
} else {
|
} else {
|
||||||
// Add token
|
// Add token
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
Subproject commit 019ba1dcd0c7775a5ac0f7442634a330eb0173cc
|
Subproject commit a75fa576abba9d37f463580c379e4bbf1e1ad03c
|
|
@ -274,33 +274,27 @@ function(include_ggml DIRECTORY SUFFIX WITH_LLAMA)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(GGML_SOURCES_QUANT_K )
|
|
||||||
set(GGML_METAL_SOURCES )
|
set(GGML_METAL_SOURCES )
|
||||||
if (LLAMA_K_QUANTS)
|
|
||||||
set(GGML_SOURCES_QUANT_K
|
|
||||||
${DIRECTORY}/k_quants.h
|
|
||||||
${DIRECTORY}/k_quants.c)
|
|
||||||
|
|
||||||
if (LLAMA_METAL)
|
if (LLAMA_METAL)
|
||||||
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
|
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
|
||||||
find_library(METAL_FRAMEWORK Metal REQUIRED)
|
find_library(METAL_FRAMEWORK Metal REQUIRED)
|
||||||
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
|
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
|
||||||
find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED)
|
find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED)
|
||||||
|
|
||||||
set(GGML_METAL_SOURCES ${DIRECTORY}/ggml-metal.m ${DIRECTORY}/ggml-metal.h)
|
set(GGML_METAL_SOURCES ${DIRECTORY}/ggml-metal.m ${DIRECTORY}/ggml-metal.h)
|
||||||
# get full path to the file
|
# get full path to the file
|
||||||
#add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
|
#add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
|
||||||
|
|
||||||
# copy ggml-metal.metal to bin directory
|
# copy ggml-metal.metal to bin directory
|
||||||
configure_file(${DIRECTORY}/ggml-metal.metal bin/ggml-metal.metal COPYONLY)
|
configure_file(${DIRECTORY}/ggml-metal.metal bin/ggml-metal.metal COPYONLY)
|
||||||
|
|
||||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
|
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
|
||||||
${FOUNDATION_LIBRARY}
|
${FOUNDATION_LIBRARY}
|
||||||
${METAL_FRAMEWORK}
|
${METAL_FRAMEWORK}
|
||||||
${METALKIT_FRAMEWORK}
|
${METALKIT_FRAMEWORK}
|
||||||
${METALPERFORMANCE_FRAMEWORK}
|
${METALPERFORMANCE_FRAMEWORK}
|
||||||
)
|
)
|
||||||
endif()
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(GGML_SOURCES
|
set(GGML_SOURCES
|
||||||
|
@ -308,7 +302,10 @@ function(include_ggml DIRECTORY SUFFIX WITH_LLAMA)
|
||||||
${DIRECTORY}/ggml.h
|
${DIRECTORY}/ggml.h
|
||||||
${DIRECTORY}/ggml-alloc.c
|
${DIRECTORY}/ggml-alloc.c
|
||||||
${DIRECTORY}/ggml-alloc.h
|
${DIRECTORY}/ggml-alloc.h
|
||||||
${GGML_SOURCES_QUANT_K}
|
${DIRECTORY}/ggml-quants.c
|
||||||
|
${DIRECTORY}/ggml-quants.h
|
||||||
|
${DIRECTORY}/ggml-backend.c
|
||||||
|
${DIRECTORY}/ggml-backend.h
|
||||||
${GGML_SOURCES_CUDA}
|
${GGML_SOURCES_CUDA}
|
||||||
${GGML_METAL_SOURCES}
|
${GGML_METAL_SOURCES}
|
||||||
${GGML_OPENCL_SOURCES})
|
${GGML_OPENCL_SOURCES})
|
||||||
|
|
Loading…
Add table
Reference in a new issue