
Don't regenerate init_cache each time

Author: niansa
Date:   2023-04-23 20:43:23 +02:00
Parent: cd2b7183a8
Commit: 06b815db08

@@ -11,6 +11,7 @@
 #include <array>
 #include <vector>
 #include <unordered_map>
+#include <filesystem>
 #include <sstream>
 #include <mutex>
 #include <memory>
@@ -213,7 +214,8 @@ class Bot {
             texts.timeout = llm_translate_from_en(texts.timeout);
             texts.translated = true;
         }
-        // Inference for init cache TODO: Don't recreate on each startup
+        // Inference for init cache
+        if (!std::filesystem::exists("init_cache")) {
         LM::Inference llm(config.inference_model, llm_get_params());
         std::ofstream f("init_cache", std::ios::binary);
         // Add initial context
@@ -235,6 +237,7 @@ class Bot {
         // Serialize end result
         llm.serialize(f);
     }
+    }
     // Must run in llama thread
     void prompt_add_msg(const dpp::message& msg) {
         ENSURE_LLM_THREAD();
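
For context, the guarded block above only builds and serializes the model state when init_cache is missing; restoring the file on later startups happens elsewhere and is not part of this diff. Below is a minimal sketch of the same compute-once pattern in isolation, using a hypothetical DummyInference stand-in and load_or_build() helper rather than the real LM::Inference API (its deserialize counterpart is an assumption here, only serialize appears in this commit):

// Sketch only: DummyInference stands in for LM::Inference; its
// serialize()/deserialize() signatures are assumed, not taken from this diff.
#include <filesystem>
#include <fstream>
#include <iostream>
#include <string>

struct DummyInference {
    std::string state;                                   // placeholder for model state
    void append(const std::string &s) { state += s; }    // stands in for prompt evaluation
    void serialize(std::ostream &o) const { o << state; }
    void deserialize(std::istream &i) { std::getline(i, state, '\0'); }
};

// Compute-once pattern from the commit: the expensive initial context is
// evaluated only when the cache file is missing; later runs just reload it.
DummyInference load_or_build(const std::string &path) {
    DummyInference llm;
    if (std::filesystem::exists(path)) {
        std::ifstream f(path, std::ios::binary);
        llm.deserialize(f);                              // reuse earlier result
    } else {
        llm.append("<initial bot context>");             // costly step, done once
        std::ofstream f(path, std::ios::binary);
        llm.serialize(f);                                // persist for next startup
    }
    return llm;
}

int main() {
    auto llm = load_or_build("init_cache");
    std::cout << llm.state << '\n';
}

On the first run this writes init_cache; on every later startup the existence check skips the costly initial evaluation and simply restores the saved state.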