mirror of
https://gitlab.com/niansa/libjustlm.git
synced 2025-03-06 20:49:17 +01:00
Initial commit
This commit is contained in:
commit
aaddcc0cbd
21 changed files with 4037 additions and 0 deletions
74
.gitignore
vendored
Normal file
74
.gitignore
vendored
Normal file
|
@ -0,0 +1,74 @@
|
|||
# This file is used to ignore files which are generated
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
*~
|
||||
*.autosave
|
||||
*.a
|
||||
*.core
|
||||
*.moc
|
||||
*.o
|
||||
*.obj
|
||||
*.orig
|
||||
*.rej
|
||||
*.so
|
||||
*.so.*
|
||||
*_pch.h.cpp
|
||||
*_resource.rc
|
||||
*.qm
|
||||
.#*
|
||||
*.*#
|
||||
core
|
||||
!core/
|
||||
tags
|
||||
.DS_Store
|
||||
.directory
|
||||
*.debug
|
||||
Makefile*
|
||||
*.prl
|
||||
*.app
|
||||
moc_*.cpp
|
||||
ui_*.h
|
||||
qrc_*.cpp
|
||||
Thumbs.db
|
||||
*.res
|
||||
*.rc
|
||||
/.qmake.cache
|
||||
/.qmake.stash
|
||||
|
||||
# qtcreator generated files
|
||||
*.pro.user*
|
||||
CMakeLists.txt.user*
|
||||
|
||||
# xemacs temporary files
|
||||
*.flc
|
||||
|
||||
# Vim temporary files
|
||||
.*.swp
|
||||
|
||||
# Visual Studio generated files
|
||||
*.ib_pdb_index
|
||||
*.idb
|
||||
*.ilk
|
||||
*.pdb
|
||||
*.sln
|
||||
*.suo
|
||||
*.vcproj
|
||||
*vcproj.*.*.user
|
||||
*.ncb
|
||||
*.sdf
|
||||
*.opensdf
|
||||
*.vcxproj
|
||||
*vcxproj.*
|
||||
|
||||
# MinGW generated files
|
||||
*.Debug
|
||||
*.Release
|
||||
|
||||
# Python byte code
|
||||
*.pyc
|
||||
|
||||
# Binaries
|
||||
# --------
|
||||
*.dll
|
||||
*.exe
|
||||
|
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
[submodule "llama.cpp"]
|
||||
path = llama.cpp
|
||||
url = https://github.com/ggerganov/llama.cpp.git
|
24
CMakeLists.txt
Normal file
24
CMakeLists.txt
Normal file
|
@ -0,0 +1,24 @@
|
|||
cmake_minimum_required(VERSION 3.14)

project(libjustlm LANGUAGES C CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Backend-independent part of the library.
add_library(libjustlm STATIC
    libjustlm_core.cpp
    justlm.hpp
)

# Backend selection. The STRINGS property only adds a drop-down in
# cmake-gui/ccmake; the accepted values are still validated below.
set(LM_BACKEND "llama.cpp" CACHE STRING "The language model backend to use")
set_property(CACHE LM_BACKEND PROPERTY STRINGS "llama.cpp" "libnc gpt2")

if (LM_BACKEND STREQUAL "libnc gpt2")
    # Use CMake's thread abstraction instead of hard-coding "-lpthread".
    find_package(Threads REQUIRED)
    add_library(libjustlm_gpt2 STATIC libjustlm_gpt2.cpp gpt2/arith.c gpt2/cp_utils.c gpt2/gpt2tc.c)
    target_link_libraries(libjustlm_gpt2 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/gpt2/libnc.so Threads::Threads)
elseif (LM_BACKEND STREQUAL "llama.cpp")
    add_subdirectory(llama.cpp)
    add_library(libjustlm_llama STATIC libjustlm_llama.cpp)
    target_link_libraries(libjustlm_llama PRIVATE llama)
else()
    message(FATAL_ERROR "LM_BACKEND '${LM_BACKEND}' is unsupported. Please use either 'libnc gpt2' or 'llama.cpp'.")
endif()
|
1
gpt2/VERSION
Normal file
1
gpt2/VERSION
Normal file
|
@ -0,0 +1 @@
|
|||
2021-04-24
|
301
gpt2/arith.c
Normal file
301
gpt2/arith.c
Normal file
|
@ -0,0 +1,301 @@
|
|||
/*
|
||||
* Arithmetic coder
|
||||
*
|
||||
* Copyright (c) 2018-2021 Fabrice Bellard
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <inttypes.h>
|
||||
#include <assert.h>
|
||||
#include <time.h>
|
||||
#include <getopt.h>
|
||||
|
||||
#include "cutils.h"
|
||||
#include "arith.h"
|
||||
|
||||
#define RANGE_MIN_BITS 16
|
||||
#define RANGE_MIN ((0xff << (RANGE_MIN_BITS - 8)) + 1)
|
||||
#define RANGE_MAX (0xff << RANGE_MIN_BITS)
|
||||
|
||||
//#define DUMP_PUT_BIT
|
||||
//#define DUMP_GET_BIT
|
||||
|
||||
void put_bit_init(PutBitState *s, uint8_t *buf, int buf_size,
|
||||
PutBitWriteFunc *write_func, void *opaque)
|
||||
{
|
||||
s->low = 0;
|
||||
s->range = RANGE_MAX;
|
||||
s->current_byte = 0xff;
|
||||
s->n_bytes = 0;
|
||||
s->buf = buf;
|
||||
s->buf_size = buf_size;
|
||||
s->idx = 0;
|
||||
s->write_func = write_func;
|
||||
s->opaque = opaque;
|
||||
s->byte_count = 0;
|
||||
assert(PROB_UNIT <= RANGE_MIN);
|
||||
}
|
||||
|
||||
/*
 * Append one byte to the staging buffer; when the buffer becomes full,
 * account for it in byte_count, pass it to write_func and start over.
 */
static void put_byte(PutBitState *s, int v)
{
    s->buf[s->idx] = v;
    s->idx += 1;

    if (unlikely(s->idx == s->buf_size)) {
        /* buffer full: count the bytes first, then flush them */
        s->byte_count += s->idx;
        s->write_func(s->opaque, s->buf, s->idx);
        s->idx = 0;
    }
}
|
||||
|
||||
/* 0 <= v <= 0x1fe. The current output stream contains n_bytes with:
|
||||
current_byte, then (n_bytes - 1) x 0xff
|
||||
*/
|
||||
static void put_val(PutBitState *s, int v)
|
||||
{
|
||||
uint32_t carry, b;
|
||||
|
||||
#ifdef DUMP_PUT_BIT
|
||||
printf(" out=%d\n", v);
|
||||
#endif
|
||||
if (v == 0xff) {
|
||||
s->n_bytes++;
|
||||
} else {
|
||||
if (s->n_bytes > 0) {
|
||||
carry = v >> 8;
|
||||
put_byte(s, s->current_byte + carry);
|
||||
b = (0xff + carry) & 0xff;
|
||||
while (s->n_bytes > 1) {
|
||||
put_byte(s, b);
|
||||
s->n_bytes--;
|
||||
}
|
||||
}
|
||||
s->n_bytes = 1;
|
||||
s->current_byte = v;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Force out the pending bytes, if any, by queueing a 0 value (which is
 * not 0xff and carries nothing, so put_val() emits everything pending).
 */
static void put_val_flush(PutBitState *s)
{
    if (s->n_bytes == 0)
        return;
    put_val(s, 0);
}
|
||||
|
||||
/*
 * Renormalise the encoder interval.
 *
 * After renormalisation:
 *   0 <= low <= RANGE_MAX
 *   RANGE_MIN <= range <= RANGE_MAX
 * In the worst case before renormalisation low is 2 * RANGE_MAX, hence the
 * value passed to put_val() is at most 0x1fe.
 */
static void put_bit_renorm(PutBitState *s)
{
    const uint32_t low_mask = (1 << RANGE_MIN_BITS) - 1;

    while (s->range < RANGE_MIN) {
        /* emit the top byte (plus carry bit) and scale the interval by 256 */
        put_val(s, s->low >> RANGE_MIN_BITS);
        s->low = (s->low & low_mask) << 8;
        s->range <<= 8;
    }
}
|
||||
|
||||
/* 0 < prob0 < PROB_UNIT */
|
||||
void put_bit(PutBitState *s, int prob0, int bit)
|
||||
{
|
||||
int range0;
|
||||
|
||||
assert(s->range >= RANGE_MIN);
|
||||
range0 = ((uint64_t)s->range * prob0) >> PROB_UNIT_BITS;
|
||||
assert(range0 > 0);
|
||||
assert(range0 < s->range);
|
||||
#if defined(DUMP_PUT_BIT)
|
||||
{
|
||||
static int count;
|
||||
printf("%d: range=%d b=%d range0=%d low=%d\n",
|
||||
count++, s->range, bit, range0, s->low);
|
||||
}
|
||||
#endif
|
||||
if (!bit) {
|
||||
s->range = range0;
|
||||
} else {
|
||||
s->low += range0;
|
||||
s->range -= range0;
|
||||
}
|
||||
|
||||
put_bit_renorm(s);
|
||||
}
|
||||
|
||||
/*
 * Encode one bit without a probability model: the interval is simply
 * split in two halves (the lower half being range >> 1).
 */
void put_bit_raw(PutBitState *s, int bit)
{
    int half;

    assert(s->range >= RANGE_MIN);
    half = s->range >> 1;
    if (bit) {
        s->low += half;
        s->range -= half;
    } else {
        s->range = half;
    }

    put_bit_renorm(s);
}
|
||||
|
||||
/* return the minimum number of bits to be able to correctly decode */
|
||||
int64_t put_bit_flush(PutBitState *s)
|
||||
{
|
||||
int n, val, mask;
|
||||
|
||||
/* force larger range */
|
||||
if (s->range < (1 << RANGE_MIN_BITS)) {
|
||||
put_val(s, s->low >> RANGE_MIN_BITS);
|
||||
s->low = (s->low & ((1 << RANGE_MIN_BITS) - 1)) << 8;
|
||||
s->range <<= 8;
|
||||
}
|
||||
|
||||
/* largest n such as 2^n <= range */
|
||||
n = 0;
|
||||
while ((1 << (n + 1)) <= s->range)
|
||||
n++;
|
||||
assert(n >= RANGE_MIN_BITS && n <= (RANGE_MIN_BITS + 7));
|
||||
|
||||
val = s->low;
|
||||
mask = (1 << n) - 1;
|
||||
if ((val & mask) != 0)
|
||||
val = (val + (1 << n)) & ~mask;
|
||||
assert(val >= s->low && val < s->low + s->range);
|
||||
|
||||
put_val(s, val >> RANGE_MIN_BITS);
|
||||
put_val_flush(s);
|
||||
if (s->idx > 0) {
|
||||
s->byte_count += s->idx;
|
||||
s->write_func(s->opaque, s->buf, s->idx);
|
||||
s->idx = 0;
|
||||
}
|
||||
return (s->byte_count - 1) * 8 + (RANGE_MIN_BITS + 8 - n);
|
||||
}
|
||||
|
||||
/* return the approximate number of written bits */
|
||||
int64_t put_bit_get_bit_count(PutBitState *s)
|
||||
{
|
||||
int n;
|
||||
n = 0;
|
||||
while ((1 << (n + 1)) <= s->range)
|
||||
n++;
|
||||
return (s->byte_count + s->idx) * 8 + (RANGE_MIN_BITS + 7 - n);
|
||||
}
|
||||
|
||||
/****************************************/
|
||||
|
||||
/*
 * Scale the decoder interval by 256 and shift the next input byte into
 * low. When the buffer is exhausted it is refilled through read_func; if
 * there is no read_func the stream is padded with zeros.
 *
 * NOTE(review): the value returned by read_func is stored as buf_len
 * without being checked; behaviour when it returns 0 or a negative value
 * looks unhandled here -- confirm against the callers.
 */
static void refill(GetBitState *s)
{
    s->low <<= 8;
    s->range <<= 8;

    if (s->idx >= s->buf_len) {
        if (!s->read_func)
            return; /* pad with zeros */
        s->buf_len = s->read_func(s->opaque, s->buf, s->buf_size);
        s->byte_count += s->buf_len;
        s->idx = 0;
    }
#ifdef DUMP_GET_BIT
    printf(" in=%d\n", s->buf[s->idx]);
#endif
    s->low += s->buf[s->idx];
    s->idx++;
}
|
||||
|
||||
void get_bit_init(GetBitState *s, uint8_t *buf, size_t buf_size,
|
||||
GetBitReadFunc *read_func, void *opaque)
|
||||
{
|
||||
int i;
|
||||
s->buf_size = buf_size;
|
||||
s->buf = buf;
|
||||
s->read_func = read_func;
|
||||
s->opaque = opaque;
|
||||
if (read_func) {
|
||||
s->buf_len = 0;
|
||||
} else {
|
||||
/* prefilled buffer */
|
||||
s->buf_len = s->buf_size;
|
||||
}
|
||||
s->byte_count = s->buf_len;
|
||||
s->range = 0;
|
||||
s->low = 0;
|
||||
s->idx = 0;
|
||||
for(i = 0; i <= RANGE_MIN_BITS; i += 8) {
|
||||
refill(s);
|
||||
}
|
||||
s->range = RANGE_MAX;
|
||||
}
|
||||
|
||||
/* 0 < prob0 < PROB_UNIT */
|
||||
int get_bit(GetBitState *s, int prob0)
|
||||
{
|
||||
int b, range0;
|
||||
|
||||
assert(s->range >= RANGE_MIN);
|
||||
range0 = ((uint64_t)s->range * prob0) >> PROB_UNIT_BITS;
|
||||
assert(range0 > 0);
|
||||
assert(range0 < s->range);
|
||||
b = s->low >= range0;
|
||||
#ifdef DUMP_GET_BIT
|
||||
{
|
||||
static int count;
|
||||
printf("%d: range=%d b=%d range0=%d low=%d\n", count++, s->range, b, range0, s->low);
|
||||
}
|
||||
#endif
|
||||
if (b) {
|
||||
s->low -= range0;
|
||||
s->range -= range0;
|
||||
} else {
|
||||
s->range = range0;
|
||||
}
|
||||
while (s->range < RANGE_MIN)
|
||||
refill(s);
|
||||
return b;
|
||||
}
|
||||
|
||||
/* no context */
|
||||
int get_bit_raw(GetBitState *s)
|
||||
{
|
||||
int b, range0;
|
||||
range0 = s->range >> 1;
|
||||
b = s->low >= range0;
|
||||
if (b) {
|
||||
s->low -= range0;
|
||||
s->range -= range0;
|
||||
} else {
|
||||
s->range = range0;
|
||||
}
|
||||
if (s->range < RANGE_MIN)
|
||||
refill(s);
|
||||
return b;
|
||||
}
|
||||
|
||||
/* return the approximate number of read bits */
|
||||
int64_t get_bit_get_bit_count(GetBitState *s)
|
||||
{
|
||||
int n;
|
||||
n = 0;
|
||||
while ((1 << (n + 1)) <= s->range)
|
||||
n++;
|
||||
return (s->byte_count - s->buf_len + s->idx) * 8 - n;
|
||||
}
|
73
gpt2/arith.h
Normal file
73
gpt2/arith.h
Normal file
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
* Arithmetic coder
|
||||
*
|
||||
* Copyright (c) 2018-2019 Fabrice Bellard
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
#ifndef ARITH_H
|
||||
#define ARITH_H
|
||||
|
||||
#define PROB_UNIT_BITS 15
|
||||
#define PROB_UNIT (1 << PROB_UNIT_BITS)
|
||||
|
||||
typedef void PutBitWriteFunc(void *opaque, const uint8_t *buf, size_t buf_size);
|
||||
|
||||
typedef struct {
|
||||
uint32_t range;
|
||||
uint32_t low;
|
||||
uint8_t current_byte;
|
||||
uint32_t n_bytes;
|
||||
uint8_t *buf;
|
||||
size_t buf_size;
|
||||
size_t idx; /* current position in bytes */
|
||||
PutBitWriteFunc *write_func;
|
||||
void *opaque;
|
||||
uint64_t byte_count;
|
||||
} PutBitState;
|
||||
|
||||
void put_bit_init(PutBitState *s, uint8_t *buf, int buf_size,
|
||||
PutBitWriteFunc *write_func, void *opaque);
|
||||
void put_bit(PutBitState *s, int prob0, int bit);
|
||||
void put_bit_raw(PutBitState *s, int bit);
|
||||
int64_t put_bit_flush(PutBitState *s);
|
||||
int64_t put_bit_get_bit_count(PutBitState *s);
|
||||
|
||||
/* return the number of read bytes */
|
||||
typedef ssize_t GetBitReadFunc(void *opaque, uint8_t *buf, size_t buf_size);
|
||||
|
||||
typedef struct {
|
||||
uint8_t *buf;
|
||||
int buf_len;
|
||||
int buf_size;
|
||||
int idx;
|
||||
uint32_t low;
|
||||
uint32_t range;
|
||||
GetBitReadFunc *read_func;
|
||||
void *opaque;
|
||||
uint64_t byte_count;
|
||||
} GetBitState;
|
||||
|
||||
void get_bit_init(GetBitState *s, uint8_t *buf, size_t buf_size,
|
||||
GetBitReadFunc *read_func, void *opaque);
|
||||
int get_bit(GetBitState *s, int prob0);
|
||||
int get_bit_raw(GetBitState *s);
|
||||
int64_t get_bit_get_bit_count(GetBitState *s);
|
||||
|
||||
#endif /* ARITH_H */
|
316
gpt2/cp_utils.c
Normal file
316
gpt2/cp_utils.c
Normal file
|
@ -0,0 +1,316 @@
|
|||
/*
|
||||
* Compression utilities
|
||||
*
|
||||
* Copyright (c) 2018-2019 Fabrice Bellard
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <inttypes.h>
|
||||
#include <assert.h>
|
||||
#include <time.h>
|
||||
#include <getopt.h>
|
||||
#include <stdarg.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/stat.h>
|
||||
#ifdef _WIN32
|
||||
#include <direct.h>
|
||||
#endif
|
||||
|
||||
#include "cutils.h"
|
||||
#include "libnc.h"
|
||||
#include "cp_utils.h"
|
||||
|
||||
/*
 * Print "Fatal error: " followed by the printf-style formatted message
 * and a newline on stderr, then terminate the process with exit status 1.
 * This function does not return.
 */
void fatal_error(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    fprintf(stderr, "Fatal error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    /* every va_start must be matched by va_end, even on the exit path */
    va_end(ap);
    exit(1);
}
|
||||
|
||||
int64_t get_time_ms(void)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return (int64_t)tv.tv_sec * 1000 + (tv.tv_usec / 1000U);
|
||||
#else
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return (int64_t)ts.tv_sec * 1000 + (ts.tv_nsec / 1000000U);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Write a single byte to f. Write errors are not reported. */
void fput_u8(FILE *f, uint8_t v)
{
    const int byte = v;

    fputc(byte, f);
}
|
||||
|
||||
/*
 * Read a single byte from f into *pv.
 * Returns 0 on success, -1 if no byte could be read (*pv is left untouched).
 */
int fget_u8(FILE *f, uint8_t *pv)
{
    const int c = fgetc(f);

    if (c >= 0) {
        *pv = c;
        return 0;
    }
    return -1;
}
|
||||
|
||||
/* Write v to f as two bytes, most significant first. */
void fput_be16(FILE *f, uint16_t v)
{
    int shift;

    for (shift = 8; shift >= 0; shift -= 8)
        fputc((v >> shift) & 0xff, f);
}
|
||||
|
||||
/*
 * Read a big-endian 16 bit value from f into *pv.
 * Returns 0 on success, -1 if fewer than two bytes could be read.
 */
int fget_be16(FILE *f, uint16_t *pv)
{
    uint8_t raw[2];
    size_t got;

    got = fread(raw, 1, sizeof(raw), f);
    if (got != sizeof(raw))
        return -1;
    *pv = (raw[0] << 8) | raw[1];
    return 0;
}
|
||||
|
||||
/* Write v to f as four bytes, most significant first. */
void fput_be32(FILE *f, uint32_t v)
{
    int shift;

    for (shift = 24; shift >= 0; shift -= 8)
        fputc((int)((v >> shift) & 0xff), f);
}
|
||||
|
||||
/*
 * Read a big-endian 32 bit value from f into *pv.
 * Returns 0 on success, -1 if fewer than four bytes could be read
 * (*pv is left untouched in that case).
 */
int fget_be32(FILE *f, uint32_t *pv)
{
    uint8_t buf[4];

    if (fread(buf, 1, sizeof(buf), f) != sizeof(buf))
        return -1;
    /* Widen each byte to uint32_t before shifting: a uint8_t is promoted
       to (signed) int, and shifting a value >= 0x80 left by 24 would move
       a bit into the sign bit, which is undefined behaviour. */
    *pv = ((uint32_t)buf[0] << 24) |
          ((uint32_t)buf[1] << 16) |
          ((uint32_t)buf[2] << 8) |
          ((uint32_t)buf[3] << 0);
    return 0;
}
|
||||
|
||||
void fput_sgd_opt(FILE *f, const SGDOptParams *p)
|
||||
{
|
||||
fput_u8(f, p->algo);
|
||||
switch(p->algo) {
|
||||
case SGD_OPT_BASIC:
|
||||
break;
|
||||
case SGD_OPT_ADAM:
|
||||
fput_f32(f, p->u.adam.beta1);
|
||||
fput_f32(f, p->u.adam.beta2);
|
||||
fput_f32(f, p->u.adam.eps);
|
||||
fput_f32(f, p->u.adam.gradient_clip);
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Read optimizer parameters in the layout written by fput_sgd_opt().
 * Returns 0 on success, -1 on a short read or an unknown algorithm byte.
 */
int fget_sgd_opt(FILE *f, SGDOptParams *p)
{
    uint8_t algo_byte;

    if (fget_u8(f, &algo_byte))
        return -1;
    p->algo = algo_byte;

    if (p->algo == SGD_OPT_BASIC)
        return 0; /* no further fields */
    if (p->algo != SGD_OPT_ADAM)
        return -1;

    /* fields are read in order; stop at the first failure */
    if (fget_f32(f, &p->u.adam.beta1) ||
        fget_f32(f, &p->u.adam.beta2) ||
        fget_f32(f, &p->u.adam.eps) ||
        fget_f32(f, &p->u.adam.gradient_clip))
        return -1;
    return 0;
}
|
||||
|
||||
void dump_sgd_opt_params(FILE *f, const SGDOptParams *p)
|
||||
{
|
||||
switch(p->algo) {
|
||||
case SGD_OPT_BASIC:
|
||||
fprintf(f, " sgd_opt=%s",
|
||||
"none");
|
||||
break;
|
||||
case SGD_OPT_ADAM:
|
||||
fprintf(f, " sgd_opt=%s beta1=%g beta2=%g eps=%g gclip=%g",
|
||||
"adam",
|
||||
p->u.adam.beta1,
|
||||
p->u.adam.beta2,
|
||||
p->u.adam.eps,
|
||||
p->u.adam.gradient_clip);
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
/* Overlay used to access the bit pattern of a float as a 32 bit integer. */
typedef union {
    float f;      /* value seen as a float */
    uint32_t u32; /* same storage seen as an unsigned 32 bit integer */
} f32;
|
||||
|
||||
/* Write the 32 bit pattern of v to f in big-endian byte order. */
void fput_f32(FILE *f, float v)
{
    uint32_t bits;

    /* copy the float's object representation into an integer */
    memcpy(&bits, &v, sizeof(bits));
    fput_be32(f, bits);
}
|
||||
|
||||
/*
 * Read a float stored by fput_f32() from f into *pv.
 * Returns 0 on success, -1 on a short read (*pv is left untouched).
 */
int fget_f32(FILE *f, float *pv)
{
    uint32_t bits;

    if (fget_be32(f, &bits) != 0)
        return -1;
    /* reinterpret the 32 bit pattern as a float */
    memcpy(pv, &bits, sizeof(bits));
    return 0;
}
|
||||
|
||||
/*
 * Encode symbol sym (0 <= sym < n_symb expected) given prob_table, one
 * probability per symbol. The symbol interval is halved repeatedly and one
 * bit per split is coded with put_bit(), using the probability mass of the
 * lower half relative to the mass of the whole interval.
 */
void write_sym(PutBitState *pb, const float *prob_table, int n_symb, int sym)
{
    int first = 0;      /* first symbol of the current interval */
    int count = n_symb; /* number of symbols in the current interval */
    float mass = 1.0;   /* invariant: sum of prob_table over the interval */

    while (count > 1) {
        const int lower_count = count >> 1;
        const float lower_mass = vec_sum_f32(prob_table + first, lower_count);
        int prob0 = lrintf(lower_mass * PROB_UNIT / mass);
        const int bit = sym >= (first + lower_count);

        /* put_bit() requires 0 < prob0 < PROB_UNIT */
        prob0 = clamp_int(prob0, 1, PROB_UNIT - 1);
        put_bit(pb, prob0, bit);
        if (!bit) {
            /* continue in the lower half */
            mass = lower_mass;
            count = lower_count;
        } else {
            /* continue in the upper half */
            first += lower_count;
            count = count - lower_count;
            mass = mass - lower_mass;
        }
    }
}
|
||||
|
||||
/*
 * Decode one symbol written by write_sym() with the same prob_table and
 * n_symb: the symbol interval is halved repeatedly, one get_bit() per
 * split, until a single symbol is left. Returns that symbol's index.
 */
int read_sym(GetBitState *gb, const float *prob_table, int n_symb)
{
    int first = 0;      /* first symbol of the current interval */
    int count = n_symb; /* number of symbols in the current interval */
    float mass = 1.0;   /* invariant: sum of prob_table over the interval */

    while (count > 1) {
        const int lower_count = count >> 1;
        const float lower_mass = vec_sum_f32(prob_table + first, lower_count);
        int prob0 = lrintf(lower_mass * PROB_UNIT / mass);

        /* get_bit() requires 0 < prob0 < PROB_UNIT */
        prob0 = clamp_int(prob0, 1, PROB_UNIT - 1);
        if (!get_bit(gb, prob0)) {
            /* continue in the lower half */
            mass = lower_mass;
            count = lower_count;
        } else {
            /* continue in the upper half */
            first += lower_count;
            count = count - lower_count;
            mass = mass - lower_mass;
        }
    }
    return first;
}
|
||||
|
||||
/*
 * Create "<debug_path>/<prefix>" and, inside it, a directory named after
 * the current local time ("YYYYMMDD-HHMMSS"). The path of the latter is
 * stored in debug_dir (at most debug_dir_size bytes, truncated by
 * snprintf if needed).
 *
 * Directory creation is best effort: mkdir failures (e.g. the directory
 * already exists) are ignored.
 */
void create_debug_dir(char *debug_dir, size_t debug_dir_size,
                      const char *debug_path, const char *prefix)
{
    char name1[1024];
    struct tm tm_zero;
    const struct tm *tm;
    time_t ti;

    snprintf(name1, sizeof(name1), "%s/%s", debug_path, prefix);
#ifdef _WIN32
    _mkdir(name1);
#else
    mkdir(name1, 0777);
#endif

    ti = time(NULL);
    tm = localtime(&ti);
    if (!tm) {
        /* localtime() may fail; fall back to an all-zero broken-down time
           instead of dereferencing NULL */
        memset(&tm_zero, 0, sizeof(tm_zero));
        tm = &tm_zero;
    }
    /* struct tm fields are int, so use signed conversions */
    snprintf(debug_dir, debug_dir_size, "%s/%04d%02d%02d-%02d%02d%02d",
             name1,
             tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
             tm->tm_hour, tm->tm_min, tm->tm_sec);
#ifdef _WIN32
    _mkdir(debug_dir);
#else
    mkdir(debug_dir, 0777);
#endif
}
|
||||
|
||||
/*
 * Format val with an SI suffix (k, M, G, T) into buf and return buf.
 *
 * At least 3 significant digits are printed using at most 5 characters,
 * except for values larger than 9999T. The value is rounded toward zero.
 * Values up to 999 are printed as plain integers.
 */
char *get_si_prefix(char *buf, int buf_size, uint64_t val)
{
    /* explicit element list: no terminating NUL is needed or stored */
    static const char suffixes[] = { 'k', 'M', 'G', 'T' };
    const int last = (int)(sizeof(suffixes) / sizeof(suffixes[0])) - 1;
    uint64_t base;
    int i;

    if (val <= 999) {
        /* val is unsigned: use the unsigned 64 bit conversion */
        snprintf(buf, buf_size, "%" PRIu64, val);
    } else {
        base = 1000;
        for(i = 0; i <= last; i++) {
            /* Note: we round to 0 */
            if (val < base * 10) {
                snprintf(buf, buf_size, "%0.2f%c",
                         floor((val * 100.0) / base) / 100.0,
                         suffixes[i]);
                break;
            } else if (val < base * 100) {
                snprintf(buf, buf_size, "%0.1f%c",
                         floor((val * 10.0) / base) / 10.0,
                         suffixes[i]);
                break;
            } else if (val < base * 1000 || (i == last)) {
                /* the largest suffix also absorbs everything above it */
                snprintf(buf, buf_size,
                         "%" PRIu64 "%c",
                         val / base,
                         suffixes[i]);
                break;
            }
            base = base * 1000;
        }
    }
    return buf;
}
|
48
gpt2/cp_utils.h
Normal file
48
gpt2/cp_utils.h
Normal file
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Compression utilities
|
||||
*
|
||||
* Copyright (c) 2018-2019 Fabrice Bellard
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
#include "arith.h"
|
||||
#include "libnc.h"
|
||||
|
||||
void __attribute__((noreturn, format(printf, 1, 2))) fatal_error(const char *fmt, ...);
|
||||
|
||||
int64_t get_time_ms(void);
|
||||
void fput_u8(FILE *f, uint8_t v);
|
||||
int fget_u8(FILE *f, uint8_t *pv);
|
||||
void fput_be16(FILE *f, uint16_t v);
|
||||
int fget_be16(FILE *f, uint16_t *pv);
|
||||
void fput_be32(FILE *f, uint32_t v);
|
||||
int fget_be32(FILE *f, uint32_t *pv);
|
||||
void fput_f32(FILE *f, float v);
|
||||
int fget_f32(FILE *f, float *pv);
|
||||
void fput_sgd_opt(FILE *f, const SGDOptParams *p);
|
||||
int fget_sgd_opt(FILE *f, SGDOptParams *p);
|
||||
void dump_sgd_opt_params(FILE *f, const SGDOptParams *p);
|
||||
|
||||
void write_sym(PutBitState *pb, const float *prob_table, int n_symb, int sym);
|
||||
int read_sym(GetBitState *gb, const float *prob_table, int n_symb);
|
||||
|
||||
void create_debug_dir(char *debug_dir, size_t debug_dir_size,
|
||||
const char *debug_path, const char *prefix);
|
||||
char *get_si_prefix(char *buf, int buf_size, uint64_t val);
|
||||
|
152
gpt2/cutils.h
Normal file
152
gpt2/cutils.h
Normal file
|
@ -0,0 +1,152 @@
|
|||
#ifndef CUTILS_H
|
||||
#define CUTILS_H
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
#define force_inline inline __attribute__((always_inline))
|
||||
#define no_inline __attribute__((noinline))
|
||||
#define __unused __attribute__((unused))
|
||||
#define xglue(x, y) x ## y
|
||||
#define glue(x, y) xglue(x, y)
|
||||
#ifndef offsetof
|
||||
#define offsetof(type, field) ((size_t) &((type *)0)->field)
|
||||
#endif
|
||||
#define countof(x) (sizeof(x) / sizeof(x[0]))
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
|
||||
typedef int BOOL;
|
||||
|
||||
#ifndef FALSE
|
||||
enum {
|
||||
FALSE = 0,
|
||||
TRUE = 1,
|
||||
};
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
uint16_t u16;
|
||||
} bfloat16_t;
|
||||
|
||||
#if defined(__x86_64__)
|
||||
static inline int64_t get_cycles(void)
|
||||
{
|
||||
uint32_t low,high;
|
||||
int64_t val;
|
||||
asm volatile("rdtsc" : "=a" (low), "=d" (high));
|
||||
val = high;
|
||||
val <<= 32;
|
||||
val |= low;
|
||||
return val;
|
||||
}
|
||||
#else
|
||||
static inline int64_t get_cycles(void)
|
||||
{
|
||||
int64_t val;
|
||||
asm volatile ("rdtsc" : "=A" (val));
|
||||
return val;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Return the larger of a and b. */
static inline int max_int(int a, int b)
{
    return (a > b) ? a : b;
}
|
||||
|
||||
/* Return the smaller of a and b. */
static inline int min_int(int a, int b)
{
    return (a < b) ? a : b;
}
|
||||
|
||||
/* Return the larger of a and b. */
static inline size_t max_size_t(size_t a, size_t b)
{
    return (a > b) ? a : b;
}
|
||||
|
||||
/* Return the smaller of a and b. */
static inline size_t min_size_t(size_t a, size_t b)
{
    return (a < b) ? a : b;
}
|
||||
|
||||
/* Return the larger of a and b. */
static inline ssize_t max_ssize_t(ssize_t a, ssize_t b)
{
    return (a > b) ? a : b;
}
|
||||
|
||||
/* Return the smaller of a and b. */
static inline ssize_t min_ssize_t(ssize_t a, ssize_t b)
{
    return (a < b) ? a : b;
}
|
||||
|
||||
/*
 * Limit val to [min_val, max_val]. The lower bound is tested first, so
 * min_val is returned whenever val < min_val.
 */
static inline int clamp_int(int val, int min_val, int max_val)
{
    return (val < min_val) ? min_val
         : (val > max_val) ? max_val
         : val;
}
|
||||
|
||||
/*
 * Limit val to [min_val, max_val]. The lower bound is tested first; a
 * value for which both comparisons are false is returned unchanged.
 */
static inline float clamp_float(float val, float min_val, float max_val)
{
    return (val < min_val) ? min_val
         : (val > max_val) ? max_val
         : val;
}
|
||||
|
||||
/*
 * Count the leading zero bits of a.
 * WARNING: the result is undefined if a = 0.
 */
static inline int clz32(unsigned int a)
{
    const int leading_zeros = __builtin_clz(a);

    return leading_zeros;
}
|
||||
|
||||
/*
 * Count the leading zero bits of the 64 bit value a.
 * WARNING: the result is undefined if a = 0.
 */
static inline int clz64(uint64_t a)
{
    const int leading_zeros = __builtin_clzll(a);

    return leading_zeros;
}
|
||||
|
||||
/*
 * Return the index of the highest set bit of a, i.e. floor(log2(a)).
 * Relies on clz64(), so the result is undefined for a = 0.
 */
static inline int floor_log2(uint64_t a)
{
    const int leading_zeros = clz64(a);

    return 63 - leading_zeros;
}
|
||||
|
||||
/*
 * Return the smallest n such that 2^n >= a; 0 for a <= 1 (which also
 * keeps clz64() away from a zero argument).
 */
static inline int ceil_log2(uint64_t a)
{
    return (a <= 1) ? 0 : 64 - clz64(a - 1);
}
|
||||
|
||||
/* Return x squared. */
static inline float squaref(float x)
{
    const float squared = x * x;

    return squared;
}
|
||||
|
||||
#define DUP8(a) a, a, a, a, a, a, a, a
|
||||
|
||||
#endif /* CUTILS_H */
|
||||
|
2023
gpt2/gpt2tc.c
Normal file
2023
gpt2/gpt2tc.c
Normal file
File diff suppressed because it is too large
Load diff
143
gpt2/gpt2tc.h
Normal file
143
gpt2/gpt2tc.h
Normal file
|
@ -0,0 +1,143 @@
|
|||
#ifndef _GPT2TC_H
|
||||
#define _GPT2TC_H
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "cutils.h"
|
||||
#include "arith.h"
|
||||
#include "cp_utils.h"
|
||||
#include "list.h"
|
||||
#include "libnc.h"
|
||||
|
||||
#define MAX_INITIAL_TEXT_LEN 256 /* in symbols */
|
||||
#define MAX_OUTPUT_LEN 100
|
||||
#define DEFAULT_TOP_K 40
|
||||
#define DEFAULT_TOP_P 0.9
|
||||
#define BATCH_SIZE_MAX 16
|
||||
//#define BATCH_SIZE_MAX 1
|
||||
|
||||
|
||||
typedef uint16_t DataSymbol;
|
||||
|
||||
/* Identifiers of the supported GPT-2 model variants (values 0..3). */
typedef enum {
    GPT2_MODEL_117M = 0,
    GPT2_MODEL_345M = 1,
    GPT2_MODEL_774M = 2,
    GPT2_MODEL_1558M = 3,
} GPT2ModelEnum;
|
||||
|
||||
typedef struct {
|
||||
BOOL is_decoder;
|
||||
int n_layer;
|
||||
int d_model;
|
||||
int n_head;
|
||||
int d_key;
|
||||
int d_value;
|
||||
int d_inner;
|
||||
int n_ctx;
|
||||
int n_symbols;
|
||||
uint32_t seed;
|
||||
} TransformerModelParams;
|
||||
|
||||
typedef struct {
|
||||
NCTensor *ln_1_g, *ln_1_b;
|
||||
NCTensor *attn_w, *attn_b;
|
||||
NCTensor *attn_proj_w, *attn_proj_b;
|
||||
|
||||
NCTensor *ln_2_g, *ln_2_b;
|
||||
NCTensor *mlp_fc_w, *mlp_fc_b;
|
||||
NCTensor *mlp_proj_w, *mlp_proj_b;
|
||||
} TransformerLayer;
|
||||
|
||||
typedef struct {
|
||||
RNDState rnd_state;
|
||||
NCContext *model;
|
||||
NCDevice *device;
|
||||
int n_layer;
|
||||
int d_model;
|
||||
int n_head;
|
||||
int d_key;
|
||||
int d_value;
|
||||
int d_inner;
|
||||
int n_symbols;
|
||||
int n_ctx;
|
||||
|
||||
/* parameters */
|
||||
NCParamList param_list;
|
||||
TransformerLayer *layers;
|
||||
NCTensor *wte, *wpe, *wte_trans;
|
||||
NCTensor *ln_f_g, *ln_f_b;
|
||||
} TransformerModel;
|
||||
|
||||
/* One entry of a WordList: a byte string plus a chaining index. */
typedef struct Word {
    uint32_t next; /* index of the next entry in the chain, -1 = end */
    uint32_t len;  /* presumably the length of buf in bytes -- confirm */
    uint8_t *buf;
} Word;
|
||||
|
||||
typedef struct {
|
||||
Word *words;
|
||||
size_t word_count;
|
||||
size_t word_size;
|
||||
uint32_t *hash_table;
|
||||
int hash_size;
|
||||
int hash_bits;
|
||||
} WordList;
|
||||
|
||||
typedef struct {
|
||||
TransformerModel *trf_state;
|
||||
WordList *wl;
|
||||
} TextCompleteGlobalState;
|
||||
|
||||
typedef struct {
|
||||
struct list_head link;
|
||||
TextCompleteGlobalState *global_state;
|
||||
int top_k;
|
||||
float top_p;
|
||||
float temperature;
|
||||
RNDState rnd_state;
|
||||
NCTensor **mem_k, **mem_v;
|
||||
DataSymbol *input_buf;
|
||||
int input_buf_len;
|
||||
int text_len; /* current input text len */
|
||||
BOOL is_first;
|
||||
int last_c;
|
||||
int max_output_len;
|
||||
|
||||
/* output */
|
||||
char out_text[1024];
|
||||
int out_text_len; /* 0 means end of output */
|
||||
} TextGenContext;
|
||||
|
||||
GPT2ModelEnum parse_model(const char *str);
|
||||
void trf_set_params(TransformerModelParams *p, GPT2ModelEnum model);
|
||||
void gpt2_pp_encode(const char *word_filename, const char *in_filename, const char *out_filename);
|
||||
size_t gpt2_pp_encode_buf(WordList *s, DataSymbol **pout_buf, const uint8_t *buf, size_t buf_size);
|
||||
void gpt2_pp_decode(const char *word_filename, const char *in_filename, const char *out_filename);
|
||||
char *trim_text(const char *str);
|
||||
TextCompleteGlobalState *text_complete_global_init(GPT2ModelEnum model, const char *filename);
|
||||
void text_complete_global_end(TextCompleteGlobalState *tcs);
|
||||
TextGenContext *text_complete_start(TextCompleteGlobalState *tcs, const char *input_text, int top_k, float top_p, float temperature, int seed, int max_output_len);
|
||||
void text_complete_next(TextCompleteGlobalState *tcs, struct list_head *ts_list);
|
||||
void text_complete_end(TextGenContext *ts);
|
||||
void text_complete(GPT2ModelEnum model, const char *model_filename, const char *input_text, int top_k, float top_p, float temperature, int max_output_len, int batch_size, int seed, BOOL verbose);
|
||||
int unicode_to_utf8(uint8_t *buf, unsigned int c);
|
||||
int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp);
|
||||
size_t convert_to_chars(char **pout_buf, uint8_t *buf, size_t n_bits);
|
||||
ssize_t convert_from_chars(uint8_t **pout_buf, const char *str);
|
||||
int encode_length(PutBitState *pb, uint32_t val);
|
||||
int decode_length(GetBitState *gb);
|
||||
int text_decompress(TextCompleteGlobalState *tcs, char **poutput_text, const char *input_text);
|
||||
int text_compress(TextCompleteGlobalState *tcs, char **poutput_text, const char *input_text, BOOL dump_stats);
|
||||
void text_compress_test(GPT2ModelEnum model, const char *model_filename, const char *input_text, BOOL is_decode, BOOL verbose);
|
||||
int file_compress(TextCompleteGlobalState *tcs, const char *infilename, const char *outfilename);
|
||||
int file_decompress(TextCompleteGlobalState *tcs, const char *infilename, const char *outfilename);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
BIN
gpt2/gpt2vocab.txt
Normal file
BIN
gpt2/gpt2vocab.txt
Normal file
Binary file not shown.
426
gpt2/libnc.h
Normal file
426
gpt2/libnc.h
Normal file
|
@ -0,0 +1,426 @@
|
|||
/*
|
||||
* LibNC
|
||||
*
|
||||
* Copyright (c) 2018-2019 Fabrice Bellard
|
||||
*
|
||||
*/
|
||||
#ifndef LIBNC_H
|
||||
#define LIBNC_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include "cutils.h"
|
||||
#include "list.h"
|
||||
|
||||
/* profiling */
|
||||
|
||||
/* Profiling counters; each value indexes the prof_cycles / prof_samples /
   prof_ops arrays. PROF_COUNT is the number of counters. */
typedef enum {
    PROF_EVAL = 0,
    PROF_GRAD = 1,
    PROF_SGD = 2,
    PROF_UPDATE = 3,
    PROF_WRITE_SYM = 4,
    PROF_PROBE = 5,
    PROF_TOTAL = 6,
    PROF_COUNT = 7,
} ProfEnum;
|
||||
|
||||
#ifdef PROFILE

/* Accumulators indexed by ProfEnum */
extern int64_t prof_cycles[PROF_COUNT];
extern int64_t prof_samples[PROF_COUNT];
extern int64_t prof_ops[PROF_COUNT];

/* Open a measurement: subtract the current cycle counter so that the
   matching prof_end() leaves the elapsed cycles in prof_cycles[idx]. */
static inline void prof_start(int idx)
{
    prof_cycles[idx] -= get_cycles();
}

/* Close a measurement and count one sample. */
static inline void prof_end(int idx)
{
    prof_cycles[idx] += get_cycles();
    ++prof_samples[idx];
}

/* Same as prof_end() and additionally account 'n_ops' operations. */
static inline void prof_end_ops(int idx, int n_ops)
{
    prof_cycles[idx] += get_cycles();
    prof_ops[idx] += n_ops;
    ++prof_samples[idx];
}

#else

/* Profiling disabled: the hooks compile to nothing. */

static inline void prof_start(int idx)
{
    (void)idx;
}

static inline void prof_end(int idx)
{
    (void)idx;
}

static inline void prof_end_ops(int idx, int n_ops)
{
    (void)idx;
    (void)n_ops;
}

#endif
|
||||
|
||||
void nc_prof_dump(void);
|
||||
|
||||
/* Automatic Differentiation Engine */
|
||||
|
||||
typedef struct NCContext NCContext;
|
||||
typedef struct NCDevice NCDevice;
|
||||
typedef struct NCTensor NCTensor;
|
||||
typedef struct NCTensorBuffer NCTensorBuffer;
|
||||
typedef struct NCNode NCNode;
|
||||
typedef struct NCRNDState NCRNDState;
|
||||
typedef struct NCSGDOptState NCSGDOptState;
|
||||
|
||||
/* Tensor element types. The values index nc_type_size_table and
   nc_type_name_table; NC_TYPE_COUNT is the number of types. */
typedef enum {
    NC_TYPE_F32 = 0,
    NC_TYPE_BF16 = 1,
    NC_TYPE_F16 = 2,
    NC_TYPE_I8 = 3,
    NC_TYPE_I16 = 4,
    NC_TYPE_I32 = 5,
    NC_TYPE_COUNT = 6,
} NCTypeEnum;
|
||||
|
||||
extern size_t nc_type_size_table[NC_TYPE_COUNT];
|
||||
extern const char *nc_type_name_table[NC_TYPE_COUNT];
|
||||
|
||||
#define NC_N_DIMS_MAX 4 /* maximum number of axis for tensors */
|
||||
|
||||
/* Description of a tensor's storage, filled in by nc_tensor_get_data(). */
typedef struct NCTensorData {
    NCTypeEnum item_type;
    size_t item_size;
    void *data;
    /* in elements */
    size_t stride;
    /* prod(j = 1 ... n_dims, dims[j]); */
    size_t n_strides;
    int n_dims;
    /* n_dims length */
    const size_t *dims;
    /* n_dims length, strides in bytes */
    const size_t *strides;
} NCTensorData;
|
||||
|
||||
void *nc_malloc(size_t size);
|
||||
void *nc_mallocz(size_t size);
|
||||
void nc_free(void *ptr);
|
||||
|
||||
NCContext *nc_context_init(int nb_threads);
|
||||
void nc_context_end(NCContext *m);
|
||||
|
||||
NCDevice *nc_new_cpu_device(NCContext *m);
|
||||
NCDevice *nc_new_cuda_device(NCContext *m, int device_index);
|
||||
NCDevice *nc_new_device(NCContext *m, const char *device_name);
|
||||
void nc_synchronize(NCDevice *d);
|
||||
|
||||
NCTensorBuffer *nc_new_tensor_buffer(NCDevice *d, size_t size);
|
||||
NCTensorBuffer *nc_dup_tensor_buffer(const NCTensorBuffer *b);
|
||||
void nc_free_tensor_buffer(NCTensorBuffer *b);
|
||||
|
||||
NCTensor *nc_new_tensor(NCDevice *d, NCTypeEnum type,
|
||||
int n_dims, const size_t *dims);
|
||||
NCTensor *nc_new_tensor_from_tensor(const NCTensor *x);
|
||||
NCTensor *nc_new_tensor_from_tensor_nz(const NCTensor *x);
|
||||
NCTensor *nc_new_scalar(NCDevice *d, NCTypeEnum type);
|
||||
NCTensor *nc_new_tensor_1d(NCDevice *d, NCTypeEnum type, size_t len);
|
||||
NCTensor *nc_new_tensor_2d(NCDevice *d, NCTypeEnum type, size_t n0, size_t n1);
|
||||
NCTensor *nc_new_tensor_3d(NCDevice *d, NCTypeEnum type,
|
||||
size_t n0, size_t n1, size_t n2);
|
||||
NCTensor *nc_new_tensor_4d(NCDevice *d, NCTypeEnum type,
|
||||
size_t n0, size_t n1, size_t n2, size_t n3);
|
||||
NCTensor *__attribute__((format(printf, 2, 3))) nc_tensor_set_name(NCTensor *x, const char *fmt, ...);
|
||||
NCTensor *nc_dup_tensor(const NCTensor *x);
|
||||
void nc_free_tensor(NCTensor *x);
|
||||
void nc_dump_tensor(const char *name, NCTensor *x, size_t n);
|
||||
uint32_t nc_tensor_get_hash(NCTensor *x);
|
||||
void nc_dump_tensor_hash(const char *name, const NCTensor *x);
|
||||
NCNode *nc_get_node(NCTensor *x);
|
||||
/* create an alias to tensor 'x1'. Gradient is not propagated thru it */
|
||||
NCTensor *nc_slice_alias(const NCTensor *x1, int axis, size_t start, size_t end);
|
||||
|
||||
NCTypeEnum nc_tensor_get_item_type(const NCTensor *x);
|
||||
NCTensorData *nc_tensor_get_data(NCTensorData *sd, const NCTensor *x);
|
||||
/* Return a pointer to the tensor data. If pstride is non NULL,
|
||||
return the stride (in elements) of the first dimension. */
|
||||
void *nc_tensor_get_ptr(NCTensor *x, size_t *pstride);
|
||||
const size_t *nc_tensor_get_dims(const NCTensor *x, int *pn_dims);
|
||||
void nc_tensor_set_zero(NCTensor *y);
|
||||
void nc_tensor_set_f32(NCTensor *y, float val);
|
||||
NCRNDState *nc_rnd_init(NCDevice *d, uint32_t seed);
|
||||
void nc_rnd_end(NCRNDState *s);
|
||||
void nc_tensor_set_rnd_unif(NCTensor *y, float avg, float range,
|
||||
NCRNDState *rnd_state);
|
||||
void nc_tensor_set_dropout(NCTensor *y, float prob, NCRNDState *rnd_state);
|
||||
|
||||
void nc_set1_i32(NCTensor *y, int n_dims, const size_t *tab_indexes,
|
||||
int32_t val);
|
||||
void nc_set1_i32_1d(NCTensor *y, size_t i0, int32_t val);
|
||||
void nc_set1_i32_2d(NCTensor *y, size_t i0, size_t i1, int32_t val);
|
||||
void nc_set1_f32(NCTensor *y, int n_dims, const size_t *tab_indexes,
|
||||
float val);
|
||||
void nc_set1_f32_1d(NCTensor *y, size_t i0, float val);
|
||||
|
||||
int32_t nc_get1_i32(const NCTensor *x, int n_dims, const size_t *tab_indexes);
|
||||
float nc_get1_f32(const NCTensor *x, int n_dims, const size_t *tab_indexes);
|
||||
float nc_get1_f32_1d(const NCTensor *x, size_t i0);
|
||||
float nc_get_scalar_f32(const NCTensor *x);
|
||||
|
||||
void nc_tensor_copy(NCTensor *dst, NCTensor *src);
|
||||
void nc_tensor_convert(NCTensor *dst, NCTensor *src);
|
||||
|
||||
void nc_dump_dims(const char *str, NCTensor *x);
|
||||
size_t nc_get_heap_size(NCContext *m);
|
||||
NCContext *nc_get_tensor_context(const NCTensor *x);
|
||||
NCTensor *nc_tensor_to_device(NCTensor *x, NCDevice *d);
|
||||
NCTensor *nc_tensor_to_cpu_device(NCTensor *x);
|
||||
NCDevice *nc_get_tensor_device(const NCTensor *x);
|
||||
|
||||
/* element wise operations */
|
||||
NCTensor *nc_convert(NCTensor *x, NCTypeEnum new_type);
|
||||
NCTensor *nc_add(NCTensor *x1, NCTensor *x2);
|
||||
NCTensor *nc_neg(NCTensor *x);
|
||||
NCTensor *nc_sub(NCTensor *x1, NCTensor *x2);
|
||||
NCTensor *nc_mul(NCTensor *x1, NCTensor *x2);
|
||||
NCTensor *nc_div(NCTensor *x1, NCTensor *x2);
|
||||
NCTensor *nc_recip(NCTensor *x);
|
||||
NCTensor *nc_min(NCTensor *x1, NCTensor *x2);
|
||||
NCTensor *nc_max(NCTensor *x1, NCTensor *x2);
|
||||
/* select x1[i] if z[i] = 0 and x2[i] otherwise */
|
||||
NCTensor *nc_select(NCTensor *z, NCTensor *x1, NCTensor *x2);
|
||||
/* set y[i] = x[i] if mask[i] = 0 and y[i] = c if mask[i] != 0. If
|
||||
mask_inv is TRUE, 'mask' is inverted */
|
||||
NCTensor *nc_masked_fill(NCTensor *x, NCTensor *mask, float c, BOOL mask_inv);
|
||||
NCTensor *nc_sigmoid(NCTensor *x);
|
||||
NCTensor *nc_tanh(NCTensor *x);
|
||||
NCTensor *nc_relu(NCTensor *x);
|
||||
NCTensor *nc_gelu(NCTensor *x);
|
||||
NCTensor *nc_log(NCTensor *x);
|
||||
/* return cp * fg + min(1 - fg, ig) * in */
|
||||
NCTensor *nc_lstm_clamped(NCTensor *cp, NCTensor *in,
|
||||
NCTensor *fg, NCTensor *ig);
|
||||
/* return a * (1 - t) + b * t */
|
||||
NCTensor *nc_lerp(NCTensor *a, NCTensor *b, NCTensor *t);
|
||||
|
||||
/* other operations */
|
||||
NCTensor *nc_new_vec_f32(NCDevice *d, size_t n, float val);
|
||||
NCTensor *nc_new_f32(NCDevice *d, float val);
|
||||
NCTensor *nc_reshape(NCTensor *x, int n_dims, const size_t *dims);
|
||||
NCTensor *nc_reshape_1d(NCTensor *x, size_t n0);
|
||||
NCTensor *nc_reshape_2d(NCTensor *x, size_t n0, size_t n1);
|
||||
NCTensor *nc_reshape_3d(NCTensor *x, size_t n0, size_t n1, size_t n2);
|
||||
NCTensor *nc_reshape_4d(NCTensor *x, size_t n0, size_t n1, size_t n2,
|
||||
size_t n3);
|
||||
/* duplicate the tensor by adding n_dims dimensions */
|
||||
NCTensor *nc_repeat(NCTensor *x, int n_dims, const size_t *dims);
|
||||
NCTensor *nc_repeat_1d(NCTensor *x, size_t n);
|
||||
/* return y0 + sum over the dimensions > n_dims of 'x'. y0 = NULL
|
||||
is supported */
|
||||
NCTensor *nc_reduce_sum(NCTensor *y0, NCTensor *x, int n_dims);
|
||||
/* sum all the elements of a tensor */
|
||||
NCTensor *nc_sum(NCTensor *x);
|
||||
/* sum of squares */
|
||||
NCTensor *nc_reduce_sum_sqr(NCTensor *x);
|
||||
NCTensor *nc_slice(NCTensor *x, int axis, size_t start, size_t end);
|
||||
NCTensor *nc_slice_add(NCTensor *y0, NCTensor *x, int axis, size_t start);
|
||||
/* concatenation along axis 'axis' */
|
||||
NCTensor *nc_concat(NCTensor **inputs, int n_inputs, int axis);
|
||||
/* shortcut for axis = 0 */
|
||||
NCTensor *nc_vconcat(NCTensor **inputs, int n_inputs);
|
||||
/* shortcut for axis = 1 */
|
||||
NCTensor *nc_hconcat(NCTensor **inputs, int n_inputs);
|
||||
/* split along axis 'axis'. If tab_size = NULL, split equally. */
|
||||
void nc_split(NCTensor **tab_y, NCTensor *x, int n_outputs,
|
||||
const size_t *tab_size, int axis);
|
||||
/* shortcut for axis = 0 */
|
||||
void nc_vsplit(NCTensor **tab_y, NCTensor *x, int n_outputs,
|
||||
const size_t *tab_size);
|
||||
/* shortcut for axis = 1 */
|
||||
void nc_hsplit(NCTensor **tab_y, NCTensor *x, int n_outputs,
|
||||
const size_t *tab_size);
|
||||
|
||||
/* Padding and trimming modes for nc_pad(); the trim names alias the pad
   values. */
typedef enum {
    NC_PAD_ZERO = 0,
    /* duplicate element */
    NC_PAD_DUP = 1,
    /* trim types, dual to padding */
    NC_TRIM_NORMAL = NC_PAD_ZERO,
    /* add trimmed elements to the edge */
    NC_TRIM_SUM = 1,
} NCPadEnum;
|
||||
|
||||
/* pad (len > 0) or trim (len < 0) the axis 0 of 'x' */
|
||||
NCTensor *nc_pad(NCTensor *x, ssize_t left_len, NCPadEnum left_op,
|
||||
ssize_t right_len, NCPadEnum right_op);
|
||||
/* shortcut to nc_pad() */
|
||||
NCTensor *nc_resize(NCTensor *x, size_t n);
|
||||
|
||||
/* if x is not contiguous then create a new contiguous tensor and copy
|
||||
x to it. Otherwise, return 'x'. */
|
||||
NCTensor *nc_make_contiguous(NCTensor *x);
|
||||
/* Return a new tensor sharing the same buffer as 'x' with the permuted
|
||||
dimensions. axis[i] is the corresponding axis in 'x' */
|
||||
NCTensor *nc_permute_alias(NCTensor *x, int n_dims, const int *axis);
|
||||
/* same as nc_permute_alias but calls nc_make_contiguous after. */
|
||||
NCTensor *nc_permute(NCTensor *x, int n_dims, const int *axis);
|
||||
/* special case of nc_permute() */
|
||||
NCTensor *nc_transpose(NCTensor *x);
|
||||
NCTensor *nc_matmul(NCTensor *w, NCTensor *x);
|
||||
/* return w*x + y0. w and x can be optionally transposed. y0 can be NULL */
|
||||
NCTensor *nc_matmul_add(NCTensor *w, NCTensor *x, NCTensor *y0,
|
||||
BOOL w_trans, BOOL x_trans);
|
||||
NCTensor *nc_matmul_stride(NCTensor *w, NCTensor *x);
|
||||
/* return a matrix where each column is the column x[i] of matrix 'w' */
|
||||
NCTensor *nc_get_col(NCTensor *w, NCTensor *x);
|
||||
/* add the vectors 'z' at column number 'x' in matrix 'w'. */
|
||||
NCTensor *nc_add_col(NCTensor *z, NCTensor *x, NCTensor *w);
|
||||
/* select the x-th element in each column of 'w' */
|
||||
NCTensor *nc_get_element(NCTensor *w, NCTensor *x);
|
||||
/* add z to the x-th element in each column of 'w' */
|
||||
NCTensor *nc_add_element(NCTensor *z, NCTensor *x, NCTensor *w);
|
||||
NCTensor *nc_soft_max(NCTensor *x);
|
||||
/* Equivalent to y = log(get_element(x, eout)). It is expected to be
|
||||
used as nc_indexed_log(nc_soft_max(x), eout) so that the gradient
|
||||
computation is optimized. */
|
||||
NCTensor *nc_indexed_log(NCTensor *x, NCTensor *eout);
|
||||
NCTensor *nc_layer_norm(NCTensor *x, float eps);
|
||||
NCTensor *nc_rms_norm(NCTensor *x, float eps);
|
||||
NCTensor *nc_slt_mat_set(NCTensor *x, size_t pos, float c);
|
||||
/* shift the column 'i' by 'pos + i * mult' elements and pad with zeros */
|
||||
NCTensor *nc_rel_shift(NCTensor *x, ssize_t pos, ssize_t mult);
|
||||
|
||||
/* auto differentiation */
|
||||
|
||||
/* get_col_index is non NULL in the sparse gradient case */
|
||||
typedef void NCParamUpdateFunc(void *opaque, NCTensor *grad,
|
||||
NCTensor *get_col_index);
|
||||
|
||||
/* add a 'parameter' graph node to 'x' and return 'x'. */
|
||||
NCTensor *nc_set_param(NCTensor *x, void *opaque);
|
||||
/* return a new tensor with its graph removed */
|
||||
NCTensor *nc_stop_grad(NCTensor *x);
|
||||
|
||||
/* manipulation of graph nodes */
|
||||
NCNode *nc_dup_node(const NCNode *n);
|
||||
void nc_free_node(NCNode *n);
|
||||
void nc_combine_nodes(NCContext *m, NCNode **tab_op1, int count,
|
||||
int axis, int elem_size, const size_t *tab_elem_size);
|
||||
NCNode *nc_concat_node(NCContext *m, NCNode **inputs, int count,
|
||||
int axis, const size_t *tab_size);
|
||||
void nc_concat_optimization(NCContext *m, NCNode **concat_nodes, int count);
|
||||
void nc_node_set_parent(NCNode *n, int arg_index, const NCNode *n1);
|
||||
void nc_node_set_arg(NCNode *n, int arg_index, const NCTensor *x);
|
||||
|
||||
#define NC_BW_KEEP_GRAD_GRAPH (1 << 0)
|
||||
/* optimize the nc_get_col() gradient */
|
||||
#define NC_BW_SPARSE_GRAD (1 << 1)
|
||||
|
||||
void nc_backward(const NCTensor *x, NCTensor *grad,
|
||||
NCParamUpdateFunc *param_update_func, int flags);
|
||||
void nc_dump_graph(NCTensor *x);
|
||||
|
||||
/* utilities for function parameters */
|
||||
|
||||
typedef struct {
|
||||
struct list_head link;
|
||||
NCTensor **pval; /* pointer to the tensor location */
|
||||
char *name; /* parameter name */
|
||||
NCTensor *low_part; /* if BF16 parameter, additional 16 bit precision */
|
||||
NCTensor *saved_grad; /* debug */
|
||||
/* SGD opt data */
|
||||
struct SGDOptVarState *sgd_opt;
|
||||
} NCParam;
|
||||
|
||||
typedef struct {
|
||||
struct list_head param_list;
|
||||
BOOL add_graph;
|
||||
} NCParamList;
|
||||
|
||||
void nc_param_list_init(NCParamList *pl);
|
||||
void nc_param_list_set_graph(NCParamList *pl, BOOL add_graph);
|
||||
NCParam *nc_new_param_str(NCParamList *pl, NCTensor **pval, const char *str);
|
||||
__attribute__((format(printf, 3, 4))) NCParam *nc_new_param(NCParamList *pl, NCTensor **pval, const char *fmt, ...);
|
||||
void nc_param_list_end(NCParamList *pl);
|
||||
|
||||
NCParam *nc_find_param(NCParamList *pl, const char *name);
|
||||
size_t nc_get_param_count(NCParamList *pl);
|
||||
void nc_save_coefs(NCParamList *pl, const char *filename);
|
||||
void nc_load_coefs(NCParamList *pl, const char *filename);
|
||||
void nc_save_state(NCParamList *pl, const char *filename);
|
||||
void nc_load_state(NCParamList *pl, const char *filename);
|
||||
|
||||
/* SGD optimizer */
|
||||
|
||||
/* Optimizer algorithm selector stored in SGDOptParams.algo. */
typedef enum {
    SGD_OPT_BASIC = 0,
    SGD_OPT_ADAM = 1,
    SGD_OPT_TEST = 2,
} SGDOptAlgoEnum;
|
||||
|
||||
typedef struct {
|
||||
SGDOptAlgoEnum algo;
|
||||
union {
|
||||
struct {
|
||||
float beta1;
|
||||
float beta2;
|
||||
float eps;
|
||||
float gradient_clip; /* if != 0, per parameter gradient clipping */
|
||||
} adam;
|
||||
} u;
|
||||
float lr;
|
||||
} SGDOptParams;
|
||||
|
||||
NCSGDOptState *nc_sgd_opt_init(NCContext *m, const SGDOptParams *p);
|
||||
void nc_sgd_opt_end(NCSGDOptState *s);
|
||||
void sgd_opt_update_var(void *opaque, NCTensor *yg, NCTensor *get_col_index);
|
||||
|
||||
/* set the SGD optimizer 's' to all parameters of the model */
|
||||
void nc_sgd_opt_set_all(NCParamList *param_list, NCSGDOptState *s);
|
||||
|
||||
/* set the SGD optimizer 's' to the variable 'x'. Remove it if s = NULL */
|
||||
void nc_sgd_opt_set(NCParam *x, NCSGDOptState *s);
|
||||
void nc_sgd_opt_update(NCSGDOptState *s);
|
||||
/* force the learning rate */
|
||||
void nc_sgd_opt_set_lr(NCSGDOptState *s, float lr);
|
||||
float nc_sgd_opt_get_lr(NCSGDOptState *s);
|
||||
|
||||
/* for SGD_OPT_TEST */
|
||||
NCTensor *nc_sgd_opt_get_grad(NCParam *p);
|
||||
|
||||
/* misc utilities (to be removed) */
|
||||
|
||||
/* State of the simple random generator used by the rnd_*() helpers. */
typedef struct {
    uint32_t seed;
    /* used by Gaussian generator */
    int idx;
    float y1;
} RNDState;
|
||||
|
||||
/* Wrapper holding a 16 bit value (the name suggests the raw bits of a half
   precision float -- TODO confirm). */
typedef struct {
    uint16_t u16; /* raw 16 bit storage */
} nc_float16_t;
|
||||
|
||||
void rnd_init(RNDState *s, uint32_t seed);
|
||||
uint32_t rnd_unif_u32(RNDState *s);
|
||||
float rnd_unif(RNDState *s);
|
||||
void rnd_unif_vec(float *tab, size_t n, float mu, float range,
|
||||
RNDState *s);
|
||||
void rnd_unif_mat(float *tab, size_t stride, size_t h, size_t w,
|
||||
float mu, float sigma, RNDState *s);
|
||||
|
||||
float vec_sum_f32(const float *tab, size_t n);
|
||||
|
||||
/* Element of the table returned by nc_topk(): a value and its index. */
typedef struct {
    float val;    /* the selected value */
    uint32_t idx; /* index associated with 'val' */
} NCTopKEntry;
|
||||
|
||||
/* Return the k largest values among prob[0...n_symb-1] such that k is
|
||||
the largest value such that k <= topk and sum(i=0 .. k - 2,
|
||||
prob[tab[i]]) < topp.
|
||||
|
||||
It is assumed that prob[i] >= 0. The function returns (k, tab,
|
||||
sum). 'sum' is the sum of the k returned values. 'tab' must be
|
||||
freed with nc_free(). */
|
||||
int nc_topk(NCTopKEntry **ptab, double *psum,
|
||||
const float *prob, size_t n, int topk, float topp);
|
||||
|
||||
#endif /* LIBNC_H */
|
96
gpt2/list.h
Normal file
96
gpt2/list.h
Normal file
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Linux klist like system
|
||||
*
|
||||
* Copyright (c) 2016-2017 Fabrice Bellard
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
#ifndef LIST_H
|
||||
#define LIST_H
|
||||
|
||||
/* Node of a circular doubly linked list. It is embedded in the structure
   that is to be linked; an empty list is a head pointing to itself. */
struct list_head {
    struct list_head *prev, *next;
};
|
||||
|
||||
#define LIST_HEAD_INIT(el) { &(el), &(el) }
|
||||
|
||||
/* return the pointer of type 'type *' containing 'el' as field 'member' */
|
||||
#define list_entry(el, type, member) \
|
||||
((type *)((uint8_t *)(el) - offsetof(type, member)))
|
||||
|
||||
static inline void init_list_head(struct list_head *head)
|
||||
{
|
||||
head->prev = head;
|
||||
head->next = head;
|
||||
}
|
||||
|
||||
/* insert 'el' between 'prev' and 'next' */
|
||||
static inline void __list_add(struct list_head *el,
|
||||
struct list_head *prev, struct list_head *next)
|
||||
{
|
||||
prev->next = el;
|
||||
el->prev = prev;
|
||||
el->next = next;
|
||||
next->prev = el;
|
||||
}
|
||||
|
||||
/* add 'el' at the head of the list 'head' (= after element head) */
|
||||
static inline void list_add(struct list_head *el, struct list_head *head)
|
||||
{
|
||||
__list_add(el, head, head->next);
|
||||
}
|
||||
|
||||
/* add 'el' at the end of the list 'head' (= before element head) */
|
||||
static inline void list_add_tail(struct list_head *el, struct list_head *head)
|
||||
{
|
||||
__list_add(el, head->prev, head);
|
||||
}
|
||||
|
||||
/* unlink 'el' from the list it belongs to */
static inline void list_del(struct list_head *el)
{
    struct list_head *before = el->prev;
    struct list_head *after = el->next;

    /* make the two neighbours point at each other */
    before->next = after;
    after->prev = before;

    /* fail safe: a removed element no longer references the list */
    el->prev = NULL;
    el->next = NULL;
}
|
||||
|
||||
static inline int list_empty(struct list_head *el)
|
||||
{
|
||||
return el->next == el;
|
||||
}
|
||||
|
||||
#define list_for_each(el, head) \
|
||||
for(el = (head)->next; el != (head); el = el->next)
|
||||
|
||||
#define list_for_each_safe(el, el1, head) \
|
||||
for(el = (head)->next, el1 = el->next; el != (head); \
|
||||
el = el1, el1 = el->next)
|
||||
|
||||
#define list_for_each_prev(el, head) \
|
||||
for(el = (head)->prev; el != (head); el = el->prev)
|
||||
|
||||
#define list_for_each_prev_safe(el, el1, head) \
|
||||
for(el = (head)->prev, el1 = el->prev; el != (head); \
|
||||
el = el1, el1 = el->prev)
|
||||
|
||||
#endif /* LIST_H */
|
86
gpt2/readme.txt
Normal file
86
gpt2/readme.txt
Normal file
|
@ -0,0 +1,86 @@
|
|||
GPT-2 text completion and compression demo
|
||||
==========================================
|
||||
|
||||
1) Usage
|
||||
--------
|
||||
|
||||
Extract the 117M GPT-2 model to the gpt2tc directory:
|
||||
|
||||
tar xzf gpt2tc-117M.tar.gz
|
||||
|
||||
Text completion example:
|
||||
|
||||
./gpt2tc g "Hello, my name is"
|
||||
|
||||
Use more CPU cores (only faster on server CPUs):
|
||||
|
||||
./gpt2tc -T 4 g "Hello, my name is"
|
||||
|
||||
Short text compression and decompression example:
|
||||
|
||||
./gpt2tc cs "Hello, how are you ?"
|
||||
|
||||
./gpt2tc ds "姯敳痪"
|
||||
|
||||
Text compression example:
|
||||
|
||||
./gpt2tc c in.txt out.bin
|
||||
|
||||
Decompression:
|
||||
|
||||
./gpt2tc d out.bin out.txt
|
||||
|
||||
2) Using larger models
|
||||
----------------------
|
||||
|
||||
The smallest GPT-2 model (117M) is provided in a separate
|
||||
archive. Larger models can be built by downloading the TensorFlow
|
||||
parameters and converting them with the attached script. Example:
|
||||
|
||||
# download the model to models/345M
|
||||
./download_model.sh 345M
|
||||
|
||||
# convert it to the gpt2tc format:
|
||||
python3 gpt2convert.py models/345M gpt2_345M.bin
|
||||
|
||||
# use it
|
||||
./gpt2tc -m 345M g "Hello, how are you ?"
|
||||
|
||||
3) Compression results
|
||||
----------------------
|
||||
|
||||
File Model Original size Compr. size Ratio CMIX v18
|
||||
#params (bytes) (bytes) (bpb) ratio (bpb)
|
||||
book1 117M 768771 152283 1.58 1.82
|
||||
book1 345M 768771 142183 1.48
|
||||
book1 774M 768771 137562 1.43
|
||||
book1 1558M 768771 134217 1.40
|
||||
|
||||
alice29.txt 117M 152089 23615 1.24 1.65
|
||||
alice29.txt 345M 152089 20587 1.08
|
||||
alice29.txt 774M 152089 19096 1.00
|
||||
alice29.txt 1558M 152089 17382 0.91
|
||||
|
||||
enwik5 117M 100000 14875 1.19 1.60
|
||||
enwik5 345M 100000 13511 1.08
|
||||
enwik5 774M 100000 13240 1.06
|
||||
enwik5 1558M 100000 12918 1.03
|
||||
|
||||
Notes:
|
||||
- book1 comes from the Calgary corpus.
|
||||
- alice29.txt comes from the Canterbury corpus.
|
||||
- enwik5 contains the first 100000 bytes of the English
|
||||
Wikipedia dump of March 3, 2006
|
||||
(http://mattmahoney.net/dc/textdata.html).
|
||||
- For best performance, use the UTF-8 encoding and don't mix CRLF and
|
||||
LF line breaks.
|
||||
- For reference, the results of CMIX
|
||||
(http://www.byronknoll.com/cmix.html) are provided.
|
||||
|
||||
4) More information
|
||||
-------------------
|
||||
|
||||
This demo has no external dependencies. It is written in C and uses the
|
||||
LibNC library for tensor manipulation. The CPU must support AVX2.
|
||||
|
||||
A similar program is used for http://textsynth.org/
|
54
justlm.hpp
Normal file
54
justlm.hpp
Normal file
|
@ -0,0 +1,54 @@
|
|||
#ifndef LLM_H
|
||||
#define LLM_H
|
||||
#include <cstdint>
#include <ctime>
#include <functional>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>
#include <thread>
#include <vector>
|
||||
|
||||
|
||||
/// Small front-end over a language-model backend. The member functions
/// init(), ~LLM(), append() and run() are implemented once per backend
/// (libjustlm_gpt2.cpp / libjustlm_llama.cpp).
///
/// The object owns its backend state through a raw pointer, so it is
/// deliberately non-copyable: a copy would free the same state twice.
class LLM {
    /// Default worker count: half of the reported hardware threads, but never
    /// less than one (hardware_concurrency() may report 0 or 1).
    static int32_t default_thread_count() {
        const auto half = static_cast<int32_t>(std::thread::hardware_concurrency()) / 2;
        return half > 0 ? half : 1;
    }

    struct {
        int32_t seed; // RNG seed
        int32_t n_threads = default_thread_count();
        union {
            int32_t n_ctx; // Context size, llama.cpp specific
            int32_t n_prompt = -1; // Prompt size, gpt2 specific
        };
        int32_t n_batch = 8; // Batch size, unused

        int32_t top_k = 40;
        float top_p = 0.5f;
        float temp = 0.72f;
    } params;

    // Backend-private state; allocated by init() and released by ~LLM()
    struct State *state = nullptr;

    /// Backend-specific set-up from the weights file; called by the constructor.
    void init(const std::string& weights_path);

    /// @return true if `str` ends with `suffix`
    static
    bool ends_with(std::string_view str, std::string_view suffix);

public:
    /// Base class of all errors reported by LLM.
    struct Exception : public std::runtime_error {
        using std::runtime_error::runtime_error;
    };
    /// Thrown when the prompt no longer fits into the model context.
    struct ContextLengthException : public Exception {
        ContextLengthException() : Exception("Max. context length exceeded") {}
    };

    /// @param weights_path path of the model weights file
    /// @param seed RNG seed; 0 selects a seed derived from the current time
    LLM(const std::string& weights_path, int32_t seed = 0) {
        // Set random seed
        params.seed = seed ? seed : static_cast<int32_t>(std::time(nullptr));

        // Initialize llm
        init(weights_path);
    }
    ~LLM();

    // Owning raw pointer inside: copying would lead to a double free
    LLM(const LLM&) = delete;
    LLM& operator=(const LLM&) = delete;

    /// Append `prompt` to the current prompt.
    /// @param on_tick optional progress callback; returning false stops early
    void append(std::string_view prompt, const std::function<bool (float progress)>& on_tick = nullptr);

    /// Generate text until the output ends with `end`.
    /// @param on_tick optional callback receiving each generated piece;
    ///        returning false aborts the generation
    /// @return the generated text
    std::string run(std::string_view end, const std::function<bool (const char *generated)>& on_tick = nullptr);
};
|
||||
#endif // LLM_H
|
9
libjustlm_core.cpp
Normal file
9
libjustlm_core.cpp
Normal file
|
@ -0,0 +1,9 @@
|
|||
#include "justlm.hpp"
|
||||
|
||||
#include <string_view>
|
||||
|
||||
|
||||
|
||||
// Returns true when `suffix` is the tail of `str` (an empty suffix always matches).
bool LLM::ends_with(std::string_view str, std::string_view suffix) {
    // A suffix longer than the string can never match
    if (suffix.size() > str.size()) {
        return false;
    }
    const auto tail = str.substr(str.size() - suffix.size());
    return tail == suffix;
}
|
80
libjustlm_gpt2.cpp
Normal file
80
libjustlm_gpt2.cpp
Normal file
|
@ -0,0 +1,80 @@
|
|||
#include "justlm.hpp"
|
||||
#include "gpt2/gpt2tc.h"
|
||||
|
||||
#include <filesystem>
|
||||
#include <cstring>
|
||||
|
||||
|
||||
struct State {
|
||||
std::string prompt;
|
||||
std::string model_path;
|
||||
GPT2ModelEnum model;
|
||||
} state;
|
||||
|
||||
|
||||
|
||||
void LLM::init(const std::string& weights_path) {
|
||||
state->model_path = weights_path;
|
||||
// Get weight file size
|
||||
auto weights_size = std::filesystem::file_size(weights_path);
|
||||
// Determine weight size
|
||||
switch (weights_size) {
|
||||
case 250700242: state->model = GPT2_MODEL_117M; break;
|
||||
case 3120522738: state->model = GPT2_MODEL_1558M; break;
|
||||
case 712396722: state->model = GPT2_MODEL_345M; break;
|
||||
case 1551900050: state->model = GPT2_MODEL_774M; break;
|
||||
default: throw Exception("Unknown model size");
|
||||
}
|
||||
}
|
||||
|
||||
// Release the backend state owned by this object.
LLM::~LLM() {
    delete this->state;
}
|
||||
|
||||
// Add `prompt` to the stored prompt and echo it to stdout.
// `on_tick` is accepted for interface parity but never invoked here.
void LLM::append(std::string_view prompt, const std::function<bool (float)> &on_tick) {
    std::string &stored = state->prompt;
    stored.append(prompt.data(), prompt.size());

    // Echo followed by a newline and a flush
    std::cout << prompt << '\n' << std::flush;
}
|
||||
|
||||
std::string LLM::run(std::string_view end, const std::function<bool (const char *)> &on_tick) {
|
||||
std::string fres;
|
||||
TextCompleteGlobalState *tcs;
|
||||
TextGenContext *ts;
|
||||
int count;
|
||||
struct timeval tv;
|
||||
struct list_head ts_list;
|
||||
|
||||
// Initialize completion
|
||||
tcs = text_complete_global_init(state->model, state->model_path.c_str());
|
||||
|
||||
// Run completion
|
||||
ts = text_complete_start(tcs, state->prompt.c_str(), params.top_k, params.top_p, params.temp,
|
||||
params.seed, params.n_prompt>0?params.n_prompt:0xfffffff - state->prompt.size());
|
||||
bool abort = false;
|
||||
while (!abort && !ends_with(fres, end)) {
|
||||
// Run completion
|
||||
init_list_head(&ts_list);
|
||||
list_add_tail(&ts->link, &ts_list);
|
||||
text_complete_next(tcs, &ts_list);
|
||||
if (ts->out_text_len == 0)
|
||||
break;
|
||||
auto str = std::string_view{ts->out_text, static_cast<std::string_view::size_type>(ts->out_text_len)};
|
||||
|
||||
// Append result to fres
|
||||
fres.append(str);
|
||||
|
||||
// Tick
|
||||
if (on_tick && !on_tick(std::string(str).c_str()) /*Huge overhead in favor of llama.cpp*/) abort = true;
|
||||
}
|
||||
// End completion
|
||||
text_complete_end(ts);
|
||||
|
||||
text_complete_global_end(tcs);
|
||||
|
||||
// Create final string TODO: Could be optimized
|
||||
state->prompt.append(fres);
|
||||
fres = std::string(fres.data(), fres.size()-end.size());
|
||||
|
||||
// Return final string
|
||||
return fres;
|
||||
}
|
115
libjustlm_llama.cpp
Normal file
115
libjustlm_llama.cpp
Normal file
|
@ -0,0 +1,115 @@
|
|||
#include "justlm.hpp"
|
||||
|
||||
#include <ggml.h>
|
||||
#include <llama.h>
|
||||
|
||||
|
||||
struct State {
|
||||
llama_context *ctx = nullptr;
|
||||
std::string prompt;
|
||||
std::vector<int> embd;
|
||||
int n_ctx;
|
||||
std::string last_result;
|
||||
} state;
|
||||
|
||||
|
||||
|
||||
void LLM::init(const std::string& weights_path) {
|
||||
// Allocate state
|
||||
state = new State;
|
||||
|
||||
// Get llama parameters
|
||||
auto lparams = llama_context_default_params();
|
||||
lparams.seed = params.seed;
|
||||
lparams.n_ctx = params.n_ctx>0?params.n_ctx:2024;
|
||||
|
||||
// Create context
|
||||
state->ctx = llama_init_from_file(weights_path.c_str(), lparams);
|
||||
if (!state->ctx) {
|
||||
throw Exception("Failed to initialize llama from file");
|
||||
}
|
||||
|
||||
// Initialize some variables
|
||||
state->n_ctx = llama_n_ctx(state->ctx);
|
||||
}
|
||||
|
||||
// Free the llama.cpp context (if one was created) and then the state itself.
LLM::~LLM() {
    llama_context *const ctx = state->ctx;
    if (ctx != nullptr) {
        llama_free(ctx);
    }
    delete state;
}
|
||||
|
||||
void LLM::append(std::string_view prompt, const std::function<bool (float)> &on_tick) {
|
||||
// Check if prompt was empty
|
||||
const bool was_empty = state->prompt.empty();
|
||||
|
||||
// Append to current prompt
|
||||
state->prompt.append(prompt);
|
||||
|
||||
// Resize buffer for tokens
|
||||
const auto old_token_count = state->embd.size();
|
||||
state->embd.resize(old_token_count+state->prompt.size()+1);
|
||||
|
||||
// Run tokenizer
|
||||
const auto token_count = llama_tokenize(state->ctx, prompt.data(), state->embd.data()+old_token_count, state->embd.size()-old_token_count, was_empty);
|
||||
state->embd.resize(old_token_count+token_count);
|
||||
|
||||
// Make sure limit is far from being hit
|
||||
if (state->embd.size() > state->n_ctx-6) {
|
||||
// Yup. *this MUST be decomposed now.
|
||||
throw ContextLengthException();
|
||||
}
|
||||
|
||||
// Evaluate new tokens
|
||||
// TODO: Larger batch size
|
||||
std::cout << "Context size: " << old_token_count << '+' << token_count << '=' << state->embd.size() << '/' << state->n_ctx << std::endl;
|
||||
for (int it = old_token_count; it != state->embd.size(); it++) {
|
||||
std::cout << llama_token_to_str(state->ctx, state->embd.data()[it]) << std::flush;
|
||||
llama_eval(state->ctx, state->embd.data()+it, 1, it, params.n_threads);
|
||||
|
||||
// Tick
|
||||
if (on_tick) {
|
||||
// Calculate progress
|
||||
auto progress = float(it-old_token_count) / (state->embd.size()-old_token_count) * 100.f;
|
||||
// Run callback
|
||||
if (!on_tick(progress)) break;
|
||||
}
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
std::string LLM::run(std::string_view end, const std::function<bool (const char *)> &on_tick) {
|
||||
std::string fres;
|
||||
|
||||
// Loop until done
|
||||
bool abort = false;
|
||||
while (!abort && !ends_with(fres, end)) {
|
||||
// Sample top p and top k
|
||||
const auto id = llama_sample_top_p_top_k(state->ctx, nullptr, 0, params.top_k, params.top_p, params.temp, 1.0f);
|
||||
|
||||
// Add token
|
||||
state->embd.push_back(id);
|
||||
|
||||
// Get token as string
|
||||
const auto str = llama_token_to_str(state->ctx, id);
|
||||
|
||||
// Debug
|
||||
std::cout << str << std::flush;
|
||||
|
||||
// Append string to function result
|
||||
fres.append(str);
|
||||
|
||||
// Evaluate token
|
||||
// TODO: Respect batch size
|
||||
llama_eval(state->ctx, state->embd.data()+state->embd.size()-1, 1, state->embd.size()-1, params.n_threads);
|
||||
|
||||
// Tick
|
||||
if (on_tick && !on_tick(str)) abort = true;
|
||||
}
|
||||
|
||||
// Create final string TODO: Could be optimized
|
||||
state->prompt.append(fres);
|
||||
fres = std::string(fres.data(), fres.size()-end.size());
|
||||
|
||||
// Return final string
|
||||
return fres;
|
||||
}
|
1
llama.cpp
Submodule
1
llama.cpp
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 9cbc404ba6699a9ba4925ea25a60552b13491c7a
|
12
test.cpp
Normal file
12
test.cpp
Normal file
|
@ -0,0 +1,12 @@
|
|||
#include "ai.hpp"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
|
||||
|
||||
// Small manual check: completes a fixed prompt up to the first newline and prints the result.
int main() {
    Ai ai;

    // Announce what is about to happen and which model is in use
    std::cout << "Completing \"she replied that\"..." << std::endl;
    std::cout << "Using model " << ai.model_name << "..." << std::endl;

    // Emit the prompt first, then the completion (same order as a single chained expression)
    std::cout << "> she replied that";
    std::cout << ai.complete("she replied that", '\n') << std::endl;

    return 0;
}
|
Loading…
Add table
Reference in a new issue