A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://github.com/gptlang/lua-tiktoken/commit/1b9baba6120aea47135d6e2478478568f81edb22 below:

invalid utf-8 parsing · gptlang/lua-tiktoken@1b9baba · GitHub

File tree (Expand file tree / Collapse file tree) — 2 files changed

+7

-6

lines changed

Filter options

Expand file tree / Collapse file tree — 2 files changed

+7

-6

lines changed

Original file line number | Diff line number | Diff line change

@@ -1,3 +1,4 @@

1 +

use base64;

1 2

use fancy_regex::Regex;

2 3

use mlua::prelude::*;

3 4

use rustc_hash::FxHashMap as HashMap;

@@ -6,7 +7,6 @@ use std::fs::File;

6 7

use std::io::{BufRead, BufReader};

7 8

use std::sync::{Arc, Mutex};

8 9

use std::thread;

9 -

use base64;

10 10 11 11

#[cfg(feature = "multithreading")]

12 12

const MAX_NUM_THREADS: usize = 128;

@@ -203,7 +203,7 @@ pub fn tiktoken_core(lua: &mlua::Lua) -> LuaResult<LuaTable> {

203 203

Ok(())

204 204

},

205 205

)?;

206 -

let _encode = lua.create_function(move |_, text: String| encode(&*state2, text))?;

206 +

let _encode = lua.create_function(move |_, text: mlua::String| encode(&*state2, text))?;

207 207 208 208

let exports = lua.create_table()?;

209 209

exports.set("new", _new)?;

@@ -261,7 +261,8 @@ fn new(

261 261

});

262 262

}

263 263 264 -

fn encode(state: &State, text: String) -> LuaResult<(Vec<usize>, usize, usize)> {

264 +

fn encode(state: &State, text: mlua::String) -> LuaResult<(Vec<usize>, usize, usize)> {

265 +

let encoded_str = String::from_utf8_lossy(text.as_bytes());

265 266

let allowed_special = HashSet::new();

266 267

let max_tokens = None;

267 268

Ok(state

@@ -270,7 +271,7 @@ fn encode(state: &State, text: String) -> LuaResult<(Vec<usize>, usize, usize)>

270 271

.unwrap()

271 272

.as_ref()

272 273

.unwrap()

273 -

._encode_native(&text, &allowed_special, max_tokens))

274 +

._encode_native(&encoded_str, &allowed_special, max_tokens))

274 275

}

275 276 276 277

pub struct CoreBPENative {

Original file line number Diff line number Diff line change

@@ -1,9 +1,9 @@

1 1

package = "tiktoken_core"

2 -

version = "0.2.2-1"

2 +

version = "0.2.3-1"

3 3 4 4

source = {

5 5

url = "git+https://github.com/gptlang/lua-tiktoken",

6 -

tag = "v0.2.2",

6 +

tag = "v0.2.3",

7 7

}

8 8 9 9

description = {

You can’t perform that action at this time.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4