A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://github.com/gptlang/lua-tiktoken/commit/e3262ddc47f45358f25136bf906b02b763f43dc8 below:

update mlua-rs, fix warnings, remove dead code · gptlang/lua-tiktoken@e3262dd · GitHub

File tree — Expand file tree / Collapse file tree — 4 files changed

+160

-30

lines changed

Filter options

Expand file tree / Collapse file tree — 4 files changed

+160

-30

lines changed

Original file line number | Diff line number | Diff line change

@@ -9,7 +9,7 @@ name = "tiktoken_core"

9 9

crate-type = ["cdylib"]

10 10 11 11

[dependencies]

12 -

mlua = { version = "0.9.7", features = ["serialize", "module"] }

12 +

mlua = { version = "0.10.5", features = ["serialize", "module"] }

13 13

# tiktoken dependencies

14 14

fancy-regex = "0.11.0"

15 15

regex = "1.8.3"

Original file line number Diff line number Diff line change

@@ -1,4 +1,4 @@

1 -

use base64;

1 +

use base64::{prelude::BASE64_STANDARD, Engine as _};

2 2

use fancy_regex::Regex;

3 3

use mlua::prelude::*;

4 4

use rustc_hash::FxHashMap as HashMap;

@@ -8,10 +8,6 @@ use std::io::{BufRead, BufReader};

8 8

use std::sync::{Arc, Mutex};

9 9

use std::thread;

10 10 11 -

#[cfg(feature = "multithreading")]

12 -

const MAX_NUM_THREADS: usize = 128;

13 - 14 -

#[cfg(not(feature = "multithreading"))]

15 11

const MAX_NUM_THREADS: usize = 1;

16 12 17 13

fn _byte_pair_merge<T>(

@@ -224,7 +220,9 @@ fn new(

224 220

for line in reader.lines() {

225 221

let line = line.unwrap();

226 222

let mut parts = line.split_whitespace();

227 -

let token = base64::decode(parts.next().unwrap().as_bytes()).unwrap();

223 +

let token = BASE64_STANDARD

224 +

.decode(parts.next().unwrap().as_bytes())

225 +

.unwrap();

228 226

let rank = parts.next().unwrap().parse().unwrap();

229 227

encoder.insert(token, rank);

230 228

}

@@ -262,7 +260,8 @@ fn new(

262 260

}

263 261 264 262

fn encode(state: &State, text: mlua::String) -> LuaResult<(Vec<usize>, usize, usize)> {

265 -

let encoded_str = String::from_utf8_lossy(text.as_bytes());

263 +

let text_bytes = text.as_bytes();

264 +

let encoded_str = String::from_utf8_lossy(&text_bytes);

266 265

let allowed_special = HashSet::new();

267 266

let max_tokens = None;

268 267

Ok(state

@@ -592,13 +591,6 @@ impl CoreBPENative {

592 591

Err(piece.to_owned())

593 592

}

594 593 595 -

fn encode_single_piece(&self, piece: &[u8]) -> Vec<usize> {

596 -

if let Some(token) = self.encoder.get(piece) {

597 -

return vec![*token];

598 -

}

599 -

byte_pair_encode(piece, &self.encoder)

600 -

}

601 - 602 594

// ====================

603 595

// Decoding

604 596

// ====================

Original file line number Diff line number Diff line change

@@ -1,9 +1,9 @@

1 1

package = "tiktoken_core"

2 -

version = "0.2.3-1"

2 +

version = "0.2.4-1"

3 3 4 4

source = {

5 5

url = "git+https://github.com/gptlang/lua-tiktoken",

6 -

tag = "v0.2.3",

6 +

tag = "v0.2.4",

7 7

}

8 8 9 9

description = {

You can’t perform that action at this time.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4