+160
-30
lines changedFilter options
+160
-30
lines changed Original file line number Diff line number Diff line change
@@ -9,7 +9,7 @@ name = "tiktoken_core"
9
9
crate-type = ["cdylib"]
10
10
11
11
[dependencies]
12
-
mlua = { version = "0.9.7", features = ["serialize", "module"] }
12
+
mlua = { version = "0.10.5", features = ["serialize", "module"] }
13
13
# tiktoken dependencies
14
14
fancy-regex = "0.11.0"
15
15
regex = "1.8.3"
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
1
-
use base64;
1
+
use base64::{prelude::BASE64_STANDARD, Engine as _};
2
2
use fancy_regex::Regex;
3
3
use mlua::prelude::*;
4
4
use rustc_hash::FxHashMap as HashMap;
@@ -8,10 +8,6 @@ use std::io::{BufRead, BufReader};
8
8
use std::sync::{Arc, Mutex};
9
9
use std::thread;
10
10
11
-
#[cfg(feature = "multithreading")]
12
-
const MAX_NUM_THREADS: usize = 128;
13
-
14
-
#[cfg(not(feature = "multithreading"))]
15
11
const MAX_NUM_THREADS: usize = 1;
16
12
17
13
fn _byte_pair_merge<T>(
@@ -224,7 +220,9 @@ fn new(
224
220
for line in reader.lines() {
225
221
let line = line.unwrap();
226
222
let mut parts = line.split_whitespace();
227
-
let token = base64::decode(parts.next().unwrap().as_bytes()).unwrap();
223
+
let token = BASE64_STANDARD
224
+
.decode(parts.next().unwrap().as_bytes())
225
+
.unwrap();
228
226
let rank = parts.next().unwrap().parse().unwrap();
229
227
encoder.insert(token, rank);
230
228
}
@@ -262,7 +260,8 @@ fn new(
262
260
}
263
261
264
262
fn encode(state: &State, text: mlua::String) -> LuaResult<(Vec<usize>, usize, usize)> {
265
-
let encoded_str = String::from_utf8_lossy(text.as_bytes());
263
+
let text_bytes = text.as_bytes();
264
+
let encoded_str = String::from_utf8_lossy(&text_bytes);
266
265
let allowed_special = HashSet::new();
267
266
let max_tokens = None;
268
267
Ok(state
@@ -592,13 +591,6 @@ impl CoreBPENative {
592
591
Err(piece.to_owned())
593
592
}
594
593
595
-
fn encode_single_piece(&self, piece: &[u8]) -> Vec<usize> {
596
-
if let Some(token) = self.encoder.get(piece) {
597
-
return vec![*token];
598
-
}
599
-
byte_pair_encode(piece, &self.encoder)
600
-
}
601
-
602
594
// ====================
603
595
// Decoding
604
596
// ====================
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
1
1
package = "tiktoken_core"
2
-
version = "0.2.3-1"
2
+
version = "0.2.4-1"
3
3
4
4
source = {
5
5
url = "git+https://github.com/gptlang/lua-tiktoken",
6
-
tag = "v0.2.3",
6
+
tag = "v0.2.4",
7
7
}
8
8
9
9
description = {
You can’t perform that action at this time.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4