It seems when querying tesseract.js from multiple threads, the eng.traineddata is locked and throws errors like
Error opening data file ./eng.traineddata Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory. Failed loading language 'eng' Error opening data file ./eng.traineddata Tesseract couldn't load any languages!
To reproduce this:
const { Worker } = require('worker_threads')
function runService(workerData) {
return new Promise((resolve, reject) => {
const worker = new Worker('./databaseTests.js', { workerData });
worker.on('message', resolve);
worker.on('error', reject);
worker.on('exit', (code) => {
if (code !== 0)
reject(new Error(`Worker stopped with exit code ${code}`));
})
})
}
async function run() {
const result = await runService('world')
console.log(result);
}
(async () => {
while(true){
run().catch(err => console.error(err))
await new Promise(resolve => setTimeout(resolve, 5));
}
})();
In databaseTests I am simply creating a new worker:
const { createWorker } = require('tesseract.js');
const path = require('path');
const worker = createWorker({
langPath: path.join(__dirname, '..', 'lang-data'),
logger: m => console.log(m),
});
(async () => {
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
const { data: { text } } = await worker.recognize(path.join(__dirname, '..', 'images', 'testocr.png'));
console.log(text);
await worker.terminate();
})();
Is there any way to reuse the worker or to skip checking the trained data file?
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4