Optional OCR engine instance to use when extracting text.
// Gets or sets the optional OCR engine instance to use when extracting text.
public IOcrEngine OcrEngine { get; set; }
public:
// Optional OCR engine instance to use when extracting text.
property IOcrEngine^ OcrEngine
{
// Returns the OCR engine currently assigned to this property.
IOcrEngine^ get()
// Assigns the OCR engine to use when extracting text.
void set(IOcrEngine^ value)
}
// Returns the optional OCR engine instance used when extracting text.
public OcrEngine getOcrEngine()
// Sets the optional OCR engine instance to use when extracting text.
public void setOcrEngine(OcrEngine value)
OcrEngine # get and set (DocumentText)
The optional OCR engine instance to use when extracting text. Default value is null.
This property is not used if the value of TextExtractionMode is DocumentTextExtractionMode.SvgOnly.
The text of the document can be extracted using OCR technologies. In this mode, a valid IOcrEngine instance must be initialized (created and started) and set in the OcrEngine property before DocumentPage.GetText is called.
OcrEngine can be set up from the outside with any extra options (such as enabled languages, spell checker, accuracy tradeoffs, etc.). The Document class will use the engine as is.
The Document object will not delete the value of OcrEngine when it is disposed. IOcrEngine is thread-safe by nature and the same instance can be set into multiple Document instances and used at the same time.
For more information, refer to Parsing Text with the Document Library.
Note that setting this property to a value will update the same value in each child document.
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Document.Writer;
using Leadtools.Document;
using Leadtools.Caching;
using Leadtools.Annotations.Engine;
using Leadtools.Ocr;
using Leadtools.Barcode;
using Leadtools.Document.Converter;
/// <summary>
/// Loads "slice.tif", attaches a started LEAD OCR engine to the document, prints every
/// <c>DocumentTextExtractionMode</c> value, and then prints the text of the first page.
/// </summary>
/// <remarks>
/// The document does not dispose the engine assigned to <c>document.Text.OcrEngine</c>,
/// so this example owns the engine: it detaches it from the document, shuts it down and
/// disposes it (together with the RasterCodecs instance) before returning.
/// </remarks>
public void DocumentTextExample()
{
   var options = new LoadDocumentOptions();
   // Disposal runs in reverse order: OCR engine first, then codecs, then the document.
   using (var document = DocumentFactory.LoadFromFile(Path.Combine(LEAD_VARS.ImagesDir, "slice.tif"), options))
   using (var rasterCodecs = new RasterCodecs())
   using (var ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
   {
      try
      {
         // For the TIF file we need an OCR engine
         var documentWriter = new DocumentWriter();
         ocrEngine.Startup(rasterCodecs, documentWriter, null, LEAD_VARS.OcrLEADRuntimeDir);

         // DocumentText reference
         document.Text.OcrEngine = ocrEngine;

         // Get all of the DocumentTextExtractionModes (DocumentTextExtractionMode reference)
         DocumentTextExtractionMode[] textExtractionModes = (DocumentTextExtractionMode[])Enum.GetValues(typeof(DocumentTextExtractionMode));
         foreach (var mode in textExtractionModes)
         {
            Console.WriteLine($"Text extraction mode: {mode}");
         }

         // Get text
         var page = document.Pages[0];
         var pageText = page.GetText();
         if (pageText != null)
         {
            pageText.BuildText();
            var text = pageText.Text;
            Console.WriteLine(text);
         }
         else
         {
            Console.WriteLine("Failed!");
         }
      }
      finally
      {
         // Detach the engine so the document never holds a reference to a shut-down engine,
         // then release the OCR runtime resources acquired by Startup.
         document.Text.OcrEngine = null;
         if (ocrEngine.IsStarted)
         {
            ocrEngine.Shutdown();
         }
      }
   }
}
// Hard-coded installation paths used by the example above.
static class LEAD_VARS
{
// Directory that contains the sample image ("slice.tif") loaded by the example.
public const string ImagesDir = @"C:\LEADTOOLS23\Resources\Images";
// Directory passed to the OCR engine's Startup call as its runtime location.
public const string OcrLEADRuntimeDir = @"C:\LEADTOOLS23\Bin\Common\OcrLEADRuntime";
}
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Pattern;
import org.junit.*;
import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
import org.junit.runner.notification.Failure;
import static org.junit.Assert.*;
import leadtools.*;
import leadtools.annotations.engine.*;
import leadtools.barcode.*;
import leadtools.caching.*;
import leadtools.codecs.*;
import leadtools.document.*;
import leadtools.document.DocumentMimeTypes.UserGetDocumentStatusHandler;
import leadtools.document.converter.*;
import leadtools.document.writer.*;
import leadtools.ocr.*;
/**
 * Loads "slice.tif", attaches a started LEAD OCR engine to the document, prints every
 * {@code DocumentTextExtractionMode} value, and then prints the text of the first page.
 *
 * <p>The document does not dispose the engine assigned through {@code setOcrEngine}, so this
 * example owns it: the engine is detached, shut down and disposed in a {@code finally} block,
 * along with the codecs and the document, even when the final assertion fails.
 */
public void documentTextExample() {
    final String LEAD_VARS_IMAGES_DIR = "C:\\LEADTOOLS23\\Resources\\Images";
    final String OCR_LEAD_RUNTIME_DIR = "C:\\LEADTOOLS23\\Bin\\Common\\OcrLEADRuntime";

    LEADDocument document = null;
    OcrEngine ocrEngine = null;
    RasterCodecs rasterCodecs = null;
    try {
        LoadDocumentOptions options = new LoadDocumentOptions();
        document = DocumentFactory.loadFromFile(combine(LEAD_VARS_IMAGES_DIR, "slice.tif"), options);

        // For the TIF file we need an OCR engine
        ocrEngine = OcrEngineManager.createEngine(OcrEngineType.LEAD);
        rasterCodecs = new RasterCodecs();
        DocumentWriter documentWriter = new DocumentWriter();
        ocrEngine.startup(rasterCodecs, documentWriter, null, OCR_LEAD_RUNTIME_DIR);

        // DocumentText reference
        document.getText().setOcrEngine(ocrEngine);

        // Get all of the DocumentTextExtractionModes (DocumentTextExtractionMode reference)
        for (DocumentTextExtractionMode mode : DocumentTextExtractionMode.values()) {
            System.out.println("Text extraction mode: " + mode);
        }

        // Get text
        DocumentPage page = document.getPages().get(0);
        DocumentPageText pageText = page.getText();
        if (pageText != null) {
            pageText.buildText();
            String text = pageText.getText();
            System.out.println(text);
        } else {
            System.out.println("Failed!");
        }
        assertNotNull(pageText);
    } finally {
        // Release in reverse order of acquisition; the document must not keep a reference
        // to an engine that has been shut down.
        if (document != null && ocrEngine != null) {
            document.getText().setOcrEngine(null);
        }
        if (ocrEngine != null) {
            if (ocrEngine.isStarted()) {
                ocrEngine.shutdown();
            }
            ocrEngine.dispose();
        }
        if (rasterCodecs != null) {
            rasterCodecs.dispose();
        }
        if (document != null) {
            document.dispose();
        }
    }
}
Help Collections
Raster .NET | C API | C++ Class Library | HTML5 JavaScript
Document .NET | C API | C++ Class Library | HTML5 JavaScript
Medical .NET | C API | C++ Class Library | HTML5 JavaScript
Medical Web Viewer .NET
Multimedia
Direct Show .NET | C API | Filters
Media Foundation .NET | C API | Transforms
Supported Platforms
.NET, Java, Android, and iOS/macOS Assemblies
Imaging, Medical, and Document
C API/C++ Class Libraries
Imaging, Medical, and Document
HTML5 JavaScript Libraries
Imaging, Medical, and Document