OcrEngine Property

Summary

Optional OCR engine instance to use when extracting text.

Syntax

C#

C++/CLI

Java

Python

public IOcrEngine OcrEngine { get; set; }

public:  
   property IOcrEngine^ OcrEngine 
   { 
      IOcrEngine^ get() 
      void set(IOcrEngine^ value) 
   }

public OcrEngine getOcrEngine() 
public void setOcrEngine(OcrEngine value)

OcrEngine # get and set (DocumentText)

Property Value

The optional OCR engine instance to use when extracting text. Default value is null.

Remarks

This property is not used if the value of TextExtractionMode is DocumentTextExtractionMode.SvgOnly.

The text of the document can be extracted using OCR technologies. In this mode, a valid IOcrEngine instance must be initialized (created and started) and set in the OcrEngine property before DocumentPage.GetText is called.

OcrEngine can be set up with any extra options from outside (such as enabled languages, spell checker, accuracy tradeoffs, etc.). The Document class will use the engine as is.

The Document object will not delete the value of OcrEngine when it is disposed. IOcrEngine is thread-safe by nature and the same instance can be set into multiple Document instances and used at the same time.

For more information, refer to Parsing Text with the Document Library.

Note that setting this property to a value will update the same value in each child document.

Example

C#

Java

using Leadtools; 
using Leadtools.Codecs; 
using Leadtools.Document.Writer; 
 
using Leadtools.Document; 
using Leadtools.Caching; 
using Leadtools.Annotations.Engine; 
using Leadtools.Ocr; 
using Leadtools.Barcode; 
using Leadtools.Document.Converter; 
 
/// <summary>
/// Loads a TIF document, assigns a started LEAD OCR engine to its DocumentText,
/// lists the available text extraction modes and prints the text of the first page.
/// </summary>
/// <remarks>
/// The document does not delete the engine assigned to DocumentText.OcrEngine, so this
/// example owns the engine and the RasterCodecs instance and disposes both itself.
/// </remarks>
public void DocumentTextExample()
{
   var options = new LoadDocumentOptions();

   // The codecs and engine are declared before the document so that the document,
   // which only borrows the engine, is disposed first.
   using (var rasterCodecs = new RasterCodecs())
   using (var ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
   using (var document = DocumentFactory.LoadFromFile(Path.Combine(LEAD_VARS.ImagesDir, "slice.tif"), options))
   {
      // For the TIF file we need an OCR engine
      var documentWriter = new DocumentWriter();
      ocrEngine.Startup(rasterCodecs, documentWriter, null, LEAD_VARS.OcrLEADRuntimeDir);

      // DocumentText reference
      document.Text.OcrEngine = ocrEngine;

      // Get all of the DocumentTextExtractionModes (DocumentTextExtractionMode reference)
      DocumentTextExtractionMode[] textExtractionModes = (DocumentTextExtractionMode[])Enum.GetValues(typeof(DocumentTextExtractionMode));
      foreach (var mode in textExtractionModes)
      {
         Console.WriteLine($"Text extraction mode: {mode}");
      }

      // Get the text of the first page
      var page = document.Pages[0];
      var pageText = page.GetText();
      if (pageText != null)
      {
         // BuildText is called before reading the Text property
         pageText.BuildText();
         var text = pageText.Text;

         Console.WriteLine(text);
      }
      else
      {
         Console.WriteLine("Failed!");
      }
   }
}
 
// Locations used by the example above.
// NOTE(review): the paths look like a LEADTOOLS 23 installation under C:\ - adjust to the local setup.
static class LEAD_VARS 
{ 
   // Folder containing the sample image ("slice.tif") loaded by the example
   public const string ImagesDir = @"C:\LEADTOOLS23\Resources\Images"; 
   // Folder passed to IOcrEngine.Startup as its last argument
   public const string OcrLEADRuntimeDir = @"C:\LEADTOOLS23\Bin\Common\OcrLEADRuntime"; 
}

 
import java.io.File; 
import java.io.FileOutputStream; 
import java.io.IOException; 
import java.net.MalformedURLException; 
import java.net.URI; 
import java.net.URISyntaxException; 
import java.net.URL; 
import java.nio.file.Files; 
import java.nio.file.Paths; 
import java.util.ArrayList; 
import java.util.Calendar; 
import java.util.List; 
import java.util.concurrent.Callable; 
import java.util.concurrent.ExecutorService; 
import java.util.concurrent.Executors; 
import java.util.concurrent.Future; 
import java.util.regex.Pattern; 
 
import org.junit.*; 
import org.junit.runner.JUnitCore; 
import org.junit.runner.Result; 
import org.junit.runner.notification.Failure; 
import static org.junit.Assert.*; 
 
import leadtools.*; 
import leadtools.annotations.engine.*; 
import leadtools.barcode.*; 
import leadtools.caching.*; 
import leadtools.codecs.*; 
import leadtools.document.*; 
import leadtools.document.DocumentMimeTypes.UserGetDocumentStatusHandler; 
import leadtools.document.converter.*; 
import leadtools.document.writer.*; 
import leadtools.ocr.*; 
 
 
/**
 * Loads a TIF document, assigns a started LEAD OCR engine to its DocumentText,
 * lists the available text extraction modes and prints the text of the first page.
 *
 * <p>The document does not release the engine set through {@code setOcrEngine}, so this
 * example releases the document, the engine and the codecs itself in a {@code finally}
 * block, even when an assertion or an exception interrupts the run.
 */
public void documentTextExample() {
   final String LEAD_VARS_IMAGES_DIR = "C:\\LEADTOOLS23\\Resources\\Images";
   final String OCR_LEAD_RUNTIME_DIR = "C:\\LEADTOOLS23\\Bin\\Common\\OcrLEADRuntime";

   LEADDocument document = null;
   OcrEngine ocrEngine = null;
   RasterCodecs rasterCodecs = null;
   try {
      LoadDocumentOptions options = new LoadDocumentOptions();
      document = DocumentFactory.loadFromFile(combine(LEAD_VARS_IMAGES_DIR, "slice.tif"), options);

      // For the TIF file we need an OCR engine
      ocrEngine = OcrEngineManager.createEngine(OcrEngineType.LEAD);
      rasterCodecs = new RasterCodecs();
      DocumentWriter documentWriter = new DocumentWriter();
      ocrEngine.startup(rasterCodecs, documentWriter, null, OCR_LEAD_RUNTIME_DIR);

      // DocumentText reference
      document.getText().setOcrEngine(ocrEngine);

      // Get all of the DocumentTextExtractionModes (DocumentTextExtractionMode reference)
      for (DocumentTextExtractionMode mode : DocumentTextExtractionMode.values()) {
         System.out.println("Text extraction mode: " + mode);
      }

      // Get the text of the first page
      DocumentPage page = document.getPages().get(0);
      DocumentPageText pageText = page.getText();
      if (pageText != null) {
         // buildText is called before reading the text
         pageText.buildText();
         String text = pageText.getText();

         System.out.println(text);
      } else {
         System.out.println("Failed!");
      }
      assertTrue(pageText != null);
   } finally {
      // Release the document first: it only borrows the engine
      if (document != null) {
         document.dispose();
      }
      if (ocrEngine != null) {
         ocrEngine.dispose();
      }
      if (rasterCodecs != null) {
         rasterCodecs.dispose();
      }
   }
}

Requirements

Target Platforms

Reference

DocumentText Class

DocumentText Members

Leadtools.Document Namespace

Download our FREE evaluation

Help Version 23.0.2024.2.29

Leadtools.Document Assembly

Introduction

Getting Started

Namespaces

Leadtools.Document Namespace

Assemblies