OcrPageCharacters Property

Summary

The recognized OCR characters object associated with this page.

Syntax

C#

C++/CLI

Python

/// <summary>
/// Gets the recognized OCR characters object associated with this page.
/// </summary>
/// <value>
/// The recognized OCR characters object if DocumentText.StoreOcrPageCharacters was true;
/// otherwise, null. The default value is null.
/// </value>
public IOcrPageCharacters OcrPageCharacters { get; }

public:
   // Read-only property: the recognized OCR characters object associated with this page.
   // Null unless DocumentText.StoreOcrPageCharacters was true when the text was obtained.
   property IOcrPageCharacters^ OcrPageCharacters
   {
      IOcrPageCharacters^ get();
   }

OcrPageCharacters # get  (DocumentPageText)

Property Value

The recognized OCR characters object associated with this page if DocumentText.StoreOcrPageCharacters was true; otherwise, null. The default value is null.

Remarks

For more information, refer to DocumentText.StoreOcrPageCharacters and Parsing Text with the Document Library.

Example

C#

Java

using Leadtools; 
using Leadtools.Codecs; 
using Leadtools.Document.Writer; 
 
using Leadtools.Document; 
using Leadtools.Caching; 
using Leadtools.Annotations.Engine; 
using Leadtools.Ocr; 
using Leadtools.Barcode; 
using Leadtools.Document.Converter; 
 
/// <summary>
/// Demonstrates the effect of DocumentText.StoreOcrPageCharacters on
/// DocumentPageText.OcrPageCharacters: the page text is obtained once with the
/// option off (OCR confidence is not available) and once with it on.
/// </summary>
public void StoreOcrPageCharactersExample()
{
   // Initialize an OCR engine. The using block guarantees the engine is disposed
   // even if startup, loading, or text extraction throws.
   using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
   {
      // Start the engine using default parameters
      ocrEngine.Startup(null, null, null, @"C:\LEADTOOLS22\Bin\Common\OcrLEADRuntime");

      // Load a document that we know will be OCRed
      string documentFile = @"C:\LEADTOOLS22\Resources\Images\ocr1.tif";
      using (LEADDocument document = DocumentFactory.LoadFromFile(documentFile, new LoadDocumentOptions()))
      {
         // Set the OCR engine
         document.Text.OcrEngine = ocrEngine;
         // First, do not store IOcrPageCharacters in DocumentPageText, this will
         // cause our code to extract the character OCR confidences value to not work
         document.Text.StoreOcrPageCharacters = false;
         Console.WriteLine("Obtaining OCR characters confidence with StoreOcrPageCharacters equals to false. Should fail");

         // Now get the text
         DocumentPage documentPage = document.Pages[0];
         DocumentPageText documentPageText = documentPage.GetText();
         // Build the words
         documentPageText.BuildWords();
         ShowOcrConfidence(documentPageText);

         // Second pass: store the IOcrPageCharacters so the confidence lookup succeeds
         Console.WriteLine("Obtaining OCR characters confidence with StoreOcrPageCharacters equals to true. Should work");
         document.Text.StoreOcrPageCharacters = true;
         documentPageText = documentPage.GetText();
         // Build the words
         documentPageText.BuildWords();
         ShowOcrConfidence(documentPageText);
      }
   }
}
 
/// <summary>
/// Prints details of every character in the first word of the page text and,
/// for characters obtained from OCR, the confidence read from
/// DocumentPageText.OcrPageCharacters when it is available.
/// </summary>
/// <param name="documentPageText">Page text whose words have already been built.</param>
private static void ShowOcrConfidence(DocumentPageText documentPageText)
{
   Console.WriteLine("Showing OCR confidence value of the characters of the first word");
   // Guard against both a missing and an empty word list; indexing Words[0]
   // on an empty list would throw.
   if (documentPageText.Words == null || documentPageText.Words.Count == 0)
   {
      Console.WriteLine("Nothing to show");
      return;
   }

   // Show the OCR character confidence for the first word
   // Get the first word
   DocumentWord word = documentPageText.Words[0];
   Console.WriteLine($"first word value:{word.Value}");
   // Get its characters (LastCharacterIndex is used as an inclusive bound)
   for (int characterIndex = word.FirstCharacterIndex; characterIndex <= word.LastCharacterIndex; characterIndex++)
   {
      // DocumentCharacter reference
      DocumentCharacter documentCharacter = documentPageText.Characters[characterIndex];
      Console.WriteLine($" character at index {characterIndex} is {documentCharacter.Code} its isFromOcr value is {documentCharacter.IsFromOcr} and its bounds are {documentCharacter.Bounds}");
      Console.WriteLine($"Is end of line: {documentCharacter.IsEndOfLine}");
      Console.WriteLine($"Is end of word: {documentCharacter.IsEndOfWord}");
      Console.WriteLine($"Is right to left: {documentCharacter.IsRightToLeft}");
      // Ensure this is a character obtained from OCR
      if (documentCharacter.IsFromOcr)
      {
         // See if we stored the IOcrPageCharacters in the DocumentPageText
         if (documentPageText.OcrPageCharacters != null)
         {
            // We have it, get the corresponding OcrCharacter
            // Get the zone characters
            IOcrZoneCharacters ocrZoneCharacters = documentPageText.OcrPageCharacters[documentCharacter.OcrZoneIndex];
            // And the character in this zone
            OcrCharacter ocrCharacter = ocrZoneCharacters[documentCharacter.OcrCharacterIndex];
            int confidence = ocrCharacter.Confidence;
            Console.WriteLine($"   OCR character code is {ocrCharacter.Code} and confidence is {confidence}");
            // Sanity check
            Debug.Assert(ocrCharacter.Code == documentCharacter.Code);
         }
         else
         {
            Console.WriteLine("   Failed to get OCR confidence");
         }
      }
   }
}

 
import java.io.File; 
import java.io.FileOutputStream; 
import java.io.IOException; 
import java.net.MalformedURLException; 
import java.net.URI; 
import java.net.URISyntaxException; 
import java.net.URL; 
import java.nio.file.Files; 
import java.nio.file.Paths; 
import java.util.ArrayList; 
import java.util.Calendar; 
import java.util.List; 
import java.util.concurrent.Callable; 
import java.util.concurrent.ExecutorService; 
import java.util.concurrent.Executors; 
import java.util.concurrent.Future; 
import java.util.regex.Pattern; 
 
import org.junit.*; 
import org.junit.runner.JUnitCore; 
import org.junit.runner.Result; 
import org.junit.runner.notification.Failure; 
import static org.junit.Assert.*; 
 
import leadtools.*; 
import leadtools.annotations.engine.*; 
import leadtools.barcode.*; 
import leadtools.caching.*; 
import leadtools.codecs.*; 
import leadtools.document.*; 
import leadtools.document.DocumentMimeTypes.UserGetDocumentStatusHandler; 
import leadtools.document.converter.*; 
import leadtools.document.writer.*; 
import leadtools.ocr.*; 
 
 
/**
 * Demonstrates the effect of DocumentText.setStoreOcrPageCharacters on
 * DocumentPageText.getOcrPageCharacters: the page text is obtained once with
 * the option off (OCR confidence is not available) and once with it on.
 */
public void storeOcrPageCharactersExample() {
   // Initialize an OCR engine
   OcrEngine ocrEngine = OcrEngineManager.createEngine(OcrEngineType.LEAD);
   try {
      // Start the engine using default parameters
      ocrEngine.startup(null, null, null, "C:\\LEADTOOLS23\\Bin\\Common\\OcrLEADRuntime");

      // Load a document that we know will be OCRed
      String documentFile = "C:\\LEADTOOLS23\\Resources\\Images\\ocr1.tif";
      LEADDocument document = DocumentFactory.loadFromFile(documentFile, new LoadDocumentOptions());
      try {
         // Set the OCR engine
         document.getText().setOcrEngine(ocrEngine);
         // First, do not store IOcrPageCharacters in DocumentPageText, this will
         // cause our code to extract the character OCR confidences value to not work
         document.getText().setStoreOcrPageCharacters(false);
         System.out
               .println("Obtaining OCR characters confidence with StoreOcrPageCharacters equals to false. Should fail");

         // Now get the text
         DocumentPage documentPage = document.getPages().get(0);
         DocumentPageText documentPageText = documentPage.getText();
         // Build the words
         documentPageText.buildWords();
         showOcrConfidence(documentPageText);

         // Second pass: store the IOcrPageCharacters so the confidence lookup succeeds
         System.out.println("Obtaining OCR characters confidence with StoreOcrPageCharacters equals to true. Should work");
         document.getText().setStoreOcrPageCharacters(true);
         documentPageText = documentPage.getText();
         // Build the words
         documentPageText.buildWords();
         showOcrConfidence(documentPageText);
      } finally {
         // Release the document before the engine it was using
         if (document != null) {
            document.dispose();
         }
      }
   } finally {
      // Always release the engine, even when loading or OCR fails
      ocrEngine.dispose();
   }
}
 
/**
 * Prints details of every character in the first word of the page text and,
 * for characters obtained from OCR, the confidence read from
 * DocumentPageText.getOcrPageCharacters() when it is available.
 *
 * @param documentPageText page text whose words have already been built
 */
private void showOcrConfidence(DocumentPageText documentPageText) {

   System.out.println("Showing OCR confidence value of the characters of the first word");
   // Guard against both a missing and an empty word list; get(0) on an
   // empty list would throw.
   if (documentPageText.getWords() == null || documentPageText.getWords().isEmpty()) {
      System.out.println("Nothing to show");
      return;
   }

   // Show the OCR character confidence for the first word
   // Get the first word
   DocumentWord word = documentPageText.getWords().get(0);
   System.out.println("first word value:" + word.getValue());
   // Get its characters (getLastCharacterIndex is used as an inclusive bound)
   for (int characterIndex = word.getFirstCharacterIndex(); characterIndex <= word
         .getLastCharacterIndex(); characterIndex++) {
      // DocumentCharacter reference
      DocumentCharacter documentCharacter = documentPageText.getCharacters().get(characterIndex);
      System.out.println(" character at index " + characterIndex + " is " + documentCharacter.getCode()
            + " its isFromOcr value is " + documentCharacter.isFromOcr() + " and its bounds are "
            + documentCharacter.getBounds());
      System.out.println("Is end of line: " + documentCharacter.isEndOfLine());
      System.out.println("Is end of word: " + documentCharacter.isEndOfWord());
      System.out.println("Is right to left: " + documentCharacter.isRightToLeft());
      // Ensure this is a character obtained from OCR
      if (documentCharacter.isFromOcr()) {
         // See if we stored the IOcrPageCharacters in the DocumentPageText
         if (documentPageText.getOcrPageCharacters() != null) {
            // We have it, get the corresponding OcrCharacter
            // Get the zone characters
            OcrZoneCharacters ocrZoneCharacters = documentPageText.getOcrPageCharacters()
                  .get(documentCharacter.getOcrZoneIndex());
            // And the character in this zone
            OcrCharacter ocrCharacter = ocrZoneCharacters.get(documentCharacter.getOcrCharacterIndex());
            int confidence = ocrCharacter.getConfidence();
            System.out.println(
                  "   OCR character code is " + ocrCharacter.getCode() + " and confidence is " + confidence);
            // Sanity check
            assertTrue(ocrCharacter.getCode() == documentCharacter.getCode());
         } else {
            System.out.println("   Failed to get OCR confidence");
         }
      }
   }
}

Requirements

Target Platforms

Reference

DocumentPageText Class

DocumentPageText Members

Leadtools.Document Namespace

Download our FREE evaluation

Help Version 23.0.2024.2.29

Leadtools.Document Assembly

Introduction

Getting Started

Namespaces

Leadtools.Document Namespace

Assemblies