OcrCharacterIndex Property

Summary

Index of this character within its zone's character collection stored in DocumentPageText.OcrPageCharacters.

Syntax

C++/CLI

Python

public int OcrCharacterIndex {get; set;}

public:  
   property Int32 OcrCharacterIndex 
   { 
      Int32 get() 
      void set(Int32 value) 
   }

OcrCharacterIndex # get and set (DocumentCharacter)

Property Value

0-based index of this character within its zone's character collection stored in DocumentPageText.OcrPageCharacters (the zone itself is selected by OcrZoneIndex). This value is valid only if IsFromOcr is true. The default value is 0.

Remarks

For more information, refer to DocumentText.StoreOcrPageCharacters and Parsing Text with the Document Library.

Example

Java

using Leadtools; 
using Leadtools.Codecs; 
using Leadtools.Document.Writer; 
 
using Leadtools.Document; 
using Leadtools.Caching; 
using Leadtools.Annotations.Engine; 
using Leadtools.Ocr; 
using Leadtools.Barcode; 
using Leadtools.Document.Converter; 
 
/// <summary>
/// Loads a TIFF file, then obtains the text of its first page twice: once with
/// DocumentText.StoreOcrPageCharacters set to false and once with it set to true,
/// printing the OCR character confidences of the first word each time.
/// </summary>
public void StoreOcrPageCharactersExample()
{
   // Initialize an OCR engine. The using statement guarantees the engine is disposed
   // even if starting it, loading the document, or parsing the text throws.
   using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
   {
      // Start the engine using default parameters
      ocrEngine.Startup(null, null, null, @"C:\LEADTOOLS22\Bin\Common\OcrLEADRuntime");

      // Load a document that we know will be OCRed
      string documentFile = @"C:\LEADTOOLS22\Resources\Images\ocr1.tif";
      using (LEADDocument document = DocumentFactory.LoadFromFile(documentFile, new LoadDocumentOptions()))
      {
         // Set the OCR engine
         document.Text.OcrEngine = ocrEngine;
         // First, do not store IOcrPageCharacters in DocumentPageText; this will
         // cause our code that extracts the character OCR confidence values to not work
         document.Text.StoreOcrPageCharacters = false;
         Console.WriteLine("Obtaining OCR characters confidence with StoreOcrPageCharacters equals to false. Should fail");

         // Now get the text
         DocumentPage documentPage = document.Pages[0];
         DocumentPageText documentPageText = documentPage.GetText();
         // Build the words
         documentPageText.BuildWords();
         ShowOcrConfidence(documentPageText);

         Console.WriteLine("Obtaining OCR characters confidence with StoreOcrPageCharacters equals to true. Should work");
         document.Text.StoreOcrPageCharacters = true;
         // Get the text again so it is produced with the new setting
         documentPageText = documentPage.GetText();
         // Build the words
         documentPageText.BuildWords();
         ShowOcrConfidence(documentPageText);
      }
   }
}
 
/// <summary>
/// Prints information about every character of the first word in
/// <paramref name="documentPageText"/> and, for characters that came from OCR,
/// the confidence of the matching OcrCharacter when OcrPageCharacters is available.
/// </summary>
/// <param name="documentPageText">Page text whose words have already been built.</param>
private static void ShowOcrConfidence(DocumentPageText documentPageText)
{
   Console.WriteLine("Showing OCR confidence value of the characters of the first word");
   // Guard against both a missing and an empty word list; indexing Words[0] on an
   // empty list would throw.
   if (documentPageText.Words == null || documentPageText.Words.Count == 0)
   {
      Console.WriteLine("Nothing to show");
      return;
   }

   // Show the OCR character confidence for the first word
   // Get the first word
   DocumentWord word = documentPageText.Words[0];
   Console.WriteLine($"first word value:{word.Value}");
   // Get its characters (LastCharacterIndex is inclusive)
   for (int characterIndex = word.FirstCharacterIndex; characterIndex <= word.LastCharacterIndex; characterIndex++)
   {
      // DocumentCharacter reference
      DocumentCharacter documentCharacter = documentPageText.Characters[characterIndex];
      Console.WriteLine($" character at index {characterIndex} is {documentCharacter.Code} its isFromOcr value is {documentCharacter.IsFromOcr} and its bounds are {documentCharacter.Bounds}");
      Console.WriteLine($"Is end of line: {documentCharacter.IsEndOfLine}");
      Console.WriteLine($"Is end of word: {documentCharacter.IsEndOfWord}");
      Console.WriteLine($"Is right to left: {documentCharacter.IsRightToLeft}");
      // Ensure this is a character obtained from OCR
      if (documentCharacter.IsFromOcr)
      {
         // See if we stored the IOcrPageCharacters in the DocumentPageText
         if (documentPageText.OcrPageCharacters != null)
         {
            // We have it, get the corresponding OcrCharacter
            // Get the zone characters
            IOcrZoneCharacters ocrZoneCharacters = documentPageText.OcrPageCharacters[documentCharacter.OcrZoneIndex];
            // And the character in this zone
            OcrCharacter ocrCharacter = ocrZoneCharacters[documentCharacter.OcrCharacterIndex];
            int confidence = ocrCharacter.Confidence;
            Console.WriteLine($"   OCR character code is {ocrCharacter.Code} and confidence is {confidence}");
            // Sanity check: both objects should describe the same character
            Debug.Assert(ocrCharacter.Code == documentCharacter.Code);
         }
         else
         {
            Console.WriteLine("   Failed to get OCR confidence");
         }
      }
   }
}

 
import java.io.File; 
import java.io.FileOutputStream; 
import java.io.IOException; 
import java.net.MalformedURLException; 
import java.net.URI; 
import java.net.URISyntaxException; 
import java.net.URL; 
import java.nio.file.Files; 
import java.nio.file.Paths; 
import java.util.ArrayList; 
import java.util.Calendar; 
import java.util.List; 
import java.util.concurrent.Callable; 
import java.util.concurrent.ExecutorService; 
import java.util.concurrent.Executors; 
import java.util.concurrent.Future; 
import java.util.regex.Pattern; 
 
import org.junit.*; 
import org.junit.runner.JUnitCore; 
import org.junit.runner.Result; 
import org.junit.runner.notification.Failure; 
import static org.junit.Assert.*; 
 
import leadtools.*; 
import leadtools.annotations.engine.*; 
import leadtools.barcode.*; 
import leadtools.caching.*; 
import leadtools.codecs.*; 
import leadtools.document.*; 
import leadtools.document.DocumentMimeTypes.UserGetDocumentStatusHandler; 
import leadtools.document.converter.*; 
import leadtools.document.writer.*; 
import leadtools.ocr.*; 
 
 
/**
 * Loads a TIFF file, then obtains the text of its first page twice: once with
 * StoreOcrPageCharacters set to false and once with it set to true, printing
 * the OCR character confidences of the first word each time.
 */
public void storeOcrPageCharactersExample() {
   // Initialize an OCR engine
   OcrEngine ocrEngine = OcrEngineManager.createEngine(OcrEngineType.LEAD);
   try {
      // Start the engine using default parameters
      ocrEngine.startup(null, null, null, "C:\\LEADTOOLS23\\Bin\\Common\\OcrLEADRuntime");

      // Load a document that we know will be OCRed
      String documentFile = "C:\\LEADTOOLS23\\Resources\\Images\\ocr1.tif";
      LEADDocument document = DocumentFactory.loadFromFile(documentFile, new LoadDocumentOptions());
      try {
         // Set the OCR engine
         document.getText().setOcrEngine(ocrEngine);
         // First, do not store IOcrPageCharacters in DocumentPageText; this will
         // cause our code that extracts the character OCR confidence values to not work
         document.getText().setStoreOcrPageCharacters(false);
         System.out
               .println("Obtaining OCR characters confidence with StoreOcrPageCharacters equals to false. Should fail");

         // Now get the text
         DocumentPage documentPage = document.getPages().get(0);
         DocumentPageText documentPageText = documentPage.getText();
         // Build the words
         documentPageText.buildWords();
         showOcrConfidence(documentPageText);

         System.out.println("Obtaining OCR characters confidence with StoreOcrPageCharacters equals to true. Should work");
         document.getText().setStoreOcrPageCharacters(true);
         // Get the text again so it is produced with the new setting
         documentPageText = documentPage.getText();
         // Build the words
         documentPageText.buildWords();
         showOcrConfidence(documentPageText);
      } finally {
         // Release the document even if text parsing fails
         document.dispose();
      }
   } finally {
      // Release the engine even if startup, loading, or parsing fails
      ocrEngine.dispose();
   }
}
 
/**
 * Prints information about every character of the first word in the given page
 * text and, for characters that came from OCR, the confidence of the matching
 * OcrCharacter when the OCR page characters are available.
 *
 * @param documentPageText page text whose words have already been built
 */
private void showOcrConfidence(DocumentPageText documentPageText) {

   System.out.println("Showing OCR confidence value of the characters of the first word");
   // Guard against both a missing and an empty word list; get(0) on an empty
   // list would throw.
   if (documentPageText.getWords() == null || documentPageText.getWords().isEmpty()) {
      System.out.println("Nothing to show");
      return;
   }

   // Show the OCR character confidence for the first word
   // Get the first word
   DocumentWord word = documentPageText.getWords().get(0);
   System.out.println("first word value:" + word.getValue());
   // Get its characters (the last character index is inclusive)
   for (int characterIndex = word.getFirstCharacterIndex(); characterIndex <= word
         .getLastCharacterIndex(); characterIndex++) {
      // DocumentCharacter reference
      DocumentCharacter documentCharacter = documentPageText.getCharacters().get(characterIndex);
      System.out.println(" character at index " + characterIndex + " is " + documentCharacter.getCode()
            + " its isFromOcr value is " + documentCharacter.isFromOcr() + " and its bounds are "
            + documentCharacter.getBounds());
      System.out.println("Is end of line: " + documentCharacter.isEndOfLine());
      System.out.println("Is end of word: " + documentCharacter.isEndOfWord());
      System.out.println("Is right to left: " + documentCharacter.isRightToLeft());
      // Ensure this is a character obtained from OCR
      if (documentCharacter.isFromOcr()) {
         // See if we stored the OCR page characters in the DocumentPageText
         if (documentPageText.getOcrPageCharacters() != null) {
            // We have it, get the corresponding OcrCharacter
            // Get the zone characters
            OcrZoneCharacters ocrZoneCharacters = documentPageText.getOcrPageCharacters()
                  .get(documentCharacter.getOcrZoneIndex());
            // And the character in this zone
            OcrCharacter ocrCharacter = ocrZoneCharacters.get(documentCharacter.getOcrCharacterIndex());
            int confidence = ocrCharacter.getConfidence();
            System.out.println(
                  "   OCR character code is " + ocrCharacter.getCode() + " and confidence is " + confidence);
            // Sanity check: both objects should describe the same character
            assertTrue(ocrCharacter.getCode() == documentCharacter.getCode());
         } else {
            System.out.println("   Failed to get OCR confidence");
         }
      }
   }
}