Indicates whether to store the recognized OCR object used to obtain the text in this DocumentPageText.
public bool StoreOcrPageCharacters {get; set;}
Public Property StoreOcrPageCharacters() As Boolean
Get
Set
public:
property bool StoreOcrPageCharacters
{
bool get()
void set(bool value)
}
true to store the recognized OCR object used to obtain the text in this DocumentPageText; otherwise, false. The default value is false.
When DocumentPage.GetText is called, the engine tries to parse the text of the page using either the SVG or OCR engines.
When the OCR engine is used, the engine creates a valid IOcrPage object and calls Recognize. It then parses the resulting OCR characters (stored in an IOcrPageCharacters object), converts them to DocumentCharacter objects, and adds them to the page's DocumentPageText.Characters collection. Each resulting DocumentCharacter object will have DocumentCharacter.IsFromOcr set to true (since it was obtained through OCR), and the values of DocumentCharacter.OcrZoneIndex and DocumentCharacter.OcrCharacterIndex set to the zone and character index into the IOcrPageCharacters object used to obtain it.
This conversion will obtain some of the values stored in an OcrCharacter object, such as the character code and location. Other information obtained by the OCR engine, such as the color, confidence, and baseline, is not converted and will be lost.
When the value of StoreOcrPageCharacters is set to false (the default value), then the IOcrPageCharacters object used to create these document characters is created internally and not saved and the information is lost.
When the value of StoreOcrPageCharacters is set to true, then the IOcrPageCharacters object used to create these document characters will be stored in the DocumentPageText.OcrPageCharacters property and can be used to obtain all the other OCR information of the character.
The example below shows how to use StoreOcrPageCharacters to obtain the OCR confidence value for each document character.
For more information on text parsing, refer to Parsing Text with the Document Library.
This example will get the text from a document page using OCR and then use StoreOcrPageCharacters and DocumentPageText.OcrPageCharacters to find the OCR character confidences.
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Document.Writer;
using Leadtools.Document;
using Leadtools.Caching;
using Leadtools.Annotations.Engine;
using Leadtools.Ocr;
using Leadtools.Barcode;
using Leadtools.Document.Converter;
/// <summary>
/// Gets the text of the first page of an OCRed document twice - once with
/// StoreOcrPageCharacters set to false and once with it set to true - and
/// prints the OCR confidence of the first word's characters after each pass.
/// </summary>
public void StoreOcrPageCharactersExample()
{
    // Initialize an OCR engine
    IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD);
    try
    {
        // Start the engine using default parameters
        ocrEngine.Startup(null, null, null, @"C:\LEADTOOLS21\Bin\Common\OcrLEADRuntime");

        // Load a document that we know will be OCRed
        string documentFile = @"C:\LEADTOOLS21\Resources\Images\ocr1.tif";
        using (LEADDocument document = DocumentFactory.LoadFromFile(documentFile, new LoadDocumentOptions()))
        {
            // Set the OCR engine
            document.Text.OcrEngine = ocrEngine;

            // First, do not store IOcrPageCharacters in DocumentPageText. This will
            // cause our code that extracts the character OCR confidence values to not work
            document.Text.StoreOcrPageCharacters = false;
            Console.WriteLine("Obtaining OCR characters confidence with StoreOcrPageCharacters equals to false. Should fail");

            // Now get the text
            DocumentPage documentPage = document.Pages[0];
            DocumentPageText documentPageText = documentPage.GetText();
            // Build the words
            documentPageText.BuildWords();
            ShowOcrConfidence(documentPageText);

            // Second pass: store the IOcrPageCharacters and get the text again
            Console.WriteLine("Obtaining OCR characters confidence with StoreOcrPageCharacters equals to true. Should work");
            document.Text.StoreOcrPageCharacters = true;
            documentPageText = documentPage.GetText();
            // Build the words
            documentPageText.BuildWords();
            ShowOcrConfidence(documentPageText);
        }
    }
    finally
    {
        // Dispose the engine even if startup, loading, or text extraction throws.
        // This runs after the document's using block has already disposed the document.
        ocrEngine.Dispose();
    }
}
/// <summary>
/// Prints, for every character of the first word in <paramref name="documentPageText"/>,
/// its code and IsFromOcr flag, and - when the OCR page characters were stored -
/// the matching OCR character code and confidence.
/// </summary>
/// <param name="documentPageText">Page text whose words have already been built.</param>
private static void ShowOcrConfidence(DocumentPageText documentPageText)
{
    Console.WriteLine("Showing OCR confidence value of the characters of the first word");

    // Guard against both a missing and an empty word list; indexing Words[0]
    // on an empty list would throw
    if (documentPageText.Words == null || documentPageText.Words.Count == 0)
    {
        Console.WriteLine("Nothing to show");
        return;
    }

    // Show the OCR character confidence for the first word
    DocumentWord word = documentPageText.Words[0];
    Console.WriteLine($"first word value:{word.Value}");

    // Walk its characters; LastCharacterIndex is inclusive
    for (int characterIndex = word.FirstCharacterIndex; characterIndex <= word.LastCharacterIndex; characterIndex++)
    {
        DocumentCharacter documentCharacter = documentPageText.Characters[characterIndex];
        Console.WriteLine($" character at index {characterIndex} is {documentCharacter.Code} and its isFromOcr value is {documentCharacter.IsFromOcr}");

        // Only characters obtained from OCR have OCR zone/character indices
        if (!documentCharacter.IsFromOcr)
        {
            continue;
        }

        // See if the IOcrPageCharacters object was stored in the page text
        if (documentPageText.OcrPageCharacters == null)
        {
            Console.WriteLine(" Failed to get OCR confidence");
            continue;
        }

        // We have it, get the corresponding OcrCharacter:
        // first the zone characters, then the character in this zone
        IOcrZoneCharacters ocrZoneCharacters = documentPageText.OcrPageCharacters[documentCharacter.OcrZoneIndex];
        OcrCharacter ocrCharacter = ocrZoneCharacters[documentCharacter.OcrCharacterIndex];
        int confidence = ocrCharacter.Confidence;
        Console.WriteLine($" OCR character code is {ocrCharacter.Code} and confidence is {confidence}");

        // Sanity check: both objects must describe the same character
        Debug.Assert(ocrCharacter.Code == documentCharacter.Code);
    }
}
Imports Leadtools
Imports Leadtools.Codecs
Imports Leadtools.Document.Writer
Imports Leadtools.Svg
Imports Leadtools.Document
Imports Leadtools.Caching
Imports Leadtools.Annotations.Engine
Imports Leadtools.Barcode
Imports Leadtools.Ocr
Imports LeadtoolsDocumentExamples.LeadtoolsExamples.Common
Imports Leadtools.Document.Converter
''' <summary>
''' Gets the text of the first page of an OCRed document twice - once with
''' StoreOcrPageCharacters set to False and once with it set to True - and
''' prints the OCR confidence of the first word's characters after each pass.
''' </summary>
Public Shared Sub StoreOcrPageCharactersExample()
   ' Initialize an OCR engine
   Dim ocrEngine As IOcrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD)
   Try
      ' Start the engine using default parameters
      ocrEngine.Startup(Nothing, Nothing, Nothing, "C:\LEADTOOLS21\Bin\Common\OcrLEADRuntime")

      ' Load a document that we know will be OCRed
      Dim documentFile As String = "C:\LEADTOOLS21\Resources\Images\ocr1.tif"
      Using document As LEADDocument = DocumentFactory.LoadFromFile(documentFile, New LoadDocumentOptions())
         ' Set the OCR engine
         document.Text.OcrEngine = ocrEngine

         ' First, do not store IOcrPageCharacters in DocumentPageText. This will
         ' cause our code that extracts the character OCR confidence values to not work
         document.Text.StoreOcrPageCharacters = False
         Console.WriteLine("Obtaining OCR characters confidence with StoreOcrPageCharacters equals to false. Should fail")

         ' Now get the text
         Dim documentPage As DocumentPage = document.Pages(0)
         Dim documentPageText As DocumentPageText = documentPage.GetText()
         ' Build the words
         documentPageText.BuildWords()
         ShowOcrConfidence(documentPageText)

         ' Second pass: store the IOcrPageCharacters and get the text again
         Console.WriteLine("Obtaining OCR characters confidence with StoreOcrPageCharacters equals to true. Should work")
         document.Text.StoreOcrPageCharacters = True
         documentPageText = documentPage.GetText()
         ' Build the words
         documentPageText.BuildWords()
         ShowOcrConfidence(documentPageText)
      End Using
   Finally
      ' Dispose the engine even if startup, loading, or text extraction throws.
      ' This runs after the Using block has already disposed the document.
      ocrEngine.Dispose()
   End Try
End Sub
''' <summary>
''' Prints, for every character of the first word in <paramref name="documentPageText"/>,
''' its code and IsFromOcr flag, and - when the OCR page characters were stored -
''' the matching OCR character code and confidence.
''' </summary>
''' <param name="documentPageText">Page text whose words have already been built.</param>
Private Shared Sub ShowOcrConfidence(documentPageText As DocumentPageText)
   Console.WriteLine("Showing OCR confidence value of the characters of the first word")

   ' Guard against both a missing and an empty word list; indexing Words(0)
   ' on an empty list would throw
   If documentPageText.Words Is Nothing OrElse documentPageText.Words.Count = 0 Then
      Console.WriteLine("Nothing to show")
      Return
   End If

   ' Show the OCR character confidence for the first word
   Dim word As DocumentWord = documentPageText.Words(0)
   Console.WriteLine($"first word value:{word.Value}")

   ' Walk its characters; LastCharacterIndex is inclusive
   For characterIndex As Integer = word.FirstCharacterIndex To word.LastCharacterIndex
      Dim docCharacter As DocumentCharacter = documentPageText.Characters(characterIndex)
      Console.WriteLine($" character at index {characterIndex} is {docCharacter.Code} and its isFromOcr value is {docCharacter.IsFromOcr}")

      ' Only characters obtained from OCR have OCR zone/character indices
      If Not docCharacter.IsFromOcr Then
         Continue For
      End If

      ' See if the IOcrPageCharacters object was stored in the page text
      If documentPageText.OcrPageCharacters Is Nothing Then
         Console.WriteLine(" Failed to get OCR confidence")
         Continue For
      End If

      ' We have it, get the corresponding OcrCharacter:
      ' first the zone characters, then the character in this zone
      Dim ocrZoneCharacters As IOcrZoneCharacters = documentPageText.OcrPageCharacters(docCharacter.OcrZoneIndex)
      Dim recognizedCharacter As OcrCharacter = ocrZoneCharacters(docCharacter.OcrCharacterIndex)
      Dim confidence As Integer = recognizedCharacter.Confidence
      Console.WriteLine($" OCR character code is {recognizedCharacter.Code} and confidence is {confidence}")

      ' Sanity check: both objects must describe the same character
      Debug.Assert(recognizedCharacter.Code = docCharacter.Code)
   Next
End Sub
Help Collections
Raster .NET | C API | C++ Class Library | HTML5 JavaScript
Document .NET | C API | C++ Class Library | HTML5 JavaScript
Medical .NET | C API | C++ Class Library | HTML5 JavaScript
Medical Web Viewer .NET
Multimedia
Direct Show .NET | C API | Filters
Media Foundation .NET | C API | Transforms
Supported Platforms
.NET, Java, Android, and iOS/macOS Assemblies
Imaging, Medical, and Document
C API/C++ Class Libraries
Imaging, Medical, and Document
HTML5 JavaScript Libraries
Imaging, Medical, and Document