Contains the properties of a text character found in the page.
[SerializableAttribute()]
[DataContractAttribute()]
public struct DocumentCharacter
public [SerializableAttribute,
DataContractAttribute]
value class DocumentCharacter sealed
public class DocumentCharacter implements Serializable
class DocumentCharacter:
The DocumentCharacter structure is used as the type for the Characters list.
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Document.Writer;
using Leadtools.Document;
using Leadtools.Caching;
using Leadtools.Annotations.Engine;
using Leadtools.Ocr;
using Leadtools.Barcode;
using Leadtools.Document.Converter;
/// <summary>
/// Loads a document, runs text extraction on its first page twice - once with
/// StoreOcrPageCharacters set to false and once set to true - and prints the
/// OCR confidence of the characters of the first word each time.
/// </summary>
public void StoreOcrPageCharactersExample()
{
    // Initialize an OCR engine. The using statement guarantees the engine is
    // disposed even if startup, loading or text extraction throws.
    using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
    {
        // Start the engine using default parameters
        ocrEngine.Startup(null, null, null, @"C:\LEADTOOLS22\Bin\Common\OcrLEADRuntime");

        // Load a document that we know will be OCRed
        string documentFile = @"C:\LEADTOOLS22\Resources\Images\ocr1.tif";
        using (LEADDocument document = DocumentFactory.LoadFromFile(documentFile, new LoadDocumentOptions()))
        {
            // Set the OCR engine
            document.Text.OcrEngine = ocrEngine;

            // First pass: do not store the IOcrPageCharacters in DocumentPageText.
            // The code that extracts the per-character OCR confidence is expected
            // to report a failure in this mode.
            document.Text.StoreOcrPageCharacters = false;
            Console.WriteLine("Obtaining OCR characters confidence with StoreOcrPageCharacters equals to false. Should fail");

            // Now get the text
            DocumentPage documentPage = document.Pages[0];
            DocumentPageText documentPageText = documentPage.GetText();
            // Build the words
            documentPageText.BuildWords();
            ShowOcrConfidence(documentPageText);

            // Second pass: store the IOcrPageCharacters and get the text again
            Console.WriteLine("Obtaining OCR characters confidence with StoreOcrPageCharacters equals to true. Should work");
            document.Text.StoreOcrPageCharacters = true;
            documentPageText = documentPage.GetText();
            // Build the words
            documentPageText.BuildWords();
            ShowOcrConfidence(documentPageText);
        }
    }
}
/// <summary>
/// Prints the properties of every character of the first word in
/// <paramref name="documentPageText"/> and, for characters that came from OCR,
/// the matching OCR character code and confidence when the OCR page
/// characters were stored.
/// </summary>
/// <param name="documentPageText">Page text whose words have already been built.</param>
private static void ShowOcrConfidence(DocumentPageText documentPageText)
{
    Console.WriteLine("Showing OCR confidence value of the characters of the first word");

    // Guard against both a missing and an empty word list; indexing Words[0]
    // on an empty list would throw.
    if (documentPageText.Words == null || documentPageText.Words.Count == 0)
    {
        Console.WriteLine("Nothing to show");
        return;
    }

    // Show the OCR character confidence for the first word
    DocumentWord word = documentPageText.Words[0];
    Console.WriteLine($"first word value:{word.Value}");

    // Walk the characters of the word (LastCharacterIndex is inclusive)
    for (int characterIndex = word.FirstCharacterIndex; characterIndex <= word.LastCharacterIndex; characterIndex++)
    {
        // DocumentCharacter reference
        DocumentCharacter documentCharacter = documentPageText.Characters[characterIndex];
        Console.WriteLine($" character at index {characterIndex} is {documentCharacter.Code} its isFromOcr value is {documentCharacter.IsFromOcr} and its bounds are {documentCharacter.Bounds}");
        Console.WriteLine($"Is end of line: {documentCharacter.IsEndOfLine}");
        Console.WriteLine($"Is end of word: {documentCharacter.IsEndOfWord}");
        Console.WriteLine($"Is right to left: {documentCharacter.IsRightToLeft}");

        // Only characters obtained from OCR have a confidence value to look up
        if (!documentCharacter.IsFromOcr)
        {
            continue;
        }

        // See if the IOcrPageCharacters were stored in the page text
        if (documentPageText.OcrPageCharacters != null)
        {
            // We have them, get the corresponding OcrCharacter:
            // first the characters of the zone...
            IOcrZoneCharacters ocrZoneCharacters = documentPageText.OcrPageCharacters[documentCharacter.OcrZoneIndex];
            // ...and then the character inside this zone
            OcrCharacter ocrCharacter = ocrZoneCharacters[documentCharacter.OcrCharacterIndex];
            int confidence = ocrCharacter.Confidence;
            Console.WriteLine($" OCR character code is {ocrCharacter.Code} and confidence is {confidence}");
            // Sanity check: both views must describe the same character
            Debug.Assert(ocrCharacter.Code == documentCharacter.Code);
        }
        else
        {
            Console.WriteLine(" Failed to get OCR confidence");
        }
    }
}
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Pattern;
import org.junit.*;
import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
import org.junit.runner.notification.Failure;
import static org.junit.Assert.*;
import leadtools.*;
import leadtools.annotations.engine.*;
import leadtools.barcode.*;
import leadtools.caching.*;
import leadtools.codecs.*;
import leadtools.document.*;
import leadtools.document.DocumentMimeTypes.UserGetDocumentStatusHandler;
import leadtools.document.converter.*;
import leadtools.document.writer.*;
import leadtools.ocr.*;
/**
 * Loads a document, runs text extraction on its first page twice - once with
 * StoreOcrPageCharacters set to false and once set to true - and prints the
 * OCR confidence of the characters of the first word each time.
 */
public void storeOcrPageCharactersExample() {
   // Initialize an OCR engine
   OcrEngine ocrEngine = OcrEngineManager.createEngine(OcrEngineType.LEAD);
   try {
      // Start the engine using default parameters
      ocrEngine.startup(null, null, null, "C:\\LEADTOOLS23\\Bin\\Common\\OcrLEADRuntime");

      // Load a document that we know will be OCRed
      String documentFile = "C:\\LEADTOOLS23\\Resources\\Images\\ocr1.tif";
      LEADDocument document = DocumentFactory.loadFromFile(documentFile, new LoadDocumentOptions());
      try {
         // Set the OCR engine
         document.getText().setOcrEngine(ocrEngine);

         // First pass: do not store the IOcrPageCharacters in DocumentPageText.
         // The code that extracts the per-character OCR confidence is expected
         // to report a failure in this mode.
         document.getText().setStoreOcrPageCharacters(false);
         System.out
               .println("Obtaining OCR characters confidence with StoreOcrPageCharacters equals to false. Should fail");

         // Now get the text
         DocumentPage documentPage = document.getPages().get(0);
         DocumentPageText documentPageText = documentPage.getText();
         // Build the words
         documentPageText.buildWords();
         showOcrConfidence(documentPageText);

         // Second pass: store the IOcrPageCharacters and get the text again
         System.out.println("Obtaining OCR characters confidence with StoreOcrPageCharacters equals to true. Should work");
         document.getText().setStoreOcrPageCharacters(true);
         documentPageText = documentPage.getText();
         // Build the words
         documentPageText.buildWords();
         showOcrConfidence(documentPageText);
      } finally {
         // Release the document before the engine it was using
         document.dispose();
      }
   } finally {
      // Always release the engine, even when startup, loading or OCR fails
      ocrEngine.dispose();
   }
}
/**
 * Prints the properties of every character of the first word in the given page
 * text and, for characters that came from OCR, the matching OCR character code
 * and confidence when the OCR page characters were stored.
 *
 * @param documentPageText page text whose words have already been built
 */
private void showOcrConfidence(DocumentPageText documentPageText) {
   System.out.println("Showing OCR confidence value of the characters of the first word");

   // Guard against both a missing and an empty word list; get(0) on an empty
   // list would throw.
   if (documentPageText.getWords() == null || documentPageText.getWords().isEmpty()) {
      System.out.println("Nothing to show");
      return;
   }

   // Show the OCR character confidence for the first word
   DocumentWord word = documentPageText.getWords().get(0);
   System.out.println("first word value:" + word.getValue());

   // Walk the characters of the word (the last character index is inclusive)
   for (int characterIndex = word.getFirstCharacterIndex(); characterIndex <= word
         .getLastCharacterIndex(); characterIndex++) {
      // DocumentCharacter reference
      DocumentCharacter documentCharacter = documentPageText.getCharacters().get(characterIndex);
      System.out.println(" character at index " + characterIndex + " is " + documentCharacter.getCode()
            + " its isFromOcr value is " + documentCharacter.isFromOcr() + " and its bounds are "
            + documentCharacter.getBounds());
      System.out.println("Is end of line: " + documentCharacter.isEndOfLine());
      System.out.println("Is end of word: " + documentCharacter.isEndOfWord());
      System.out.println("Is right to left: " + documentCharacter.isRightToLeft());

      // Only characters obtained from OCR have a confidence value to look up
      if (!documentCharacter.isFromOcr()) {
         continue;
      }

      // See if the OCR page characters were stored in the page text
      if (documentPageText.getOcrPageCharacters() != null) {
         // We have them, get the corresponding OcrCharacter:
         // first the characters of the zone...
         OcrZoneCharacters ocrZoneCharacters = documentPageText.getOcrPageCharacters()
               .get(documentCharacter.getOcrZoneIndex());
         // ...and then the character inside this zone
         OcrCharacter ocrCharacter = ocrZoneCharacters.get(documentCharacter.getOcrCharacterIndex());
         int confidence = ocrCharacter.getConfidence();
         System.out.println(
               " OCR character code is " + ocrCharacter.getCode() + " and confidence is " + confidence);
         // Sanity check: both views must describe the same character
         assertTrue(ocrCharacter.getCode() == documentCharacter.getCode());
      } else {
         System.out.println(" Failed to get OCR confidence");
      }
   }
}
Help Collections
Raster .NET | C API | C++ Class Library | HTML5 JavaScript
Document .NET | C API | C++ Class Library | HTML5 JavaScript
Medical .NET | C API | C++ Class Library | HTML5 JavaScript
Medical Web Viewer .NET
Multimedia
Direct Show .NET | C API | Filters
Media Foundation .NET | C API | Transforms
Supported Platforms
.NET, Java, Android, and iOS/macOS Assemblies
Imaging, Medical, and Document
C API/C++ Class Libraries
Imaging, Medical, and Document
HTML5 JavaScript Libraries
Imaging, Medical, and Document