Mode to use when extracting text from this document.
[SerializableAttribute()]
[DataContractAttribute()]
public enum DocumentTextExtractionMode
public [SerializableAttribute,
DataContractAttribute]
enum class DocumentTextExtractionMode sealed
public enum DocumentTextExtractionMode
class DocumentTextExtractionMode(Enum):
Auto = 0
SvgOnly = 1
OcrOnly = 2
0 |
Auto |
Automatic. This depends on the document type, if it supports SVG, then SVG is used; otherwise, if available use OCR.
|
1 |
SvgOnly |
Use SVG only if the document supports it. Otherwise, do not extract text.
|
2 |
OcrOnly |
Use OCR always even if the document supports SVG.
|
DocumentTextExtractionMode is used as the type of the DocumentText.TextExtractionMode to control how DocumentPage.GetText extracts the text from the page.
For more information, refer to Parsing Text with the Document Library.
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Document.Writer;
using Leadtools.Document;
using Leadtools.Caching;
using Leadtools.Annotations.Engine;
using Leadtools.Ocr;
using Leadtools.Barcode;
using Leadtools.Document.Converter;
public void DocumentPageTextExample()
{
var options = new LoadDocumentOptions();
using (var document = DocumentFactory.LoadFromFile(Path.Combine(LEAD_VARS.ImagesDir, "Leadtools.doc"), options))
{
// Get page text
var page = document.Pages[0];
// Get all of the DocumentTextExtractionModes (DocumentTextExtractionMode reference)
DocumentTextExtractionMode[] textExtractionModes = (DocumentTextExtractionMode[])Enum.GetValues(typeof(DocumentTextExtractionMode));
foreach (var modes in textExtractionModes)
{
Console.WriteLine($"Text extraction mode: {modes}");
}
// Text extraction mode. Auto is default
document.Text.TextExtractionMode = DocumentTextExtractionMode.Auto;
// DocumentPageText reference
var pageText = page.GetText();
if (pageText != null)
{
pageText.BuildText();
var characters = pageText.Characters;
var text = pageText.Text;
Console.WriteLine(text);
Console.WriteLine($"Total number of characters: {characters.Count}");
pageText.BuildWords();
Console.WriteLine($"Total number of words: {pageText.Words.Count}");
// Get each word
foreach (DocumentWord word in pageText.Words)
{
Console.WriteLine($"Bounds: {word.Bounds} | First character index: {word.FirstCharacterIndex} " +
$"| Last character index: {word.LastCharacterIndex} | Value: {word.Value}");
}
}
else
{
Console.WriteLine("Failed!");
}
}
}
static class LEAD_VARS
{
public const string ImagesDir = @"C:\LEADTOOLS22\Resources\Images";
}
Help Collections
Raster .NET | C API | C++ Class Library | HTML5 JavaScript
Document .NET | C API | C++ Class Library | HTML5 JavaScript
Medical .NET | C API | C++ Class Library | HTML5 JavaScript
Medical Web Viewer .NET
Multimedia
Direct Show .NET | C API | Filters
Media Foundation .NET | C API | Transforms
Supported Platforms
.NET, Java, Android, and iOS/macOS Assemblies
Imaging, Medical, and Document
C API/C++ Class Libraries
Imaging, Medical, and Document
HTML5 JavaScript Libraries
Imaging, Medical, and Document