←Select platform

DocumentTextExtractionMode Enumeration

Summary

Mode to use when extracting text from this document.

Syntax
C#
C++/CLI
Java
Python
[SerializableAttribute()] 
[DataContractAttribute()] 
public enum DocumentTextExtractionMode 
public [SerializableAttribute,  
   DataContractAttribute] 
   enum class DocumentTextExtractionMode sealed 
public enum DocumentTextExtractionMode 
class DocumentTextExtractionMode(Enum): 
   Auto = 0 
   SvgOnly = 1 
   OcrOnly = 2 
Members

0

Auto

Automatic. This depends on the document type, if it supports SVG, then SVG is used; otherwise, if available use OCR.

1

SvgOnly

Use SVG only if the document supports it. Otherwise, do not extract text.

2

OcrOnly

Use OCR always even if the document supports SVG.

Remarks

DocumentTextExtractionMode is used as the type of the DocumentText.TextExtractionMode to control how DocumentPage.GetText extracts the text from the page.

For more information, refer to Parsing Text with the Document Library.

Example
C#
using Leadtools; 
using Leadtools.Codecs; 
using Leadtools.Document.Writer; 
 
using Leadtools.Document; 
using Leadtools.Caching; 
using Leadtools.Annotations.Engine; 
using Leadtools.Ocr; 
using Leadtools.Barcode; 
using Leadtools.Document.Converter; 
 
public void DocumentPageTextExample() 
{ 
   var options = new LoadDocumentOptions(); 
   using (var document = DocumentFactory.LoadFromFile(Path.Combine(LEAD_VARS.ImagesDir, "Leadtools.doc"), options)) 
   { 
      // Get page text  
      var page = document.Pages[0]; 
            
      // Get all of the DocumentTextExtractionModes (DocumentTextExtractionMode reference) 
      DocumentTextExtractionMode[] textExtractionModes = (DocumentTextExtractionMode[])Enum.GetValues(typeof(DocumentTextExtractionMode)); 
      foreach (var modes in textExtractionModes) 
      { 
         Console.WriteLine($"Text extraction mode: {modes}"); 
      } 
 
      // Text extraction mode. Auto is default 
      document.Text.TextExtractionMode = DocumentTextExtractionMode.Auto; 
 
      // DocumentPageText reference 
      var pageText = page.GetText(); 
      if (pageText != null) 
      { 
         pageText.BuildText(); 
         var characters = pageText.Characters; 
         var text = pageText.Text; 
 
         Console.WriteLine(text); 
         Console.WriteLine($"Total number of characters: {characters.Count}"); 
 
         pageText.BuildWords(); 
         Console.WriteLine($"Total number of words: {pageText.Words.Count}"); 
         // Get each word 
         foreach (DocumentWord word in pageText.Words) 
         { 
            Console.WriteLine($"Bounds: {word.Bounds} | First character index: {word.FirstCharacterIndex} " + 
               $"| Last character index: {word.LastCharacterIndex} | Value: {word.Value}"); 
         } 
      } 
      else 
      { 
         Console.WriteLine("Failed!"); 
      } 
 
   } 
} 
 
static class LEAD_VARS 
{ 
   public const string ImagesDir = @"C:\LEADTOOLS22\Resources\Images"; 
} 
Requirements

Target Platforms

See Also

Reference

Leadtools.Document Namespace

Help Version 22.0.2023.4.21
Products | Support | Contact Us | Intellectual Property Notices
© 1991-2023 LEAD Technologies, Inc. All Rights Reserved.

Leadtools.Document Assembly
Products | Support | Contact Us | Intellectual Property Notices
© 1991-2023 LEAD Technologies, Inc. All Rights Reserved.