←Select platform

Characters Property

Summary

The list of text characters found in the document page.

Syntax
C#
C++/CLI
Java
Python
public IList<DocumentCharacter> Characters { get; } 
public:  
   property System::Collections::Generic::IList<Leadtools::Documents::DocumentCharacter^>^ Characters 
   { 
      System::Collections::Generic::IList<Leadtools::Documents::DocumentCharacter^>^ get() 
   } 
public List<DocumentCharacter> getCharacters() 
Characters # get  (DocumentPageText) 

Property Value

List of text characters found in the document page. The default value is an empty list.

Remarks

The text of a document page can be read by using the DocumentPage.GetText method. The text characters found in the page will be set in the in Characters property of the returned DocumentPageText object.

For more information, refer to Parsing Text with the Document Library.

Example
C#
using Leadtools; 
using Leadtools.Codecs; 
using Leadtools.Document.Writer; 
 
using Leadtools.Document; 
using Leadtools.Caching; 
using Leadtools.Annotations.Engine; 
using Leadtools.Ocr; 
using Leadtools.Barcode; 
using Leadtools.Document.Converter; 
 
public void DocumentPageTextExample() 
{ 
   var options = new LoadDocumentOptions(); 
   using (var document = DocumentFactory.LoadFromFile(Path.Combine(LEAD_VARS.ImagesDir, "Leadtools.doc"), options)) 
   { 
      // Get page text  
      var page = document.Pages[0]; 
            
      // Get all of the DocumentTextExtractionModes (DocumentTextExtractionMode reference) 
      DocumentTextExtractionMode[] textExtractionModes = (DocumentTextExtractionMode[])Enum.GetValues(typeof(DocumentTextExtractionMode)); 
      foreach (var modes in textExtractionModes) 
      { 
         Console.WriteLine($"Text extraction mode: {modes}"); 
      } 
 
      // Text extraction mode. Auto is default 
      document.Text.TextExtractionMode = DocumentTextExtractionMode.Auto; 
 
      // DocumentPageText reference 
      var pageText = page.GetText(); 
      if (pageText != null) 
      { 
         pageText.BuildText(); 
         var characters = pageText.Characters; 
         var text = pageText.Text; 
 
         Console.WriteLine(text); 
         Console.WriteLine($"Total number of characters: {characters.Count}"); 
 
         pageText.BuildWords(); 
         Console.WriteLine($"Total number of words: {pageText.Words.Count}"); 
         // Get each word 
         foreach (DocumentWord word in pageText.Words) 
         { 
            Console.WriteLine($"Bounds: {word.Bounds} | First character index: {word.FirstCharacterIndex} " + 
               $"| Last character index: {word.LastCharacterIndex} | Value: {word.Value}"); 
         } 
      } 
      else 
      { 
         Console.WriteLine("Failed!"); 
      } 
 
   } 
} 
 
static class LEAD_VARS 
{ 
   public const string ImagesDir = @"C:\LEADTOOLS22\Resources\Images"; 
} 
Requirements

Target Platforms

Help Version 22.0.2023.4.21
Products | Support | Contact Us | Intellectual Property Notices
© 1991-2023 LEAD Technologies, Inc. All Rights Reserved.

Leadtools.Document Assembly
Products | Support | Contact Us | Intellectual Property Notices
© 1991-2023 LEAD Technologies, Inc. All Rights Reserved.