←Select platform

DocumentText Class

Summary

Manages the text options of the document.

Syntax
C#
C++/CLI
Java
Python
[DataContractAttribute()] 
public class DocumentText 
public [DataContractAttribute] 
   ref class DocumentText 
public class DocumentText implements Serializable 
class DocumentText: 
Remarks

DocumentText manages the text of the document and can be accessed through the Text property of LEADDocument.

The text of a document page can be extracted using the DocumentPage.GetText method. This returns a DocumentPageText instance that contains the text characters found in the page, along with their location and size properties. Furthermore, DocumentPageText supports building the words or the full text as a string for easy processing.

The framework can use either SVG or OCR technologies to extract the text data. The TextExtractionMode property controls which of these methods is used.

For more information, refer to Parsing Text with the Document Library.

Example
C#
Java
using Leadtools; 
using Leadtools.Codecs; 
using Leadtools.Document.Writer; 
 
using Leadtools.Document; 
using Leadtools.Caching; 
using Leadtools.Annotations.Engine; 
using Leadtools.Ocr; 
using Leadtools.Barcode; 
using Leadtools.Document.Converter; 
 
/// <summary>
/// Loads "slice.tif" from <c>LEAD_VARS.ImagesDir</c>, assigns a LEAD OCR engine to the
/// document's <c>DocumentText</c> (the <c>Text</c> property), writes every
/// <c>DocumentTextExtractionMode</c> value to the console and then prints the text of the
/// first page, or "Failed!" when no page text is returned.
/// </summary>
public void DocumentTextExample() 
{ 
   var options = new LoadDocumentOptions(); 

   // The codecs and the OCR engine are declared before the document so that they are 
   // disposed AFTER it: the document keeps a reference to the engine through Text.OcrEngine. 
   using (var rasterCodecs = new RasterCodecs()) 
   using (var ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD)) 
   using (var document = DocumentFactory.LoadFromFile(Path.Combine(LEAD_VARS.ImagesDir, "slice.tif"), options)) 
   { 
      // for the TIF file we need an OCR engine 
      var documentWriter = new DocumentWriter(); 
      ocrEngine.Startup(rasterCodecs, documentWriter, null, LEAD_VARS.OcrLEADRuntimeDir); 
 
      // DocumentText reference 
      document.Text.OcrEngine = ocrEngine; 
 
      // Get all of the DocumentTextExtractionModes (DocumentTextExtractionMode reference) 
      DocumentTextExtractionMode[] textExtractionModes = (DocumentTextExtractionMode[])Enum.GetValues(typeof(DocumentTextExtractionMode)); 
      foreach (var mode in textExtractionModes) 
      { 
         Console.WriteLine($"Text extraction mode: {mode}"); 
      } 
 
      // get text  
      var page = document.Pages[0]; 
      var pageText = page.GetText(); 
      if (pageText != null) 
      { 
         // BuildText must run before the Text property is read 
         pageText.BuildText(); 
         var text = pageText.Text; 
 
         Console.WriteLine(text); 
      } 
      else 
      { 
         Console.WriteLine("Failed!"); 
      } 
 
      // NOTE(review): disposing the engine is expected to also shut it down if it was 
      // started - confirm against the IOcrEngine documentation. 
   } 
} 
 
/// <summary>
/// Fixed file-system locations used by the example above.
/// </summary>
static class LEAD_VARS 
{ 
   // Directory that the example reads its input image ("slice.tif") from. 
   public const string ImagesDir = @"C:\LEADTOOLS23\Resources\Images"; 
   // Directory passed to the OCR engine's Startup call as the runtime location. 
   public const string OcrLEADRuntimeDir = @"C:\LEADTOOLS23\Bin\Common\OcrLEADRuntime"; 
} 
 
import java.io.File; 
import java.io.FileOutputStream; 
import java.io.IOException; 
import java.net.MalformedURLException; 
import java.net.URI; 
import java.net.URISyntaxException; 
import java.net.URL; 
import java.nio.file.Files; 
import java.nio.file.Paths; 
import java.util.ArrayList; 
import java.util.Calendar; 
import java.util.List; 
import java.util.concurrent.Callable; 
import java.util.concurrent.ExecutorService; 
import java.util.concurrent.Executors; 
import java.util.concurrent.Future; 
import java.util.regex.Pattern; 
 
import org.junit.*; 
import org.junit.runner.JUnitCore; 
import org.junit.runner.Result; 
import org.junit.runner.notification.Failure; 
import static org.junit.Assert.*; 
 
import leadtools.*; 
import leadtools.annotations.engine.*; 
import leadtools.barcode.*; 
import leadtools.caching.*; 
import leadtools.codecs.*; 
import leadtools.document.*; 
import leadtools.document.DocumentMimeTypes.UserGetDocumentStatusHandler; 
import leadtools.document.converter.*; 
import leadtools.document.writer.*; 
import leadtools.ocr.*; 
 
 
/**
 * Loads "slice.tif", assigns a LEAD OCR engine to the document's {@code DocumentText}
 * (via {@code getText()}), prints every {@code DocumentTextExtractionMode} value and then
 * prints the text of the first page, or "Failed!" when no page text is returned.
 *
 * <p>The document, the OCR engine and the codecs are released in a {@code finally} block so
 * that they are cleaned up even when an exception or a failed assertion interrupts the example.
 */
public void documentTextExample() { 
   final String LEAD_VARS_IMAGES_DIR = "C:\\LEADTOOLS23\\Resources\\Images"; 
   final String OCR_LEAD_RUNTIME_DIR = "C:\\LEADTOOLS23\\Bin\\Common\\OcrLEADRuntime"; 
   LoadDocumentOptions options = new LoadDocumentOptions(); 
   LEADDocument document = DocumentFactory.loadFromFile(combine(LEAD_VARS_IMAGES_DIR, "slice.tif"), options); 
   // for the TIF file we need an OCR engine 
   OcrEngine ocrEngine = OcrEngineManager.createEngine(OcrEngineType.LEAD); 
   RasterCodecs rasterCodecs = new RasterCodecs(); 
   try { 
      DocumentWriter documentWriter = new DocumentWriter(); 
      ocrEngine.startup(rasterCodecs, documentWriter, null, OCR_LEAD_RUNTIME_DIR); 
 
      // DocumentText reference 
      document.getText().setOcrEngine(ocrEngine); 
 
      // Get all of the DocumentTextExtractionModes (DocumentTextExtractionMode 
      // reference) 
      for (DocumentTextExtractionMode mode : DocumentTextExtractionMode.values()) { 
         System.out.println("Text extraction mode: " + mode); 
      } 
 
      // get text 
      DocumentPage page = document.getPages().get(0); 
      DocumentPageText pageText = page.getText(); 
      if (pageText != null) { 
         // buildText must run before getText is read 
         pageText.buildText(); 
         String text = pageText.getText(); 
 
         System.out.println(text); 
      } else { 
         System.out.println("Failed!"); 
      } 
      assertTrue(pageText != null); 
   } finally { 
      // Release the document first: it still references the OCR engine. 
      if (document != null) { 
         document.dispose(); 
      } 
      // Only shut down an engine that actually started (startup may have thrown). 
      if (ocrEngine.isStarted()) { 
         ocrEngine.shutdown(); 
      } 
      rasterCodecs.dispose(); 
   } 
} 
Requirements

Target Platforms

Help Version 23.0.2024.2.29
Products | Support | Contact Us | Intellectual Property Notices
© 1991-2024 LEAD Technologies, Inc. All Rights Reserved.

Leadtools.Document Assembly
Products | Support | Contact Us | Intellectual Property Notices
© 1991-2023 LEAD Technologies, Inc. All Rights Reserved.