Products | Support | Email a link to this topic. | Send comments on this topic. | Back to Introduction - All Topics | Help Version 19.0.4.3
|
Leadtools.Documents Namespace : DocumentText Class |
[DataContractAttribute()] public class DocumentText
'Declaration
<DataContractAttribute()> Public Class DocumentText
'Usage
Dim instance As DocumentText
public class DocumentText implements Serializable
[DataContractAttribute()] public ref class DocumentText
DocumentText manages the text of the document and can be accessed through the Text property of Document.
The text of a document page can be extracted using the DocumentPage.GetText method. This returns a DocumentPageText instance that contains the text characters found in the page, along with their location and size properties. Furthermore, DocumentPageText supports building the words, or the whole text as a string, for easy processing.
The framework can use either SVG or OCR technologies to extract the text data. Use the TextExtractionMode property to control which method is used.
For more information, refer to Parsing Text with the Documents Library.
Imports Leadtools
Imports Leadtools.Codecs
Imports Leadtools.Forms.DocumentWriters
Imports Leadtools.Svg
Imports Leadtools.Documents
Imports Leadtools.Caching
Imports Leadtools.Annotations.Core
Imports Leadtools.Barcode
Imports Leadtools.Forms.Ocr

''' <summary>
''' Loads "Leadtools.tif" with caching disabled, assigns a started OCR engine to the
''' document's Text.OcrEngine property, and writes the text of the first page to the
''' console (or "Failed!" when GetText returns Nothing).
''' </summary>
<TestMethod()> _
Public Sub DocumentTextExample()
   Dim options As New LoadDocumentOptions()
   options.UseCache = False

   ' for the TIF file we need an OCR engine
   ' The codecs and the engine are created first and released last, so the document
   ' never outlives the OCR engine it references.
   Using rasterCodecs As New RasterCodecs(), ocrEngine As IOcrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, False)
      Dim documentWriter As New DocumentWriter()
      ocrEngine.Startup(rasterCodecs, documentWriter, Nothing, LEAD_VARS.OcrAdvantageRuntimeDir)

      Try
         Using document As Leadtools.Documents.Document = DocumentFactory.LoadFromFile(Path.Combine(ImagesPath.Path, "Leadtools.tif"), options)
            document.Text.OcrEngine = ocrEngine

            ' get text
            Dim page As Leadtools.Documents.DocumentPage = document.Pages(0)
            Dim pageText As DocumentPageText = page.GetText()
            If Not pageText Is Nothing Then
               ' BuildText must run before the Text property is read
               pageText.BuildText()
               Dim text As String = pageText.Text
               Console.WriteLine(text)
            Else
               Console.WriteLine("Failed!")
            End If
         End Using
      Finally
         ' Startup succeeded above, so always pair it with Shutdown, even on failure
         ocrEngine.Shutdown()
      End Try
   End Using
End Sub
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Forms.DocumentWriters;
using Leadtools.Svg;
using Leadtools.Documents;
using Leadtools.Caching;
using Leadtools.Annotations.Core;
using Leadtools.Forms.Ocr;
using Leadtools.Barcode;

/// <summary>
/// Loads "Leadtools.tif" with caching disabled, assigns a started OCR engine to the
/// document's Text.OcrEngine property, and writes the text of the first page to the
/// console (or "Failed!" when GetText returns null).
/// </summary>
[TestMethod]
public void DocumentTextExample()
{
   var options = new LoadDocumentOptions();
   options.UseCache = false;

   // for the TIF file we need an OCR engine
   // The codecs and the engine are created first and released last, so the document
   // never outlives the OCR engine it references.
   using (var rasterCodecs = new RasterCodecs())
   using (var ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, false))
   {
      var documentWriter = new DocumentWriter();
      ocrEngine.Startup(rasterCodecs, documentWriter, null, LEAD_VARS.OcrAdvantageRuntimeDir);

      try
      {
         using (var document = DocumentFactory.LoadFromFile(Path.Combine(ImagesPath.Path, "Leadtools.tif"), options))
         {
            document.Text.OcrEngine = ocrEngine;

            // get text
            var page = document.Pages[0];
            var pageText = page.GetText();
            if (pageText != null)
            {
               // BuildText must run before the Text property is read
               pageText.BuildText();
               var text = pageText.Text;
               Console.WriteLine(text);
            }
            else
            {
               Console.WriteLine("Failed!");
            }
         }
      }
      finally
      {
         // Startup succeeded above, so always pair it with Shutdown, even on failure
         ocrEngine.Shutdown();
      }
   }
}