Manages the text options of the document.
[DataContractAttribute()]
public class DocumentText
<DataContractAttribute()>
Public Class DocumentText
public [DataContractAttribute]
ref class DocumentText
public class DocumentText implements Serializable
DocumentText manages the text of the document and can be accessed through the Text property of Document.
The text of a document page can be extracted using the DocumentPage.GetText method. This returns a DocumentPageText instance that contains the text characters found in the page, along with their location and size properties. Furthermore, DocumentPageText supports building the words, or the whole text as a single string, for easy processing.
The framework can use either SVG or OCR technology to extract the text data. The TextExtractionMode property controls which method is used.
For more information, refer to Parsing Text with the Documents Library.
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Forms.DocumentWriters;
using Leadtools.Svg;
using LeadtoolsExamples.Common;
using Leadtools.Documents;
using Leadtools.Caching;
using Leadtools.Annotations.Core;
using Leadtools.Forms.Ocr;
using Leadtools.Barcode;
/// <summary>
/// Loads "Leadtools.tif", attaches an Advantage OCR engine to the document's text options,
/// and writes the text of the first page to the console. Writes "Failed!" when
/// <c>GetText</c> returns null.
/// </summary>
public static void DocumentTextExample()
{
   var options = new LoadDocumentOptions();
   var documentWriter = new DocumentWriter();

   // The codecs and the OCR engine are created before the document so that the nested
   // using statements dispose them AFTER the document, which holds a reference to the
   // engine through document.Text.OcrEngine. Previously neither object was ever disposed.
   using (var rasterCodecs = new RasterCodecs())
   using (var ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, false))
   {
      //for the TIF file we need an OCR engine
      ocrEngine.Startup(rasterCodecs, documentWriter, null, LEAD_VARS.OcrAdvantageRuntimeDir);

      using (var document = DocumentFactory.LoadFromFile(Path.Combine(ImagesPath.Path, "Leadtools.tif"), options))
      {
         document.Text.OcrEngine = ocrEngine;

         // get text
         var page = document.Pages[0];
         var pageText = page.GetText();
         if (pageText != null)
         {
            // BuildText populates the Text property from the extracted characters
            pageText.BuildText();
            var text = pageText.Text;
            Console.WriteLine(text);
         }
         else
         {
            Console.WriteLine("Failed!");
         }
      }
   }
}
/// <summary>
/// Fixed file-system locations used by the example code.
/// </summary>
static class LEAD_VARS
{
   /// <summary>
   /// Directory handed to the OCR engine's Startup call as its runtime location.
   /// </summary>
   public const string OcrAdvantageRuntimeDir =
      @"C:\LEADTOOLS 19\Bin\Common\OcrAdvantageRuntime";
}
Imports Leadtools
Imports Leadtools.Codecs
Imports Leadtools.Forms.DocumentWriters
Imports Leadtools.Svg
Imports Leadtools.Documents
Imports Leadtools.Caching
Imports Leadtools.Annotations.Core
Imports Leadtools.Barcode
Imports Leadtools.Forms.Ocr
Imports LeadtoolsDocumentsExamples.LeadtoolsExamples.Common
'Imports LeadtoolsDocumentsExamples.LeadtoolsExamples.Common
''' <summary>
''' Loads "Leadtools.tif", attaches an Advantage OCR engine to the document's text options,
''' and writes the text of the first page to the console. Writes "Failed!" when
''' GetText returns Nothing.
''' </summary>
Public Shared Sub DocumentTextExample()
   Dim options As New LoadDocumentOptions()
   Dim documentWriter As New DocumentWriter()

   ' The codecs and the OCR engine are created before the document so that the nested
   ' Using blocks dispose them AFTER the document, which holds a reference to the
   ' engine through document.Text.OcrEngine. Previously neither object was ever disposed.
   Using rasterCodecs As New RasterCodecs()
      Using ocrEngine As IOcrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, False)
         'for the TIF file we need an OCR engine
         ocrEngine.Startup(rasterCodecs, documentWriter, Nothing, LEAD_VARS.OcrAdvantageRuntimeDir)

         Using document As Leadtools.Documents.Document = DocumentFactory.LoadFromFile(Path.Combine(ImagesPath.Path, "Leadtools.tif"), options)
            document.Text.OcrEngine = ocrEngine

            ' get text
            Dim page As Leadtools.Documents.DocumentPage = document.Pages(0)
            Dim pageText As DocumentPageText = page.GetText()
            If pageText IsNot Nothing Then
               ' BuildText populates the Text property from the extracted characters
               pageText.BuildText()
               Dim text As String = pageText.Text
               Console.WriteLine(text)
            Else
               Console.WriteLine("Failed!")
            End If
         End Using
      End Using
   End Using
End Sub
''' <summary>
''' Fixed file-system locations used by the example code.
''' </summary>
Public NotInheritable Class LEAD_VARS
   ''' <summary>
   ''' Directory handed to the OCR engine's Startup call as its runtime location.
   ''' </summary>
   Public Const OcrAdvantageRuntimeDir As String =
      "C:\LEADTOOLS 19\Bin\Common\OcrAdvantageRuntime"
End Class
Raster .NET | C API | C++ Class Library | JavaScript HTML5
Document .NET | C API | C++ Class Library | JavaScript HTML5
Medical .NET | C API | C++ Class Library | JavaScript HTML5
Medical Web Viewer .NET