Initializes a new instance of DocumentPageText.
public DocumentPageText()
public:
DocumentPageText()
public DocumentPageText()
__init__() # Default constructor
This constructor initializes Characters to an empty list ready to be used. The rest of the members of this class are null.
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Document.Writer;
using Leadtools.Document;
using Leadtools.Caching;
using Leadtools.Annotations.Engine;
using Leadtools.Ocr;
using Leadtools.Barcode;
using Leadtools.Document.Converter;
/// <summary>
/// Loads "Leadtools.doc" from the sample images directory, lists every
/// DocumentTextExtractionMode value, and then prints the text, character count,
/// word count and per-word details of the first page.
/// Prints "Failed!" when the page returns no DocumentPageText.
/// </summary>
public void DocumentPageTextExample()
{
   var loadOptions = new LoadDocumentOptions();
   string documentPath = Path.Combine(LEAD_VARS.ImagesDir, "Leadtools.doc");

   using (var document = DocumentFactory.LoadFromFile(documentPath, loadOptions))
   {
      // The example works on the first page only
      var firstPage = document.Pages[0];

      // Enumerate every available DocumentTextExtractionMode value
      foreach (DocumentTextExtractionMode mode in Enum.GetValues(typeof(DocumentTextExtractionMode)))
      {
         Console.WriteLine($"Text extraction mode: {mode}");
      }

      // Auto is the default text extraction mode; it is set explicitly here
      document.Text.TextExtractionMode = DocumentTextExtractionMode.Auto;

      // DocumentPageText reference
      var pageText = firstPage.GetText();
      if (pageText == null)
      {
         Console.WriteLine("Failed!");
         return;
      }

      // Build the text string, then report it with the character count
      pageText.BuildText();
      var pageCharacters = pageText.Characters;
      var fullText = pageText.Text;
      Console.WriteLine(fullText);
      Console.WriteLine($"Total number of characters: {pageCharacters.Count}");

      // Build the word list, then report each word
      pageText.BuildWords();
      Console.WriteLine($"Total number of words: {pageText.Words.Count}");
      foreach (DocumentWord word in pageText.Words)
      {
         Console.WriteLine($"Bounds: {word.Bounds} | First character index: {word.FirstCharacterIndex} | Last character index: {word.LastCharacterIndex} | Value: {word.Value}");
      }
   }
}
/// <summary>
/// Shared constants used by the example code.
/// </summary>
static class LEAD_VARS
{
/// <summary>
/// Directory the example combines with a file name to locate its input document.
/// </summary>
public const string ImagesDir = @"C:\LEADTOOLS23\Resources\Images";
}
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Pattern;
import org.junit.*;
import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
import org.junit.runner.notification.Failure;
import static org.junit.Assert.*;
import leadtools.*;
import leadtools.annotations.engine.*;
import leadtools.barcode.*;
import leadtools.caching.*;
import leadtools.codecs.*;
import leadtools.document.*;
import leadtools.document.DocumentMimeTypes.UserGetDocumentStatusHandler;
import leadtools.document.converter.*;
import leadtools.document.writer.*;
import leadtools.ocr.*;
/**
 * Demonstrates reading the text of a document page through {@code DocumentPageText}.
 *
 * <p>Loads {@code Leadtools.pdf} from the sample images directory, prints every
 * {@code DocumentTextExtractionMode} value, sets the extraction mode to {@code AUTO},
 * and then prints the first page's text, character count, word count and per-word details.
 *
 * <p>The loaded document is released in a {@code finally} block so it is not leaked when
 * an assertion or any other failure interrupts the example.
 */
public void documentPageTextExample() {
   final String LEAD_VARS_IMAGES_DIR = "C:\\LEADTOOLS23\\Resources\\Images";
   LoadDocumentOptions options = new LoadDocumentOptions();
   LEADDocument document = DocumentFactory.loadFromFile(combine(LEAD_VARS_IMAGES_DIR, "Leadtools.pdf"),
         options);
   try {
      // Get page text
      DocumentPage page = document.getPages().get(0);

      // Get all of the DocumentTextExtractionModes (DocumentTextExtractionMode reference)
      for (DocumentTextExtractionMode mode : DocumentTextExtractionMode.values()) {
         System.out.println("Text extraction mode: " + mode);
      }

      // Text extraction mode. Auto is default
      document.getText().setTextExtractionMode(DocumentTextExtractionMode.AUTO);

      // DocumentPageText reference. The assertion fails the example when no text object
      // is returned, so no further null check is needed below.
      DocumentPageText pageText = page.getText();
      assertTrue(pageText != null);

      pageText.buildText();
      List<DocumentCharacter> characters = pageText.getCharacters();
      String text = pageText.getText();
      System.out.println(text);
      System.out.println("Total number of characters: " + characters.size());

      pageText.buildWords();
      System.out.println("Total number of words: " + pageText.getWords().size());

      // Get each word
      for (DocumentWord word : pageText.getWords()) {
         System.out.println(
               "Bounds: " + word.getBounds() + " | First character index: " + word.getFirstCharacterIndex() + " " +
                     "| Last character index: " + word.getLastCharacterIndex() + " | Value: " + word.getValue());
      }
   } finally {
      // Release the document even when the example fails part-way through
      if (document != null) {
         document.dispose();
      }
   }
}
Help Collections
Raster .NET | C API | C++ Class Library | HTML5 JavaScript
Document .NET | C API | C++ Class Library | HTML5 JavaScript
Medical .NET | C API | C++ Class Library | HTML5 JavaScript
Medical Web Viewer .NET
Multimedia
Direct Show .NET | C API | Filters
Media Foundation .NET | C API | Transforms
Supported Platforms
.NET, Java, Android, and iOS/macOS Assemblies
Imaging, Medical, and Document
C API/C++ Class Libraries
Imaging, Medical, and Document
HTML5 JavaScript Libraries
Imaging, Medical, and Document