This example loads each page of a PDF file as SVG, then uses SortElements to enumerate the text elements and write the strings found to a text file.
Imports Leadtools
Imports Leadtools.Codecs
Imports Leadtools.Drawing
Imports Leadtools.Forms.DocumentWriters
Imports Leadtools.Svg
''' <summary>
''' Loads every page of "Leadtools.pdf" as an SVG document, enumerates its text
''' with SortElements and writes the strings found to "Example.txt".
''' </summary>
''' <remarks>
''' Images and shapes are dropped at load time because only text is of interest.
''' The resulting text file is opened with its associated application once the
''' writer has been closed.
''' </remarks>
Public Shared Sub SortElementsExample()
    ' The source PDF file and the text file to create
    Dim srcFileName As String = Path.Combine(Common.ImagesPath.Path, "Leadtools.pdf")
    Dim dstFileName As String = Path.Combine(Common.ImagesPath.Path, "Example.txt")

    ' SVG sort callback handler. userData is the StreamWriter handed to SortElements below.
    Dim sortCallback As SvgSortElementsCallback = _
        Function(document As SvgDocument, info As SvgElementInfo, userData As Object) As Boolean
            Dim writer As StreamWriter = DirectCast(userData, StreamWriter)

            ' Is it text?
            Dim textData As SvgTextData = info.TextData
            If textData IsNot Nothing Then
                ' Yes, write it to the output text file
                writer.Write(textData.Text & " ")

                ' See if it is the end of a line. The flags of the last character are
                ' inspected, so skip the check for empty text (index would be -1).
                Dim len As Integer = textData.Text.Length
                If len > 0 AndAlso (textData.CharacterFlags(len - 1) And SvgTextCharacterFlags.EndOfLine) = SvgTextCharacterFlags.EndOfLine Then
                    writer.WriteLine()
                End If
            End If

            ' True is returned for every element; presumably this tells SortElements
            ' to keep enumerating - confirm against the callback documentation.
            Return True
        End Function

    Using codecs As New RasterCodecs()
        ' Set 300 as the default value for loading document files
        codecs.Options.RasterizeDocument.Load.Resolution = 300

        ' Get the number of pages
        Dim pageCount As Integer = codecs.GetTotalPages(srcFileName)

        ' Create a writer for the output text file
        Using writer As StreamWriter = File.CreateText(dstFileName)
            For pageNumber As Integer = 1 To pageCount
                ' Load this page as SVG, we are interested in the text only so
                ' we will ask LEADTOOLS to skip other elements
                Dim loadSvgOptions As New CodecsLoadSvgOptions()
                loadSvgOptions.DropText = False
                loadSvgOptions.DropImages = True
                loadSvgOptions.DropShapes = True

                Using svgDocument As SvgDocument = DirectCast(codecs.LoadSvg(srcFileName, pageNumber, loadSvgOptions), SvgDocument)
                    ' Sort requires a flat document, so check for that
                    If Not svgDocument.IsFlat Then
                        svgDocument.Flat(Nothing)
                    End If

                    ' Make sure the document bounds are calculated before sorting
                    If Not svgDocument.Bounds.IsValid Then
                        svgDocument.CalculateBounds(False)
                    End If

                    Dim sortOptions As New SvgSortOptions()
                    sortOptions.ExtractText = SvgExtractText.Word
                    sortOptions.SortFlags = SvgSortFlags.Default

                    Console.WriteLine("Text for page {0}", pageNumber)
                    svgDocument.SortElements(sortOptions, sortCallback, writer)
                End Using
            Next
        End Using
    End Using

    ' Show the text file. This is done only after the writer has been disposed so
    ' that all buffered text is flushed and the file is no longer held open.
    System.Diagnostics.Process.Start(dstFileName)
End Sub
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Drawing;
using Leadtools.Forms.DocumentWriters;
using Leadtools.Svg;
/// <summary>
/// Loads every page of "Leadtools.pdf" as an SVG document, enumerates its text
/// with SortElements and writes the strings found to "Example.txt".
/// </summary>
/// <remarks>
/// Images and shapes are dropped at load time because only text is of interest.
/// The resulting text file is opened with its associated application once the
/// writer has been closed.
/// </remarks>
public void SortElementsExample()
{
    // The source PDF file and the text file to create
    string srcFileName = Path.Combine(ImagesPath.Path, "Leadtools.pdf");
    string dstFileName = Path.Combine(ImagesPath.Path, "Example.txt");

    // SVG sort callback handler. userData is the StreamWriter handed to SortElements below.
    SvgSortElementsCallback sortCallback = (callbackDocument, info, userData) =>
    {
        // Direct cast: a wrong userData type should fail here, not later as a null reference
        StreamWriter writer = (StreamWriter)userData;

        // Is it text?
        SvgTextData textData = info.TextData;
        if (textData != null)
        {
            // Yes, write it to the output text file
            writer.Write(textData.Text + " ");

            // See if it is the end of a line. The flags of the last character are
            // inspected, so skip the check for empty text (index would be -1).
            int len = textData.Text.Length;
            if (len > 0 && (textData.CharacterFlags[len - 1] & SvgTextCharacterFlags.EndOfLine) == SvgTextCharacterFlags.EndOfLine)
            {
                writer.WriteLine();
            }
        }

        // true is returned for every element; presumably this tells SortElements
        // to keep enumerating - confirm against the callback documentation.
        return true;
    };

    using (var codecs = new RasterCodecs())
    {
        // Set 300 as the default value for loading document files
        codecs.Options.RasterizeDocument.Load.Resolution = 300;

        // Get the number of pages
        int pageCount = codecs.GetTotalPages(srcFileName);

        // Create a writer for the output text file
        using (StreamWriter writer = File.CreateText(dstFileName))
        {
            for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
            {
                // Load this page as SVG, we are interested in the text only so
                // we will ask LEADTOOLS to skip other elements
                CodecsLoadSvgOptions loadSvgOptions = new CodecsLoadSvgOptions();
                loadSvgOptions.DropText = false;
                loadSvgOptions.DropImages = true;
                loadSvgOptions.DropShapes = true;

                using (SvgDocument svgDocument = (SvgDocument)codecs.LoadSvg(srcFileName, pageNumber, loadSvgOptions))
                {
                    // Sort requires a flat document, so check for that
                    if (!svgDocument.IsFlat)
                    {
                        svgDocument.Flat(null);
                    }

                    // Make sure the document bounds are calculated before sorting
                    if (!svgDocument.Bounds.IsValid)
                    {
                        svgDocument.CalculateBounds(false);
                    }

                    SvgSortOptions sortOptions = new SvgSortOptions();
                    sortOptions.ExtractText = SvgExtractText.Word;
                    sortOptions.SortFlags = SvgSortFlags.Default;

                    Console.WriteLine("Text for page {0}", pageNumber);
                    svgDocument.SortElements(sortOptions, sortCallback, writer);
                }
            }
        }
    }

    // Show the text file. This is done only after the writer has been disposed so
    // that all buffered text is flushed and the file is no longer held open.
    System.Diagnostics.Process.Start(dstFileName);
}