''' <summary>
''' Prompts for a document, parses the text of its first page, draws that text
''' one word at a time onto a white bitmap, and optionally saves the bitmap as PNG.
''' </summary>
''' <remarks>
''' An OCR engine is started only when the reader reports a raster document.
''' The parse session, the OCR engine and the reader are released in Finally
''' blocks so they are cleaned up even if parsing, rendering or saving throws.
''' </remarks>
Public Sub DocumentPageTextExample()
    Dim documentFileName As String
    Using dlg As New OpenFileDialog()
        If dlg.ShowDialog() <> System.Windows.Forms.DialogResult.OK Then
            ' The user cancelled, nothing to do
            Return
        End If
        documentFileName = dlg.FileName
    End Using

    ' Load the document at 200 DPI
    Dim loadOptions As New DocumentReaderLoadOptions()
    loadOptions.Resolution = 200
    Dim reader As DocumentReader = DocumentReader.Create(documentFileName, loadOptions)
    Dim ocrEngine As IOcrEngine = Nothing
    Try
        ' If this is a Raster document such as TIFF or JPEG, we must use an OCR engine
        If reader.ReaderType = DocumentReaderType.Raster Then
            ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, False)
            ocrEngine.Startup(Nothing, Nothing, Nothing, LEAD_VARS.OcrAdvantageRuntimeDir)
        End If

        reader.ObjectManager.BeginParse(ocrEngine)
        Try
            ' Get the text of the first page
            Dim page As DocumentReaderPage = reader.Pages(0)
            Dim pageText As DocumentPageText = reader.ObjectManager.ParsePageText(page)

            ' Create the bitmap to draw the objects to
            Using btmp As New Bitmap(page.PixelWidth, page.PixelHeight)
                btmp.SetResolution(CType(page.DpiX, Single), CType(page.DpiY, Single))
                Using g As Graphics = Graphics.FromImage(btmp)
                    g.Clear(Color.White)

                    ' Render the objects
                    ' Text is a word at a time: accumulate the characters, the union of
                    ' their bounds and the largest font size until a word boundary is hit
                    Dim textRect As LogicalRectangle = LogicalRectangle.Empty
                    Dim textFontHeight As Double = 0
                    Dim textWord As New StringBuilder()
                    For Each character As DocumentCharacter In pageText.Characters
                        ' Add the text code and rects together
                        textWord.Append(character.Code)
                        If textRect.IsEmpty Then
                            textRect = character.Bounds
                        Else
                            textRect = LogicalRectangle.Union(textRect, character.Bounds)
                        End If
                        textFontHeight = Math.Max(textFontHeight, character.FontSize)

                        ' If this is the last object in a word, render it
                        If character.IsEndOfWord OrElse character.IsEndOfLine OrElse character.IsEndOfParagraph OrElse character.IsEndOfPage Then
                            RenderText(g, pageText, textWord.ToString(), textRect, character, textFontHeight)

                            ' Start the next word from a clean state. The font height must be
                            ' reset too, otherwise the largest size seen so far would be applied
                            ' to every following word.
                            textWord.Length = 0
                            textRect = LogicalRectangle.Empty
                            textFontHeight = 0
                        End If
                    Next

                    ' Save the result as PNG
                    Using saveDlg As New SaveFileDialog()
                        saveDlg.Filter = "PNG files|*.png"
                        If saveDlg.ShowDialog() = System.Windows.Forms.DialogResult.OK Then
                            btmp.Save(saveDlg.FileName, System.Drawing.Imaging.ImageFormat.Png)
                        End If
                    End Using
                End Using
            End Using
        Finally
            ' Always close the parse session opened by BeginParse
            reader.ObjectManager.EndParse()
        End Try
    Finally
        ' Release the OCR engine (if one was created) and then the reader
        If ocrEngine IsNot Nothing Then
            ocrEngine.Dispose()
        End If
        If reader IsNot Nothing Then
            reader.Dispose()
        End If
    End Try
End Sub
''' <summary>
''' Draws one word onto <paramref name="g"/>, centered inside the word's bounding rectangle.
''' </summary>
''' <param name="g">Graphics surface to draw on.</param>
''' <param name="pageText">Page text whose Fonts collection is indexed by the character's FontIndex.</param>
''' <param name="text">The word to draw; nothing is drawn when it is Nothing or empty.</param>
''' <param name="textRect">Bounding rectangle of the word; its X, Y, Width and Height are converted to integers and used directly as drawing coordinates.</param>
''' <param name="character">Character whose FontIndex selects the font.</param>
''' <param name="textFontHeight">Font height of the word; scaled by 72 / g.DpiY to obtain the font's em size.</param>
Private Shared Sub RenderText(ByVal g As Graphics, ByVal pageText As DocumentPageText, _
                              ByVal text As String, ByVal textRect As LogicalRectangle, _
                              ByVal character As DocumentCharacter, ByVal textFontHeight _
                              As Double)
    ' Nothing to draw
    If String.IsNullOrEmpty(text) Then
        Return
    End If

    ' The Font constructor rejects an em size that is not positive or not finite,
    ' so skip such words instead of failing the whole rendering pass.
    ' "Not (x > 0)" is also True for NaN.
    ' NOTE(review): presumably textFontHeight is in pixels at the graphics DPI, which
    ' would make this a pixel-to-point conversion - confirm against DocumentCharacter.FontSize.
    Dim emSize As Single = CType(textFontHeight * 72 / g.DpiY, Single)
    If Not (emSize > 0) OrElse Single.IsInfinity(emSize) Then
        Return
    End If

    ' Create the font
    Dim font As DocumentFont = pageText.Fonts(character.FontIndex)
    Dim faceName As String = font.FaceName
    If String.IsNullOrEmpty(faceName) Then
        ' Could be an embedded font, use Arial
        faceName = "Arial"
    End If

    ' Map the document font style flags onto the GDI+ font style.
    ' The local is deliberately not called "fontStyle": VB is case-insensitive, so that
    ' name would hide the FontStyle type and its members would be read through the variable.
    Dim style As FontStyle = FontStyle.Regular
    If (font.FontStyle And DocumentFontStyle.Bold) = DocumentFontStyle.Bold Then
        style = style Or FontStyle.Bold
    End If
    If (font.FontStyle And DocumentFontStyle.Italic) = DocumentFontStyle.Italic Then
        style = style Or FontStyle.Italic
    End If
    If (font.FontStyle And DocumentFontStyle.Underline) = DocumentFontStyle.Underline Then
        style = style Or FontStyle.Underline
    End If

    Using f As New Font(faceName, emSize, style)
        Dim rect As New Rectangle(CType(textRect.X, Integer), CType(textRect.Y, Integer), _
                                  CType(textRect.Width, Integer), CType(textRect.Height, Integer))
        Using sf As New StringFormat()
            ' Center the word in its rectangle; never clip or wrap it
            sf.Alignment = StringAlignment.Center
            sf.LineAlignment = StringAlignment.Center
            sf.FormatFlags = sf.FormatFlags Or StringFormatFlags.NoClip Or StringFormatFlags.NoWrap
            g.DrawString(text, f, Brushes.Black, rect, sf)
        End Using
    End Using
End Sub
/// <summary>
/// Prompts for a document, parses the text of its first page, draws that text
/// one word at a time onto a white bitmap, and optionally saves the bitmap as PNG.
/// </summary>
/// <remarks>
/// An OCR engine is started only when the reader reports a raster document.
/// The parse session, the OCR engine and the reader are released in finally
/// blocks so they are cleaned up even if parsing, rendering or saving throws.
/// </remarks>
public void DocumentPageTextExample()
{
    string documentFileName;
    using(OpenFileDialog dlg = new OpenFileDialog())
    {
        if(dlg.ShowDialog() != DialogResult.OK)
        {
            // The user cancelled, nothing to do
            return;
        }
        documentFileName = dlg.FileName;
    }

    // Load the document at 200 DPI
    DocumentReaderLoadOptions loadOptions = new DocumentReaderLoadOptions();
    loadOptions.Resolution = 200;
    DocumentReader reader = DocumentReader.Create(documentFileName, loadOptions);
    IOcrEngine ocrEngine = null;
    try
    {
        // If this is a Raster document such as TIFF or JPEG, we must use an OCR engine
        if(reader.ReaderType == DocumentReaderType.Raster)
        {
            ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Advantage, false);
            ocrEngine.Startup(null, null, null, LEAD_VARS.OcrAdvantageRuntimeDir);
        }

        reader.ObjectManager.BeginParse(ocrEngine);
        try
        {
            // Get the text of the first page
            DocumentReaderPage page = reader.Pages[0];
            DocumentPageText pageText = reader.ObjectManager.ParsePageText(page);

            // Create the bitmap to draw the objects to
            using(Bitmap btmp = new Bitmap(page.PixelWidth, page.PixelHeight))
            {
                btmp.SetResolution((float)page.DpiX, (float)page.DpiY);
                using(Graphics g = Graphics.FromImage(btmp))
                {
                    g.Clear(Color.White);

                    // Render the objects
                    // Text is a word at a time: accumulate the characters, the union of
                    // their bounds and the largest font size until a word boundary is hit
                    LogicalRectangle textRect = LogicalRectangle.Empty;
                    double textFontHeight = 0;
                    StringBuilder textWord = new StringBuilder();
                    foreach(DocumentCharacter character in pageText.Characters)
                    {
                        // Add the text code and rects together
                        textWord.Append(character.Code);
                        if(textRect.IsEmpty)
                        {
                            textRect = character.Bounds;
                        }
                        else
                        {
                            textRect = LogicalRectangle.Union(textRect, character.Bounds);
                        }
                        textFontHeight = Math.Max(textFontHeight, character.FontSize);

                        // If this is the last object in a word, render it
                        if(character.IsEndOfWord || character.IsEndOfLine || character.IsEndOfParagraph
                           || character.IsEndOfPage)
                        {
                            RenderText(g, pageText, textWord.ToString(), textRect, character, textFontHeight);

                            // Start the next word from a clean state. The font height must be
                            // reset too, otherwise the largest size seen so far would be applied
                            // to every following word.
                            textWord.Length = 0;
                            textRect = LogicalRectangle.Empty;
                            textFontHeight = 0;
                        }
                    }

                    // Save the result as PNG
                    using(SaveFileDialog saveDlg = new SaveFileDialog())
                    {
                        saveDlg.Filter = "PNG files|*.png";
                        if(saveDlg.ShowDialog() == DialogResult.OK)
                        {
                            btmp.Save(saveDlg.FileName, System.Drawing.Imaging.ImageFormat.Png);
                        }
                    }
                }
            }
        }
        finally
        {
            // Always close the parse session opened by BeginParse
            reader.ObjectManager.EndParse();
        }
    }
    finally
    {
        // Release the OCR engine (if one was created) and then the reader
        if(ocrEngine != null)
        {
            ocrEngine.Dispose();
        }
        if(reader != null)
        {
            reader.Dispose();
        }
    }
}
/// <summary>
/// Draws one word onto <paramref name="g"/>, centered inside the word's bounding rectangle.
/// </summary>
/// <param name="g">Graphics surface to draw on.</param>
/// <param name="pageText">Page text whose <c>Fonts</c> collection is indexed by the character's <c>FontIndex</c>.</param>
/// <param name="text">The word to draw; nothing is drawn when it is null or empty.</param>
/// <param name="textRect">Bounding rectangle of the word; its X, Y, Width and Height are truncated to integers and used directly as drawing coordinates.</param>
/// <param name="character">Character whose <c>FontIndex</c> selects the font.</param>
/// <param name="textFontHeight">Font height of the word; scaled by 72 / <c>g.DpiY</c> to obtain the font's em size.</param>
private static void RenderText(Graphics g, DocumentPageText pageText, string text,
                               LogicalRectangle textRect, DocumentCharacter character,
                               double textFontHeight)
{
    // Nothing to draw
    if(string.IsNullOrEmpty(text))
    {
        return;
    }

    // The Font constructor rejects an em size that is not positive or not finite,
    // so skip such words instead of failing the whole rendering pass.
    // "!(x > 0)" is also true for NaN.
    // NOTE(review): presumably textFontHeight is in pixels at the graphics DPI, which
    // would make this a pixel-to-point conversion - confirm against DocumentCharacter.FontSize.
    float emSize = (float)textFontHeight * 72 / g.DpiY;
    if(!(emSize > 0) || float.IsInfinity(emSize))
    {
        return;
    }

    // Create the font
    DocumentFont font = pageText.Fonts[character.FontIndex];
    string faceName = font.FaceName;
    if(string.IsNullOrEmpty(faceName))
    {
        // Could be an embedded font, use Arial
        faceName = "Arial";
    }

    // Map the document font style flags onto the GDI+ font style
    FontStyle fontStyle = FontStyle.Regular;
    if((font.FontStyle & DocumentFontStyle.Bold) == DocumentFontStyle.Bold)
    {
        fontStyle |= FontStyle.Bold;
    }
    if((font.FontStyle & DocumentFontStyle.Italic) == DocumentFontStyle.Italic)
    {
        fontStyle |= FontStyle.Italic;
    }
    if((font.FontStyle & DocumentFontStyle.Underline) == DocumentFontStyle.Underline)
    {
        fontStyle |= FontStyle.Underline;
    }

    using(Font f = new Font(faceName, emSize, fontStyle))
    {
        Rectangle rect = new Rectangle((int)textRect.X, (int)textRect.Y, (int)textRect.Width,
                                       (int)textRect.Height);
        using(StringFormat sf = new StringFormat())
        {
            // Center the word in its rectangle; never clip or wrap it
            sf.Alignment = StringAlignment.Center;
            sf.LineAlignment = StringAlignment.Center;
            sf.FormatFlags |= StringFormatFlags.NoClip | StringFormatFlags.NoWrap;
            g.DrawString(text, f, Brushes.Black, rect, sf);
        }
    }
}