Extracts an embedded file (attachment) from a PDF document file and saves it to disk.
/// <summary>
/// Extracts an embedded file (attachment) from a PDF document file and saves it to disk.
/// </summary>
/// <param name="fileName">Name of the owner PDF document file.</param>
/// <param name="password">The password to use when loading the PDF file if it is encrypted; otherwise, null.</param>
/// <param name="fileNumber">1-based embedded file number; must be between 1 and the total number of embedded files in the owner PDF document.</param>
/// <param name="outputFileName">Name of the file that will contain the embedded file binary data.</param>
public static void ExtractEmbeddedFile(
string fileName,
string password,
int fileNumber,
string outputFileName
)
fileName
Name of the owner PDF document file.
password
The password to use when loading this PDF file (if the file is encrypted); otherwise, use null.
fileNumber
Embedded file number. This value must be between 1 and the total number of embedded files in the owner PDF document.
outputFileName
Name of the file that will contain the embedded file binary data.
ExtractEmbeddedFile is used to extract one embedded file (attachment) from a PDF document.
The attachment to extract is identified by the fileNumber parameter, which must be a value between 1 (for the first embedded file) and the total number of attachments in the file (for the last embedded file).
Use GetEmbeddedFileCount to quickly get the number of embedded files in a PDF document.
To find more information regarding the embedded files before extracting them (such as the file name and size), the application can perform the following:
using Leadtools.WinForms;
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Controls;
using Leadtools.Drawing;
using Leadtools.ImageProcessing;
using Leadtools.Pdf;
using Leadtools.Svg;
/// <summary>
/// Extracts every embedded file (attachment) from a PDF document, converts the attachments that are
/// not PDF into PDF, and merges the results into a single output PDF file.
/// </summary>
/// <param name="inputFileName">Name of the PDF file whose attachments are extracted.</param>
/// <param name="outputFileName">
/// Name of the file to create. Any existing file with this name is deleted before processing starts.
/// </param>
/// <remarks>
/// If the input has no attachments, or none of them could be used, the input file is copied to
/// <paramref name="outputFileName"/> unchanged. All temporary files created along the way are deleted
/// before the method returns, including when an exception is thrown.
/// </remarks>
public static void ExtractAttachmentsAndStitch(string inputFileName, string outputFileName)
{
    File.Delete(outputFileName);

    // Get the number of embedded files (attachments) in the input file
    int attachmentCount = PDFFile.GetEmbeddedFileCount(inputFileName, null);

    // If the file does not have any attachments we are done
    if (attachmentCount == 0)
    {
        File.Copy(inputFileName, outputFileName, true);
        return;
    }

    // Files that take part in the final merge, in attachment order
    var tempFiles = new List<string>();
    // Every temporary file created by this method; deleted in the finally block so that
    // nothing is left behind when extraction, conversion or merging throws
    var filesToDelete = new List<string>();

    try
    {
        // Used to identify and convert attachment files that are not PDF.
        // The using block guarantees disposal even if an exception is thrown.
        using (var rasterCodecs = new RasterCodecs())
        {
            // Unrecognized files are reported as RasterImageFormat.Unknown instead of throwing
            rasterCodecs.ThrowExceptionsOnInvalidImages = false;

            // Now, extract all the attachments (attachment numbers are 1-based)
            for (int attachmentNumber = 1; attachmentNumber <= attachmentCount; attachmentNumber++)
            {
                // Extract this attachment to a temporary file
                string tempFile = Path.GetTempFileName();
                filesToDelete.Add(tempFile);
                PDFFile.ExtractEmbeddedFile(inputFileName, null, attachmentNumber, tempFile);

                // Find out what kind of file the attachment is
                RasterImageFormat format;
                int pageCount;
                using (CodecsImageInfo info = rasterCodecs.GetInformation(tempFile, true))
                {
                    format = info.Format;
                    pageCount = info.TotalPages;
                }

                if (format == RasterImageFormat.Unknown)
                {
                    // We do not know what to do with this attachment, log and ignore it
                    Console.WriteLine("Could not convert attachment file to PDF, ignoring");
                    File.Delete(tempFile);
                    continue;
                }

                if (format != RasterImageFormat.RasPdf)
                {
                    // Simple conversion using RasterImage.
                    // TODO for the Example: A better way is to use the DocumentConverter to get true document conversion with all the
                    // options available
                    string tempPdfFile = Path.GetTempFileName();
                    filesToDelete.Add(tempPdfFile);

                    for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
                    {
                        using (RasterImage image = rasterCodecs.Load(tempFile, pageNumber))
                        {
                            rasterCodecs.Save(image, tempPdfFile, RasterImageFormat.RasPdfJpeg422, 0, 1, -1, -1, CodecsSavePageMode.Append);
                        }
                    }

                    // Replace the extracted attachment with its PDF version
                    File.Copy(tempPdfFile, tempFile, true);
                    File.Delete(tempPdfFile);
                }

                // TODO for the Example: Check if this file is PDF with attachments and call the function recursively.
                tempFiles.Add(tempFile);
            }
        }

        // Time to generate the final document.
        // Did we extract/convert any attachments?
        if (tempFiles.Count == 0)
        {
            // No, just copy the original file over
            File.Copy(inputFileName, outputFileName, true);
            return;
        }

        // Note that if the original document is PDF portfolio, then it does not contain real pages. Just a placeholder that should be ignored
        if (!PDFFile.IsPortfolio(inputFileName, null))
        {
            // Pages of the original document first, followed by the attachments
            PDFFile file = new PDFFile(inputFileName);
            file.MergeWith(tempFiles.ToArray(), outputFileName);
        }
        else
        {
            // This is portfolio, so we will ignore the original file and just merge the attachments:
            // the first attachment is the base document and the rest are merged into it.
            // The list itself is left untouched so that the cleanup below still sees every file.
            PDFFile file = new PDFFile(tempFiles[0]);
            file.MergeWith(tempFiles.GetRange(1, tempFiles.Count - 1).ToArray(), outputFileName);
        }
    }
    finally
    {
        // Clean up. File.Delete does not throw for a file that no longer exists,
        // so files already removed above are harmless here.
        foreach (string tempFile in filesToDelete)
        {
            File.Delete(tempFile);
        }
    }
}