Collection of the embedded file schemas found in this document.
/// <summary>
/// Gets the collection of embedded file schemas found in this document.
/// Read-only property; per the accompanying remarks the default value is an empty collection.
/// </summary>
public IList<PDFSchema> EmbeddedFilesSchemas { get; }
Collection of the embedded file schemas found in this document. The default value is an empty collection.
PDF documents support embedded files of any type. The file can be another PDF, a TIF file, a JPEG image, or any other binary or textual data.
The values of EmbeddedFiles and EmbeddedFilesSchemas are empty collections when a new instance of PDFDocument is created. However, the IsPortfolio property will be set to indicate whether this is a PDF portfolio document. Refer to IsPortfolio for more information.
Similarly, the HasEmbeddedFiles property will be set to true or false to indicate whether the document contains any embedded files using the schemas.
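To find more information regarding the embedded files (such as the file name and its size), as well as schema information such as the schema type, the application must perform the following: create a PDFDocument instance for the file, call PDFDocument.ParseDocumentStructure with PDFParseDocumentStructureOptions.EmbeddedFiles, and then read the EmbeddedFiles and EmbeddedFilesSchemas collections.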
PDFFile.ExtractEmbeddedFile can then be used to extract the data of an embedded file into an output file or stream.
This example will generate an HTML file with a table containing links to the embedded files found in the document. If the document is a PDF portfolio, then the table entries are created based on the schemas and their values.
using System;
using System.IO;
using System.Net;
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Controls;
using Leadtools.Pdf;
using Leadtools.Svg;
using Leadtools.WinForms;
/// <summary>
/// Extracts every embedded file of a PDF document into <paramref name="outputDir"/> and writes an
/// HTML file containing a table that links to the extracted files, with one column per visible
/// embedded-file schema.
/// </summary>
/// <param name="inputFileName">Path of the PDF document to read.</param>
/// <param name="outputDir">Directory that receives the HTML file and the extracted attachments; created if it does not exist.</param>
public static void EmbeddedFilesSchemasExample(string inputFileName, string outputDir)
{
    // Load the source document. The using statement guarantees the document is disposed even when
    // parsing, extraction or writing the HTML throws.
    using (PDFDocument pdfDocument = new PDFDocument(inputFileName))
    {
        // Parse the embedded files
        pdfDocument.ParseDocumentStructure(PDFParseDocumentStructureOptions.EmbeddedFiles);

        // CreateDirectory does nothing when the directory already exists
        Directory.CreateDirectory(outputDir);

        // Create the HTML file. Append the extension instead of using Path.ChangeExtension so that an
        // input such as "a.b.pdf" produces "a.b.html" rather than "a.html".
        string htmlFileName = Path.Combine(outputDir, Path.GetFileNameWithoutExtension(inputFileName) + ".html");

        using (var writer = File.CreateText(htmlFileName))
        {
            // Add the HTML. Every piece of text that originates from the file system or from the PDF
            // is HTML-encoded so that characters such as '<' or '&' cannot break or inject markup.
            writer.WriteLine("<html>");
            writer.WriteLine(" <head>");
            writer.WriteLine($" <title>{WebUtility.HtmlEncode(Path.GetFileName(inputFileName))}</title>");
            writer.WriteLine(" <style>table, th, td {border: 1px solid black;}</style>");
            writer.WriteLine(" </head>");
            writer.WriteLine(" <body>");

            // Add the table
            writer.WriteLine(" <table>");

            // Header row: the file name column always exists, followed by one column per visible schema
            writer.WriteLine(" <tr>");
            writer.WriteLine(" <th>FileName</th>");
            foreach (PDFSchema schema in pdfDocument.EmbeddedFilesSchemas)
            {
                if (schema.IsVisible)
                {
                    writer.WriteLine($" <th>{WebUtility.HtmlEncode(schema.Key)}</th>");
                }
            }
            writer.WriteLine(" </tr>");

            // Data rows: one per embedded file
            foreach (PDFEmbeddedFile embeddedFile in pdfDocument.EmbeddedFiles)
            {
                // Extract this attachment to the output directory.
                // Note: FileName of an embedded file is not guaranteed to be unique, and it is also used
                // in an HTML link, so use a generic name based on the file number.
                string outputFileName = $"attachment-{embeddedFile.FileNumber}";
                PDFFile.ExtractEmbeddedFile(inputFileName, pdfDocument.Password, embeddedFile.FileNumber, Path.Combine(outputDir, outputFileName));

                writer.WriteLine(" <tr>");

                // Add the file name and make it a link to the file we extracted
                string fileNameLink = $"<a href=\"./{outputFileName}\">{WebUtility.HtmlEncode(embeddedFile.FileName)}</a>";
                writer.WriteLine($" <td>{fileNameLink}</td>");

                // Add the schema values for this file
                foreach (PDFSchema schema in pdfDocument.EmbeddedFilesSchemas)
                {
                    string value = embeddedFile.SchemaValues[schema.Key];

                    // Convert the raw value according to the schema type
                    if (schema.SchemaType == PDFSchema.DateSchemaType)
                    {
                        // Dates are shown in a fixed month/day/year layout
                        DateTime date = schema.AsDate(value);
                        value = date.ToString("MM/dd/yyyy");
                    }
                    else if (schema.SchemaType == PDFSchema.NumberSchemaType)
                    {
                        // Demonstrates the numeric conversion; the result is intentionally not used
                        // and the raw text is what gets written to the table.
                        schema.AsNumber(value);
                    }

                    // Add it to the table if visible
                    if (schema.IsVisible)
                    {
                        writer.WriteLine($" <td>{WebUtility.HtmlEncode(value)}</td>");
                    }
                }

                writer.WriteLine(" </tr>");
            }

            writer.WriteLine(" </table>");
            writer.WriteLine(" </body>");
            writer.WriteLine("</html>");
        }
    }
}