using it.stefanochizzolini.clown.documents;
using it.stefanochizzolini.clown.documents.contents;
using it.stefanochizzolini.clown.documents.contents.composition;
using it.stefanochizzolini.clown.documents.contents.objects;
using xObjects = it.stefanochizzolini.clown.documents.contents.xObjects;
using it.stefanochizzolini.clown.files;
using it.stefanochizzolini.clown.objects;
using System;
using System.Collections.Generic;
using System.Drawing;
namespace it.stefanochizzolini.clown.samples{
/**
  <summary>Sample that reports where each image is shown within a PDF document (page and
  coordinates), by walking the contents of every page through a content scanner.</summary>
  <remarks>The traversal relies on the ContentScanner class, which exposes the content
  objects of a page one at a time, level by level.</remarks>
*/
public class ContentScanningSample
  : ISample
{
  #region dynamic
  #region interface
  #region public
  #region ISample
  /**
    <summary>Asks for a PDF file, opens it and scans each of its pages for images,
    writing the findings to the console.</summary>
    <param name="loader">Sample loader used to prompt for the PDF file to inspect.</param>
  */
  public void Run(
    SampleLoader loader
    )
  {
    // (boilerplate user choice -- ignore it)
    string pdfPath = loader.GetPdfFileChoice("Please select a PDF file");

    // 1. Open the PDF file and get hold of its document.
    File pdfFile = new File(pdfPath);
    Document pdfDocument = pdfFile.Document;

    // 2. Visit the pages one by one.
    Console.WriteLine("\nLooking for images...");
    foreach(Page currentPage in pdfDocument.Pages)
    {
      Scan(currentPage);
    }
    Console.WriteLine("\nImage search complete.");
  }
  #endregion
  #endregion

  #region private
  /**
    <summary>Scans the contents of a page looking for images.</summary>
    <param name="page">Page whose contents are to be scanned.</param>
  */
  private void Scan(
    Page page
    )
  {
    /*
      NOTE: The contents of a page are a sequence of content objects, which may be
      nested into multiple levels; the scanner wraps them starting from the top level.
    */
    ContentScanner topLevel = new ContentScanner(page.Contents);
    Scan(topLevel, page);
  }

  /**
    <summary>Scans a content level (and, recursively, its inner levels) looking for
    images.</summary>
    <param name="level">Scanner positioned on the level to inspect; when null, nothing
    is done.</param>
    <param name="page">Page the level belongs to; its resources are used to resolve
    external objects.</param>
  */
  private void Scan(
    ContentScanner level,
    Page page
    )
  {
    if(level == null)
    {
      return;
    }

    /*
      NOTE: An image may appear on a page either as an inline object or as an external
      object (XObject); the latter is shown through a PaintXObject operation.
    */
    for(
      ContentObject current = level.Current;
      current != null;
      level.MoveNext(), current = level.Current
      )
    {
      // External object shown?
      PaintXObject paintOperation = current as PaintXObject;
      if(paintOperation != null)
      {
        ReportExternalImage(paintOperation, level, page);
        continue;
      }

      // Inline image?
      if(current is InlineImage)
      {
        Console.WriteLine("Inline Image (TODO: info extraction)");
        continue;
      }

      // Any other plain operation carries nothing to inspect.
      if(current is Operation)
      {
        continue;
      }

      // Composite object: descend into its inner level.
      Scan(level.ChildLevel, page);
    }
  }

  /**
    <summary>Writes to the console the details of the external object shown by the given
    operation, provided that the object is an image; otherwise does nothing.</summary>
    <param name="operation">Operation showing the external object.</param>
    <param name="level">Scanner positioned on the operation; its graphics state supplies
    the current transformation matrix.</param>
    <param name="page">Page whose resources hold the external object.</param>
  */
  private void ReportExternalImage(
    PaintXObject operation,
    ContentScanner level,
    Page page
    )
  {
    // Resolve the external object through its resource key.
    PdfName resourceKey = operation.Name;
    xObjects::XObject resource = page.Resources.XObjects[resourceKey];
    if(!(resource is xObjects::ImageXObject))
    {
      return;
    }

    // Image key and indirect reference, followed by page index and indirect reference.
    string imageLabel = "Image '" + resourceKey + "' (" + resource.BaseObject + ") ";
    string pageLabel = "on page " + (page.Index + 1) + " (" + page.BaseObject + ")";
    Console.WriteLine(imageLabel + pageLabel);

    // Coordinates are read off the current transformation matrix.
    double[] matrix = level.State.CTM;
    SizeF nativeSize = resource.Size; // Image native size.
    Console.WriteLine(" Coordinates:");
    Console.WriteLine(" x: " + Math.Round(matrix[4]));
    // The y value is reported as the page height minus matrix element 5.
    Console.WriteLine(" y: " + Math.Round(page.Size.Value.Height - matrix[5]));
    Console.WriteLine(
      " width: " + Math.Round(matrix[0])
        + " (native: " + Math.Round(nativeSize.Width) + ")"
      );
    Console.WriteLine(
      " height: " + Math.Round(Math.Abs(matrix[3]))
        + " (native: " + Math.Round(nativeSize.Height) + ")"
      );
  }
  #endregion
  #endregion
  #endregion
}
}
|