<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Programmatic Method For Export PDF to XML in Adobe Experience Manager Questions</title>
    <link>https://experienceleaguecommunities.adobe.com/t5/adobe-experience-manager/programmatic-method-for-export-pdf-to-xml/m-p/427845#M121799</link>
    <description>&lt;P&gt;Hi&amp;nbsp;&lt;LI-USER uid="17486453"&gt;&lt;/LI-USER&gt;&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;You may give this a try:&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;PRE&gt;public class ConvertPDFToXML {
            // Output target for the serializer; assigned in main() before initXML() runs.
            static StreamResult streamResult;
            // SAX handler that receives the XML events; created in initXML().
            static TransformerHandler handler;
            // Shared attribute list; created in initXML() and cleared in process().
            static AttributesImpl atts;

            /**
             * Reads the content stream of page 1 of the hard-coded input PDF,
             * concatenates its string tokens and hands the text to
             * {@link #process(String)}, with XML output directed at {@code data.xml}.
             *
             * @param args command-line arguments (not used)
             * @throws IOException declared in the signature; failures raised inside
             *         the conversion are caught and reported on standard error
             */
            public static void main(String[] args) throws IOException {
                    PdfReader reader = null;
                    try {
                            reader = new PdfReader("C:\\hello.pdf");
                            PdfDictionary page = reader.getPageN(1);
                            // The casts require the page's CONTENTS entry to be a single
                            // indirect reference to a stream; any other layout fails here
                            // and is reported by the catch block below.
                            PRIndirectReference objectReference = (PRIndirectReference) page
                                            .get(PdfName.CONTENTS);
                            PRStream stream = (PRStream) PdfReader
                                            .getPdfObject(objectReference);
                            byte[] streamBytes = PdfReader.getStreamBytes(stream);
                            PRTokeniser tokenizer = new PRTokeniser(streamBytes);

                            // Keep only string tokens; every other token type is skipped.
                            StringBuilder text = new StringBuilder();
                            while (tokenizer.nextToken()) {
                                    if (tokenizer.getTokenType() == PRTokeniser.TK_STRING) {
                                            text.append(tokenizer.getStringValue());
                                    }
                            }

                            // initXML() reads streamResult, so it must be set first.
                            streamResult = new StreamResult("data.xml");
                            initXML();
                            process(text.toString());
                            closeXML();
                    } catch (Exception e) {
                            // Report the failure instead of silently discarding it.
                            System.err.println("PDF to XML conversion failed: " + e);
                            e.printStackTrace();
                    } finally {
                            // Release the reader whether or not the conversion succeeded.
                            if (reader != null) {
                                    reader.close();
                            }
                    }
            }

            /**
             * Creates the SAX serializer, binds it to {@code streamResult} and opens
             * the output document with the root {@code data} element.
             *
             * Expects {@code streamResult} to have been assigned already.
             *
             * @throws ParserConfigurationException declared in the signature
             * @throws TransformerConfigurationException if the handler cannot be created
             * @throws SAXException if starting the document or root element fails
             */
            public static void initXML() throws ParserConfigurationException,
                            TransformerConfigurationException, SAXException {
                    SAXTransformerFactory factory =
                                    (SAXTransformerFactory) SAXTransformerFactory.newInstance();
                    handler = factory.newTransformerHandler();

                    // Output settings: ISO-8859-1 encoding, indentation switched on,
                    // and the Apache XSLT indent-amount property set to 4.
                    Transformer outputConfig = handler.getTransformer();
                    outputConfig.setOutputProperty(OutputKeys.ENCODING, "ISO-8859-1");
                    outputConfig.setOutputProperty(
                                    "{http://xml.apache.org/xslt}indent-amount", "4");
                    outputConfig.setOutputProperty(OutputKeys.INDENT, "yes");

                    handler.setResult(streamResult);
                    handler.startDocument();

                    // The root element is opened here and closed later by closeXML().
                    atts = new AttributesImpl();
                    handler.startElement("", "", "data", atts);
            }

            /**
             * Writes one {@code Message} element whose text is the part of {@code s}
             * that precedes the first {@code |} character.
             *
             * @param s pipe-delimited text; only the first field is written
             * @throws SAXException if the handler rejects one of the events
             */
            public static void process(String s) throws SAXException {
                    String[] elements = s.split("\\|");
                    // split() drops trailing empty fields, so input made up only of
                    // '|' characters yields an empty array; write an empty Message then
                    // instead of failing on elements[0].
                    String message = (elements.length == 0) ? "" : elements[0];
                    atts.clear();
                    handler.startElement("", "", "Message", atts);
                    handler.characters(message.toCharArray(), 0, message.length());
                    handler.endElement("", "", "Message");
            }

            /**
             * Ends the root {@code data} element and then the document on the shared
             * handler.
             *
             * @throws SAXException if the handler rejects either event
             */
            public static void closeXML() throws SAXException {
                    final String rootElement = "data";
                    handler.endElement("", "", rootElement);
                    handler.endDocument();
            }
    }&lt;/PRE&gt;&lt;P&gt;Regards,&lt;/P&gt;&lt;P&gt;Santosh&lt;/P&gt;&lt;P&gt;&lt;A href="https://www.techinnovia.com/blog/" target="_blank"&gt;https://www.techinnovia.com/blog/&lt;/A&gt;&lt;/P&gt;</description>
    <pubDate>Sat, 16 Oct 2021 15:38:13 GMT</pubDate>
    <dc:creator>SantoshSai</dc:creator>
    <dc:date>2021-10-16T15:38:13Z</dc:date>
    <item>
      <title>Programmatic Method For Export PDF to XML</title>
      <link>https://experienceleaguecommunities.adobe.com/t5/adobe-experience-manager/programmatic-method-for-export-pdf-to-xml/m-p/427748#M121780</link>
      <description>&lt;P&gt;I have a need to use the Edit | Form Options | Export Data feature to convert PDFs to XML. I need to be able to do this programmatically.&amp;nbsp;I have done research on the SDK and this may be a solution. I have Adobe Acrobat Pro 2017 installed. I was able to work with the AcroExch COM object in PowerShell. This offered a lot of interesting functionality but not a simple Export Data to XML. I see there may be a way to do this via a plugin in C, but I am not sure whether that is possible either. I looked at my installation and I don't think the full SDK is available.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Any thoughts on how to proceed?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you.&lt;/P&gt;</description>
      <pubDate>Fri, 15 Oct 2021 15:51:32 GMT</pubDate>
      <guid>https://experienceleaguecommunities.adobe.com/t5/adobe-experience-manager/programmatic-method-for-export-pdf-to-xml/m-p/427748#M121780</guid>
      <dc:creator>fmcaruso</dc:creator>
      <dc:date>2021-10-15T15:51:32Z</dc:date>
    </item>
    <item>
      <title>Re: Programmatic Method For Export PDF to XML</title>
      <link>https://experienceleaguecommunities.adobe.com/t5/adobe-experience-manager/programmatic-method-for-export-pdf-to-xml/m-p/427845#M121799</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;LI-USER uid="17486453"&gt;&lt;/LI-USER&gt;&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;You may give this a try:&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;PRE&gt;public class ConvertPDFToXML {
            // Output target for the serializer; assigned in main() before initXML() runs.
            static StreamResult streamResult;
            // SAX handler that receives the XML events; created in initXML().
            static TransformerHandler handler;
            // Shared attribute list; created in initXML() and cleared in process().
            static AttributesImpl atts;

            /**
             * Reads the content stream of page 1 of the hard-coded input PDF,
             * concatenates its string tokens and hands the text to
             * {@link #process(String)}, with XML output directed at {@code data.xml}.
             *
             * @param args command-line arguments (not used)
             * @throws IOException declared in the signature; failures raised inside
             *         the conversion are caught and reported on standard error
             */
            public static void main(String[] args) throws IOException {
                    PdfReader reader = null;
                    try {
                            reader = new PdfReader("C:\\hello.pdf");
                            PdfDictionary page = reader.getPageN(1);
                            // The casts require the page's CONTENTS entry to be a single
                            // indirect reference to a stream; any other layout fails here
                            // and is reported by the catch block below.
                            PRIndirectReference objectReference = (PRIndirectReference) page
                                            .get(PdfName.CONTENTS);
                            PRStream stream = (PRStream) PdfReader
                                            .getPdfObject(objectReference);
                            byte[] streamBytes = PdfReader.getStreamBytes(stream);
                            PRTokeniser tokenizer = new PRTokeniser(streamBytes);

                            // Keep only string tokens; every other token type is skipped.
                            StringBuilder text = new StringBuilder();
                            while (tokenizer.nextToken()) {
                                    if (tokenizer.getTokenType() == PRTokeniser.TK_STRING) {
                                            text.append(tokenizer.getStringValue());
                                    }
                            }

                            // initXML() reads streamResult, so it must be set first.
                            streamResult = new StreamResult("data.xml");
                            initXML();
                            process(text.toString());
                            closeXML();
                    } catch (Exception e) {
                            // Report the failure instead of silently discarding it.
                            System.err.println("PDF to XML conversion failed: " + e);
                            e.printStackTrace();
                    } finally {
                            // Release the reader whether or not the conversion succeeded.
                            if (reader != null) {
                                    reader.close();
                            }
                    }
            }

            /**
             * Creates the SAX serializer, binds it to {@code streamResult} and opens
             * the output document with the root {@code data} element.
             *
             * Expects {@code streamResult} to have been assigned already.
             *
             * @throws ParserConfigurationException declared in the signature
             * @throws TransformerConfigurationException if the handler cannot be created
             * @throws SAXException if starting the document or root element fails
             */
            public static void initXML() throws ParserConfigurationException,
                            TransformerConfigurationException, SAXException {
                    SAXTransformerFactory factory =
                                    (SAXTransformerFactory) SAXTransformerFactory.newInstance();
                    handler = factory.newTransformerHandler();

                    // Output settings: ISO-8859-1 encoding, indentation switched on,
                    // and the Apache XSLT indent-amount property set to 4.
                    Transformer outputConfig = handler.getTransformer();
                    outputConfig.setOutputProperty(OutputKeys.ENCODING, "ISO-8859-1");
                    outputConfig.setOutputProperty(
                                    "{http://xml.apache.org/xslt}indent-amount", "4");
                    outputConfig.setOutputProperty(OutputKeys.INDENT, "yes");

                    handler.setResult(streamResult);
                    handler.startDocument();

                    // The root element is opened here and closed later by closeXML().
                    atts = new AttributesImpl();
                    handler.startElement("", "", "data", atts);
            }

            /**
             * Writes one {@code Message} element whose text is the part of {@code s}
             * that precedes the first {@code |} character.
             *
             * @param s pipe-delimited text; only the first field is written
             * @throws SAXException if the handler rejects one of the events
             */
            public static void process(String s) throws SAXException {
                    String[] elements = s.split("\\|");
                    // split() drops trailing empty fields, so input made up only of
                    // '|' characters yields an empty array; write an empty Message then
                    // instead of failing on elements[0].
                    String message = (elements.length == 0) ? "" : elements[0];
                    atts.clear();
                    handler.startElement("", "", "Message", atts);
                    handler.characters(message.toCharArray(), 0, message.length());
                    handler.endElement("", "", "Message");
            }

            /**
             * Ends the root {@code data} element and then the document on the shared
             * handler.
             *
             * @throws SAXException if the handler rejects either event
             */
            public static void closeXML() throws SAXException {
                    final String rootElement = "data";
                    handler.endElement("", "", rootElement);
                    handler.endDocument();
            }
    }&lt;/PRE&gt;&lt;P&gt;Regards,&lt;/P&gt;&lt;P&gt;Santosh&lt;/P&gt;&lt;P&gt;&lt;A href="https://www.techinnovia.com/blog/" target="_blank"&gt;https://www.techinnovia.com/blog/&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 16 Oct 2021 15:38:13 GMT</pubDate>
      <guid>https://experienceleaguecommunities.adobe.com/t5/adobe-experience-manager/programmatic-method-for-export-pdf-to-xml/m-p/427845#M121799</guid>
      <dc:creator>SantoshSai</dc:creator>
      <dc:date>2021-10-16T15:38:13Z</dc:date>
    </item>
  </channel>
</rss>

