Expand my Community achievements bar.

SOLVED

AEM Workflow PDF Metadata

Avatar

Level 2

Hello, I have a workflow in AEM that is responsible for cleaning the metadata of my PDFs in the DAM. The issue is that it doesn't delete all the metadata: it seems to delete it only from the AEM environment (the metadata node), but not from the document itself, because the metadata is still there when I download the file again. The XMP metadata also appears to persist. Any help or solution? I tried clearing the metadata node and deleting it, but in both cases, due to configuration, I get a message saying that the node is protected and cannot be accessed. Greetings. Here is my workflow, which only "works a bit":

package com.unicaja.aem.site.core.workflow;

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.jcr.Session;

import org.apache.sling.api.resource.LoginException;
import org.apache.sling.api.resource.ModifiableValueMap;
import org.apache.sling.api.resource.PersistenceException;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceResolverFactory;
import org.apache.sling.api.resource.ValueMap;
import org.osgi.framework.Constants;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.adobe.granite.workflow.WorkflowException;
import com.adobe.granite.workflow.WorkflowSession;
import com.adobe.granite.workflow.exec.WorkItem;
import com.adobe.granite.workflow.exec.Workflow;
import com.adobe.granite.workflow.exec.WorkflowProcess;
import com.adobe.granite.workflow.metadata.MetaDataMap;

@Component(service = WorkflowProcess.class, property = { Constants.SERVICE_DESCRIPTION + "= Remove Metas PDF Proccess", Constants.SERVICE_VENDOR + "= Adobe", "process.label=" + "Remove Metas PDF Proccess" })
public class RemoveMetas implements WorkflowProcess {

    @reference
    private ResourceResolverFactory resolverFactory;

    /** Default log. */
    protected final Logger log = LoggerFactory.getLogger(this.getClass());

    public void execute(WorkItem item, WorkflowSession wfsession, MetaDataMap args) throws WorkflowException {
        try {
            Session session = wfsession.adaptTo(Session.class);
            final Map<String, Object> map = new HashMap<>();
            map.put("user.jcr.session", session);
            ResourceResolver rr = resolverFactory.getResourceResolver(map);
            String path = item.getWorkflowData().getPayload().toString();
            Resource resource;
            Pattern regexExpression = Pattern.compile(".*/jcr:content/metadata.*");
            Matcher match = regexExpression.matcher(path);
            if(match.matches()){
                resource = rr.getResource(path);
            } else {
                resource = rr.getResource(path + "/jcr:content/metadata");
            }
            ValueMap vm = resource.getValueMap();
            if (vm.get("dc:format").equals("application/pdf")) {
                for (String key : vm.keySet()) {
                    if (!key.equals("jcr:mixinTypes") && !key.equals("jcr:primaryType") && !key.equals("dc:format") && !key.equals("dateHR") && !key.equals("jcr:title") && !key.equals("pdf:Title") && !key.equals("dc:title") && !key.equals("dc:description") && !key.equals("cq:tags")) {
                        ModifiableValueMap map1 = resource.adaptTo(ModifiableValueMap.class);
                        if (map1 != null) {
                            map1.remove(key);
                        }
                    }
                }
                resource.getResourceResolver().commit();
            }
        } catch (LoginException | PersistenceException e) {
            log.error("Error {}", e.getMessage());
        } finally {
            Workflow workflow = wfsession.getWorkflow(item.getWorkflow().getId());
            wfsession.terminateWorkflow(workflow);
        }
    }
}

 

1 Accepted Solution

Avatar

Correct answer by
Community Advisor

Hello @Tenu 

 

Could you please try the implementation below and check whether it fixes the issue?

 

  • The XMP metadata stored within the PDF file is separate from the metadata stored in the DAM asset's metadata node.

 

 

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import javax.jcr.Node;
import javax.jcr.RepositoryException;
import javax.jcr.Session;

import org.apache.commons.io.IOUtils;
import org.apache.jackrabbit.commons.JcrUtils;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceResolverFactory;
import org.osgi.framework.Constants;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.adobe.granite.workflow.WorkflowException;
import com.adobe.granite.workflow.WorkflowSession;
import com.adobe.granite.workflow.exec.WorkItem;
import com.adobe.granite.workflow.exec.Workflow;
import com.adobe.granite.workflow.exec.WorkflowData;
import com.adobe.granite.workflow.exec.WorkflowProcess;
import com.adobe.granite.workflow.metadata.MetaDataMap;
import com.day.cq.dam.api.Asset;
import com.day.cq.dam.api.DamConstants;
import com.day.cq.dam.commons.util.AssetReferenceSearch;
import com.day.cq.dam.commons.util.DamUtil;
import com.day.cq.dam.commons.util.UIHelper;

/**
 * Workflow process step registered with the label "Remove Metas PDF Process".
 *
 * For the payload path it resolves a DAM asset, removes the asset's metadata node
 * (removeMetadataFromDAM), then hands the original rendition to removeMetadataFromPDF,
 * and finally commits through the resource resolver.
 *
 * NOTE(review): several calls below (DamUtil.isSupportedAsset, UIHelper.getAuthInfoMap, and the
 * AssetReferenceSearch constructor/add/removeMetadata) could not be verified from this listing;
 * confirm each against the AEM API Javadoc before relying on this class.
 */
@Component(service = WorkflowProcess.class, property = {
Constants.SERVICE_DESCRIPTION + "=Remove Metas PDF Process",
Constants.SERVICE_VENDOR + "=Adobe",
"process.label=Remove Metas PDF Process"
})
public class RemoveMetas implements WorkflowProcess {

private static final Logger log = LoggerFactory.getLogger(RemoveMetas.class);

@Reference
private ResourceResolverFactory resolverFactory;

/**
 * Resolves the payload to an asset and removes its metadata.
 *
 * Any exception raised while doing so is caught and logged (message only), so this method does
 * not propagate failures to the workflow engine. The resolver opened here is closed in the
 * finally block.
 *
 * @param workItem        work item whose payload string is used as the resource path
 * @param workflowSession workflow session, adapted to a JCR Session
 * @param args            process arguments (not read)
 * @throws WorkflowException declared by the interface; nothing in this body throws it explicitly
 */
@Override
public void execute(WorkItem workItem, WorkflowSession workflowSession, MetaDataMap args)
throws WorkflowException {
ResourceResolver resourceResolver = null;
try {
Session session = workflowSession.adaptTo(Session.class);
// NOTE(review): presumably builds the authentication-info map wrapping the session — confirm
// that UIHelper actually offers getAuthInfoMap.
resourceResolver = resolverFactory.getResourceResolver(
UIHelper.getAuthInfoMap(session));

WorkflowData workflowData = workItem.getWorkflowData();
String path = workflowData.getPayload().toString();

Asset asset = DamUtil.resolveToAsset(resourceResolver.getResource(path));

// NOTE(review): nothing here checks that the asset is a PDF; confirm what isSupportedAsset tests.
if (asset != null && DamUtil.isSupportedAsset(asset)) {
removeMetadataFromDAM(asset);
removeMetadataFromPDF(asset, resourceResolver);
resourceResolver.commit();
} else {
log.error("Invalid asset or unsupported asset type: {}", path);
}
} catch (Exception e) {
// Only the message is logged; the exception object (and its stack trace) is not passed to the logger.
log.error("Error removing metadata from PDF: {}", e.getMessage());
} finally {
if (resourceResolver != null && resourceResolver.isLive()) {
resourceResolver.close();
}
}
}

/**
 * Removes the child node named by DamConstants.METADATA_FOLDER from the asset's node, if present.
 * The removal is not saved here; execute() commits afterwards.
 *
 * NOTE(review): the original poster reported that removing this node fails because it is
 * protected in their setup — verify this works in the target repository.
 *
 * @param asset asset whose metadata node is removed
 * @throws RepositoryException if the JCR lookup or removal fails
 */
private void removeMetadataFromDAM(Asset asset) throws RepositoryException {
Node assetNode = asset.adaptTo(Node.class);
if (assetNode != null && assetNode.hasNode(DamConstants.METADATA_FOLDER)) {
Node metadataNode = assetNode.getNode(DamConstants.METADATA_FOLDER);
metadataNode.remove();
}
}

/**
 * Intended to strip the metadata embedded in the asset's original rendition.
 *
 * NOTE(review): this method looks unfinished. The same expression, asset.getOriginal().getStream(),
 * is assigned to both an InputStream and an OutputStream variable, which cannot both type-check.
 * Nothing writes a modified binary back to the asset, and getOriginal() is dereferenced without a
 * null check. The AssetReferenceSearch usage should be confirmed against its Javadoc.
 *
 * @param asset            asset whose original rendition is read
 * @param resourceResolver resolver passed to AssetReferenceSearch
 * @throws IOException declared for stream handling
 */
private void removeMetadataFromPDF(Asset asset, ResourceResolver resourceResolver) throws IOException {
InputStream inputStream = null;
OutputStream outputStream = null;
try {
inputStream = asset.getOriginal().getStream();
// NOTE(review): same call as the line above, but assigned to an OutputStream — see method comment.
outputStream = asset.getOriginal().getStream();
AssetReferenceSearch assetReferenceSearch = new AssetReferenceSearch(resourceResolver);
assetReferenceSearch.add(new AssetReferenceSearch.AssetReference(asset));
assetReferenceSearch.removeMetadata(inputStream, outputStream);
} finally {
// Both streams are closed without reporting close failures.
IOUtils.closeQuietly(inputStream);
IOUtils.closeQuietly(outputStream);
}
}
}

View solution in original post

4 Replies

Avatar

Level 2

It seems to be a resource resolver issue. If you are using the "rr" resource resolver, then use that same resolver for the commit() call.

Avatar

Level 2

I notice that when I delete the metadata in AEM, it is gone when I look at the node in CRX, but if I download the file it still has its metadata and XMP saved.

Avatar

Level 2

AEM only extracts the XMP data from the PDF file and stores it in the metadata node. You can delete it from the AEM node, but if you want to delete the XMP data from the PDF document itself, you will have to write custom code to do that.

 

Example : https://stackoverflow.com/questions/21295420/pdf-metadata-removal-using-java

Avatar

Correct answer by
Community Advisor

Hello @Tenu 

 

Could you please try the implementation below and check whether it fixes the issue?

 

  • The XMP metadata stored within the PDF file is separate from the metadata stored in the DAM asset's metadata node.

 

 

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import javax.jcr.Node;
import javax.jcr.RepositoryException;
import javax.jcr.Session;

import org.apache.commons.io.IOUtils;
import org.apache.jackrabbit.commons.JcrUtils;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceResolverFactory;
import org.osgi.framework.Constants;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.adobe.granite.workflow.WorkflowException;
import com.adobe.granite.workflow.WorkflowSession;
import com.adobe.granite.workflow.exec.WorkItem;
import com.adobe.granite.workflow.exec.Workflow;
import com.adobe.granite.workflow.exec.WorkflowData;
import com.adobe.granite.workflow.exec.WorkflowProcess;
import com.adobe.granite.workflow.metadata.MetaDataMap;
import com.day.cq.dam.api.Asset;
import com.day.cq.dam.api.DamConstants;
import com.day.cq.dam.commons.util.AssetReferenceSearch;
import com.day.cq.dam.commons.util.DamUtil;
import com.day.cq.dam.commons.util.UIHelper;

/**
 * Workflow process step registered with the label "Remove Metas PDF Process".
 *
 * For the payload path it resolves a DAM asset, removes the asset's metadata node
 * (removeMetadataFromDAM), then hands the original rendition to removeMetadataFromPDF,
 * and finally commits through the resource resolver.
 *
 * NOTE(review): several calls below (DamUtil.isSupportedAsset, UIHelper.getAuthInfoMap, and the
 * AssetReferenceSearch constructor/add/removeMetadata) could not be verified from this listing;
 * confirm each against the AEM API Javadoc before relying on this class.
 */
@Component(service = WorkflowProcess.class, property = {
Constants.SERVICE_DESCRIPTION + "=Remove Metas PDF Process",
Constants.SERVICE_VENDOR + "=Adobe",
"process.label=Remove Metas PDF Process"
})
public class RemoveMetas implements WorkflowProcess {

private static final Logger log = LoggerFactory.getLogger(RemoveMetas.class);

@Reference
private ResourceResolverFactory resolverFactory;

/**
 * Resolves the payload to an asset and removes its metadata.
 *
 * Any exception raised while doing so is caught and logged (message only), so this method does
 * not propagate failures to the workflow engine. The resolver opened here is closed in the
 * finally block.
 *
 * @param workItem        work item whose payload string is used as the resource path
 * @param workflowSession workflow session, adapted to a JCR Session
 * @param args            process arguments (not read)
 * @throws WorkflowException declared by the interface; nothing in this body throws it explicitly
 */
@Override
public void execute(WorkItem workItem, WorkflowSession workflowSession, MetaDataMap args)
throws WorkflowException {
ResourceResolver resourceResolver = null;
try {
Session session = workflowSession.adaptTo(Session.class);
// NOTE(review): presumably builds the authentication-info map wrapping the session — confirm
// that UIHelper actually offers getAuthInfoMap.
resourceResolver = resolverFactory.getResourceResolver(
UIHelper.getAuthInfoMap(session));

WorkflowData workflowData = workItem.getWorkflowData();
String path = workflowData.getPayload().toString();

Asset asset = DamUtil.resolveToAsset(resourceResolver.getResource(path));

// NOTE(review): nothing here checks that the asset is a PDF; confirm what isSupportedAsset tests.
if (asset != null && DamUtil.isSupportedAsset(asset)) {
removeMetadataFromDAM(asset);
removeMetadataFromPDF(asset, resourceResolver);
resourceResolver.commit();
} else {
log.error("Invalid asset or unsupported asset type: {}", path);
}
} catch (Exception e) {
// Only the message is logged; the exception object (and its stack trace) is not passed to the logger.
log.error("Error removing metadata from PDF: {}", e.getMessage());
} finally {
if (resourceResolver != null && resourceResolver.isLive()) {
resourceResolver.close();
}
}
}

/**
 * Removes the child node named by DamConstants.METADATA_FOLDER from the asset's node, if present.
 * The removal is not saved here; execute() commits afterwards.
 *
 * NOTE(review): the original poster reported that removing this node fails because it is
 * protected in their setup — verify this works in the target repository.
 *
 * @param asset asset whose metadata node is removed
 * @throws RepositoryException if the JCR lookup or removal fails
 */
private void removeMetadataFromDAM(Asset asset) throws RepositoryException {
Node assetNode = asset.adaptTo(Node.class);
if (assetNode != null && assetNode.hasNode(DamConstants.METADATA_FOLDER)) {
Node metadataNode = assetNode.getNode(DamConstants.METADATA_FOLDER);
metadataNode.remove();
}
}

/**
 * Intended to strip the metadata embedded in the asset's original rendition.
 *
 * NOTE(review): this method looks unfinished. The same expression, asset.getOriginal().getStream(),
 * is assigned to both an InputStream and an OutputStream variable, which cannot both type-check.
 * Nothing writes a modified binary back to the asset, and getOriginal() is dereferenced without a
 * null check. The AssetReferenceSearch usage should be confirmed against its Javadoc.
 *
 * @param asset            asset whose original rendition is read
 * @param resourceResolver resolver passed to AssetReferenceSearch
 * @throws IOException declared for stream handling
 */
private void removeMetadataFromPDF(Asset asset, ResourceResolver resourceResolver) throws IOException {
InputStream inputStream = null;
OutputStream outputStream = null;
try {
inputStream = asset.getOriginal().getStream();
// NOTE(review): same call as the line above, but assigned to an OutputStream — see method comment.
outputStream = asset.getOriginal().getStream();
AssetReferenceSearch assetReferenceSearch = new AssetReferenceSearch(resourceResolver);
assetReferenceSearch.add(new AssetReferenceSearch.AssetReference(asset));
assetReferenceSearch.removeMetadata(inputStream, outputStream);
} finally {
// Both streams are closed without reporting close failures.
IOUtils.closeQuietly(inputStream);
IOUtils.closeQuietly(outputStream);
}
}
}