Hello, I have a workflow in AEM that is responsible for cleaning the metadata of my PDFs in the DAM. The issue is that it doesn't delete all the metadata; it seems to only delete it from the environment but not from the document when I download it again. It also appears that the XMP metadata persists. Any help or solution? I tried cleaning the metadata node or deleting it, but in both cases, due to configuration, I get a message saying that the node is protected and cannot be accessed. Greetings. My workflow that "works a bit".
package com.unicaja.aem.site.core.workflow;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.jcr.Session;
import org.apache.sling.api.resource.LoginException;
import org.apache.sling.api.resource.ModifiableValueMap;
import org.apache.sling.api.resource.PersistenceException;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceResolverFactory;
import org.apache.sling.api.resource.ValueMap;
import org.osgi.framework.Constants;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.adobe.granite.workflow.WorkflowException;
import com.adobe.granite.workflow.WorkflowSession;
import com.adobe.granite.workflow.exec.WorkItem;
import com.adobe.granite.workflow.exec.Workflow;
import com.adobe.granite.workflow.exec.WorkflowProcess;
import com.adobe.granite.workflow.metadata.MetaDataMap;
@Component(service = WorkflowProcess.class, property = { Constants.SERVICE_DESCRIPTION + "= Remove Metas PDF Proccess", Constants.SERVICE_VENDOR + "= Adobe", "process.label=" + "Remove Metas PDF Proccess" })
public class RemoveMetas implements WorkflowProcess {
@reference
private ResourceResolverFactory resolverFactory;
/** Default log. */
protected final Logger log = LoggerFactory.getLogger(this.getClass());
public void execute(WorkItem item, WorkflowSession wfsession, MetaDataMap args) throws WorkflowException {
try {
Session session = wfsession.adaptTo(Session.class);
final Map<String, Object> map = new HashMap<>();
map.put("user.jcr.session", session);
ResourceResolver rr = resolverFactory.getResourceResolver(map);
String path = item.getWorkflowData().getPayload().toString();
Resource resource;
Pattern regexExpression = Pattern.compile(".*/jcr:content/metadata.*");
Matcher match = regexExpression.matcher(path);
if(match.matches()){
resource = rr.getResource(path);
} else {
resource = rr.getResource(path + "/jcr:content/metadata");
}
ValueMap vm = resource.getValueMap();
if (vm.get("dc:format").equals("application/pdf")) {
for (String key : vm.keySet()) {
if (!key.equals("jcr:mixinTypes") && !key.equals("jcr:primaryType") && !key.equals("dc:format") && !key.equals("dateHR") && !key.equals("jcr:title") && !key.equals("pdf:Title") && !key.equals("dc:title") && !key.equals("dc:description") && !key.equals("cq:tags")) {
ModifiableValueMap map1 = resource.adaptTo(ModifiableValueMap.class);
if (map1 != null) {
map1.remove(key);
}
}
}
resource.getResourceResolver().commit();
}
} catch (LoginException | PersistenceException e) {
log.error("Error {}", e.getMessage());
} finally {
Workflow workflow = wfsession.getWorkflow(item.getWorkflow().getId());
wfsession.terminateWorkflow(workflow);
}
}
}
Solved! Go to Solution.
Views
Replies
Total Likes
Hello @Tenu
Could you please try the implementation below and check whether it fixes the issue?
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import javax.jcr.Node;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import org.apache.commons.io.IOUtils;
import org.apache.jackrabbit.commons.JcrUtils;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceResolverFactory;
import org.osgi.framework.Constants;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.adobe.granite.workflow.WorkflowException;
import com.adobe.granite.workflow.WorkflowSession;
import com.adobe.granite.workflow.exec.WorkItem;
import com.adobe.granite.workflow.exec.Workflow;
import com.adobe.granite.workflow.exec.WorkflowData;
import com.adobe.granite.workflow.exec.WorkflowProcess;
import com.adobe.granite.workflow.metadata.MetaDataMap;
import com.day.cq.dam.api.Asset;
import com.day.cq.dam.api.DamConstants;
import com.day.cq.dam.commons.util.AssetReferenceSearch;
import com.day.cq.dam.commons.util.DamUtil;
import com.day.cq.dam.commons.util.UIHelper;
/**
 * Workflow process registered under the label "Remove Metas PDF Process".
 *
 * It resolves the workflow payload to a DAM asset, removes the asset's metadata node,
 * calls removeMetadataFromPDF for the original rendition, and commits the resolver.
 *
 * NOTE(review): several calls below (UIHelper.getAuthInfoMap, DamUtil.isSupportedAsset,
 * AssetReferenceSearch.AssetReference, AssetReferenceSearch.removeMetadata) could not be
 * confirmed from this file - verify them against the AEM API version in use before relying
 * on this class.
 */
@Component(service = WorkflowProcess.class, property = {
Constants.SERVICE_DESCRIPTION + "=Remove Metas PDF Process",
Constants.SERVICE_VENDOR + "=Adobe",
"process.label=Remove Metas PDF Process"
})
public class RemoveMetas implements WorkflowProcess {
private static final Logger log = LoggerFactory.getLogger(RemoveMetas.class);
@Reference
private ResourceResolverFactory resolverFactory;
/**
 * Removes repository metadata and attempts to strip embedded metadata for the payload asset.
 *
 * Every exception raised inside the try block is caught and only its message is logged,
 * so failures there do not propagate to the workflow engine.
 *
 * @param workItem        work item whose payload is treated as a path
 * @param workflowSession workflow session, adapted to a JCR session for repository access
 * @param args            process arguments (not read here)
 * @throws WorkflowException declared by the interface
 */
@Override
public void execute(WorkItem workItem, WorkflowSession workflowSession, MetaDataMap args)
throws WorkflowException {
ResourceResolver resourceResolver = null;
try {
Session session = workflowSession.adaptTo(Session.class);
// NOTE(review): confirm UIHelper actually offers getAuthInfoMap(Session).
resourceResolver = resolverFactory.getResourceResolver(
UIHelper.getAuthInfoMap(session));
WorkflowData workflowData = workItem.getWorkflowData();
String path = workflowData.getPayload().toString();
Asset asset = DamUtil.resolveToAsset(resourceResolver.getResource(path));
if (asset != null && DamUtil.isSupportedAsset(asset)) {
removeMetadataFromDAM(asset);
removeMetadataFromPDF(asset, resourceResolver);
// Single commit after both steps; nothing is persisted if either step throws.
resourceResolver.commit();
} else {
log.error("Invalid asset or unsupported asset type: {}", path);
}
} catch (Exception e) {
// Only the message is logged; the stack trace and cause are dropped.
log.error("Error removing metadata from PDF: {}", e.getMessage());
} finally {
if (resourceResolver != null && resourceResolver.isLive()) {
resourceResolver.close();
}
}
}
/**
 * Removes the child node named by DamConstants.METADATA_FOLDER from the asset's node,
 * if the asset adapts to a node and that child exists.
 *
 * This drops the whole node, i.e. every property on it rather than a selected subset.
 *
 * @param asset asset whose metadata node is removed
 * @throws RepositoryException if the node lookup or removal fails
 */
private void removeMetadataFromDAM(Asset asset) throws RepositoryException {
Node assetNode = asset.adaptTo(Node.class);
if (assetNode != null && assetNode.hasNode(DamConstants.METADATA_FOLDER)) {
Node metadataNode = assetNode.getNode(DamConstants.METADATA_FOLDER);
metadataNode.remove();
}
}
/**
 * Intended to strip metadata from the asset's original rendition.
 *
 * @param asset            asset whose original rendition is read
 * @param resourceResolver resolver handed to AssetReferenceSearch
 * @throws IOException declared for stream handling
 */
private void removeMetadataFromPDF(Asset asset, ResourceResolver resourceResolver) throws IOException {
InputStream inputStream = null;
OutputStream outputStream = null;
try {
// NOTE(review): the same getStream() call is assigned to an InputStream and, on the next
// line, to an OutputStream; one of the two cannot type-check - confirm the intended write API.
inputStream = asset.getOriginal().getStream();
outputStream = asset.getOriginal().getStream();
// NOTE(review): confirm AssetReferenceSearch has this constructor, a nested AssetReference
// type and a removeMetadata(InputStream, OutputStream) method. No call in this method
// visibly stores a modified binary back on the asset.
AssetReferenceSearch assetReferenceSearch = new AssetReferenceSearch(resourceResolver);
assetReferenceSearch.add(new AssetReferenceSearch.AssetReference(asset));
assetReferenceSearch.removeMetadata(inputStream, outputStream);
} finally {
IOUtils.closeQuietly(inputStream);
IOUtils.closeQuietly(outputStream);
}
}
}
This looks like a resource resolver issue. If you obtain the resource through the "rr" resource resolver, call the commit method on that same resolver.
I notice that the metadata is deleted in AEM when I look at the node in CRX, but if I download the file it still has the metadata and XMP saved.
AEM only extracts the XMP data from the PDF file and stores it in the metadata node. You can delete it from the AEM node, but if you want to delete the XMP data from the PDF document itself, you will have to write custom code to do that.
Example : https://stackoverflow.com/questions/21295420/pdf-metadata-removal-using-java
Hello @Tenu
Could you please try the implementation below and check whether it fixes the issue?
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import javax.jcr.Node;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import org.apache.commons.io.IOUtils;
import org.apache.jackrabbit.commons.JcrUtils;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceResolverFactory;
import org.osgi.framework.Constants;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.adobe.granite.workflow.WorkflowException;
import com.adobe.granite.workflow.WorkflowSession;
import com.adobe.granite.workflow.exec.WorkItem;
import com.adobe.granite.workflow.exec.Workflow;
import com.adobe.granite.workflow.exec.WorkflowData;
import com.adobe.granite.workflow.exec.WorkflowProcess;
import com.adobe.granite.workflow.metadata.MetaDataMap;
import com.day.cq.dam.api.Asset;
import com.day.cq.dam.api.DamConstants;
import com.day.cq.dam.commons.util.AssetReferenceSearch;
import com.day.cq.dam.commons.util.DamUtil;
import com.day.cq.dam.commons.util.UIHelper;
/**
 * Workflow process registered under the label "Remove Metas PDF Process".
 *
 * It resolves the workflow payload to a DAM asset, removes the asset's metadata node,
 * calls removeMetadataFromPDF for the original rendition, and commits the resolver.
 *
 * NOTE(review): several calls below (UIHelper.getAuthInfoMap, DamUtil.isSupportedAsset,
 * AssetReferenceSearch.AssetReference, AssetReferenceSearch.removeMetadata) could not be
 * confirmed from this file - verify them against the AEM API version in use before relying
 * on this class.
 */
@Component(service = WorkflowProcess.class, property = {
Constants.SERVICE_DESCRIPTION + "=Remove Metas PDF Process",
Constants.SERVICE_VENDOR + "=Adobe",
"process.label=Remove Metas PDF Process"
})
public class RemoveMetas implements WorkflowProcess {
private static final Logger log = LoggerFactory.getLogger(RemoveMetas.class);
@Reference
private ResourceResolverFactory resolverFactory;
/**
 * Removes repository metadata and attempts to strip embedded metadata for the payload asset.
 *
 * Every exception raised inside the try block is caught and only its message is logged,
 * so failures there do not propagate to the workflow engine.
 *
 * @param workItem        work item whose payload is treated as a path
 * @param workflowSession workflow session, adapted to a JCR session for repository access
 * @param args            process arguments (not read here)
 * @throws WorkflowException declared by the interface
 */
@Override
public void execute(WorkItem workItem, WorkflowSession workflowSession, MetaDataMap args)
throws WorkflowException {
ResourceResolver resourceResolver = null;
try {
Session session = workflowSession.adaptTo(Session.class);
// NOTE(review): confirm UIHelper actually offers getAuthInfoMap(Session).
resourceResolver = resolverFactory.getResourceResolver(
UIHelper.getAuthInfoMap(session));
WorkflowData workflowData = workItem.getWorkflowData();
String path = workflowData.getPayload().toString();
Asset asset = DamUtil.resolveToAsset(resourceResolver.getResource(path));
if (asset != null && DamUtil.isSupportedAsset(asset)) {
removeMetadataFromDAM(asset);
removeMetadataFromPDF(asset, resourceResolver);
// Single commit after both steps; nothing is persisted if either step throws.
resourceResolver.commit();
} else {
log.error("Invalid asset or unsupported asset type: {}", path);
}
} catch (Exception e) {
// Only the message is logged; the stack trace and cause are dropped.
log.error("Error removing metadata from PDF: {}", e.getMessage());
} finally {
if (resourceResolver != null && resourceResolver.isLive()) {
resourceResolver.close();
}
}
}
/**
 * Removes the child node named by DamConstants.METADATA_FOLDER from the asset's node,
 * if the asset adapts to a node and that child exists.
 *
 * This drops the whole node, i.e. every property on it rather than a selected subset.
 *
 * @param asset asset whose metadata node is removed
 * @throws RepositoryException if the node lookup or removal fails
 */
private void removeMetadataFromDAM(Asset asset) throws RepositoryException {
Node assetNode = asset.adaptTo(Node.class);
if (assetNode != null && assetNode.hasNode(DamConstants.METADATA_FOLDER)) {
Node metadataNode = assetNode.getNode(DamConstants.METADATA_FOLDER);
metadataNode.remove();
}
}
/**
 * Intended to strip metadata from the asset's original rendition.
 *
 * @param asset            asset whose original rendition is read
 * @param resourceResolver resolver handed to AssetReferenceSearch
 * @throws IOException declared for stream handling
 */
private void removeMetadataFromPDF(Asset asset, ResourceResolver resourceResolver) throws IOException {
InputStream inputStream = null;
OutputStream outputStream = null;
try {
// NOTE(review): the same getStream() call is assigned to an InputStream and, on the next
// line, to an OutputStream; one of the two cannot type-check - confirm the intended write API.
inputStream = asset.getOriginal().getStream();
outputStream = asset.getOriginal().getStream();
// NOTE(review): confirm AssetReferenceSearch has this constructor, a nested AssetReference
// type and a removeMetadata(InputStream, OutputStream) method. No call in this method
// visibly stores a modified binary back on the asset.
AssetReferenceSearch assetReferenceSearch = new AssetReferenceSearch(resourceResolver);
assetReferenceSearch.add(new AssetReferenceSearch.AssetReference(asset));
assetReferenceSearch.removeMetadata(inputStream, outputStream);
} finally {
IOUtils.closeQuietly(inputStream);
IOUtils.closeQuietly(outputStream);
}
}
}