Expand my Community achievements bar.

Guidelines for the Responsible Use of Generative AI in the Experience Cloud Community.
SOLVED

AEM v6.5.17 and GraphQL indexes v1.1.1: fragments vs contentFragments

Avatar

Level 2

Hello, I'm using AEM 6.5.17 on-prem and have AEM Content Fragment with GraphQL Index Package 1.1.1 installed.

This package has the following indexes: 

  • /oak:index/fragments

 

{
  "jcr:primaryType": "oak:QueryIndexDefinition",
  "selectionPolicy": "tag",
  "compatVersion": 2,
  "includedPaths": [
    "/content/dam"
  ],
  "queryPaths": [
    "/content/dam"
  ],
  "seed": 3980062065619390500,
  "tags": [
    "fragments"
  ],
  "type": "lucene",
  "async": [
    "async",
    "nrt"
  ],
  "evaluatePathRestrictions": true,
  "reindex": false,
  "reindexCount": 6,
  "facets": {
    "jcr:primaryType": "nt:unstructured",
    "topChildren": "100",
    "secure": "statistical"
  },
  "indexRules": {
    "jcr:primaryType": "nt:unstructured",
    "dam:Asset": {
      "jcr:primaryType": "nt:unstructured",
      "properties": {
        "jcr:primaryType": "nt:unstructured",
        "dcFormat": {
          "jcr:primaryType": "nt:unstructured",
          "facets": true,
          "propertyIndex": true,
          "name": "jcr:content/metadata/dc:format"
        },
        "damStatus": {
          "jcr:primaryType": "nt:unstructured",
          "facets": true,
          "propertyIndex": true,
          "name": "jcr:content/metadata/dam:status"
        },
        "contentFragment": {
          "jcr:primaryType": "nt:unstructured",
          "propertyIndex": true,
          "name": "jcr:content/contentFragment",
          "type": "Boolean"
        },
        "model": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "name": "jcr:content/data/cq:model",
          "type": "String"
        },
        "jcrUUID": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "name": "jcr:uuid",
          "type": "String"
        },
        "jcrTitle": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "analyzed": true,
          "name": "jcr:content/jcr:title",
          "type": "String"
        },
        "jcrCreated": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "name": "jcr:created",
          "type": "Date"
        },
        "jcrCreatedBy": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "name": "jcr:createdBy",
          "type": "String"
        },
        "jcrLastModified": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "name": "jcr:content/jcr:lastModified",
          "type": "Date"
        },
        "jcrLastModifiedBy": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "name": "jcr:content/jcr:lastModifiedBy",
          "type": "String"
        },
        "cqLastReplicated": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "name": "jcr:content/cq:lastReplicated",
          "type": "Date"
        },
        "cqLastReplicatedBy": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "name": "jcr:content/cq:lastReplicatedBy",
          "type": "String"
        },
        "cqLastReplicationAction": {
          "jcr:primaryType": "nt:unstructured",
          "propertyIndex": true,
          "name": "jcr:content/cq:lastReplicationAction",
          "type": "Boolean"
        },
        "createModifiedCombined": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "function": "fn:coalesce(jcr:content/@jcr:lastModified, @jcr:created)"
        },
        "createModifiedByCombined": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "function": "fn:coalesce(jcr:content/@jcr:lastModifiedBy, @jcr:createdBy)"
        },
        "cqTags": {
          "jcr:primaryType": "nt:unstructured",
          "propertyIndex": true,
          "name": "jcr:content/metadata/cq:tags"
        },
        "_strucVersion": {
          "jcr:primaryType": "nt:unstructured",
          "nullCheckEnabled": true,
          "ordered": true,
          "propertyIndex": true,
          "name": "jcr:content/data/_strucVersion",
          "type": "Long"
        }
      }
    },
    "dam:cfVariationNode": {
      "jcr:primaryType": "nt:unstructured",
      "properties": {
        "jcr:primaryType": "nt:unstructured",
        "values": {
          "jcr:primaryType": "nt:unstructured",
          "propertyIndex": true,
          "isRegexp": true,
          "name": "^[^\\/]*$",
          "type": "String"
        }
      }
    }
  }
}

 

  • /oak:index/contentFragments

 

{
  "jcr:primaryType": "oak:QueryIndexDefinition",
  "selectionPolicy": "tag",
  "compatVersion": 2,
  "includedPaths": [
    "/content/dam"
  ],
  "queryPaths": [
    "/content/dam"
  ],
  "seed": 3585344949417103000,
  "tags": [
    "contentFragments"
  ],
  "type": "lucene",
  "async": [
    "async",
    "nrt"
  ],
  "evaluatePathRestrictions": true,
  "reindex": false,
  "reindexCount": 8,
  "indexRules": {
    "jcr:primaryType": "nt:unstructured",
    "dam:IndexedFragmentData": {
      "jcr:primaryType": "nt:unstructured",
      "indexNodeName": true,
      "properties": {
        "jcr:primaryType": "nt:unstructured",
        "stringValues": {
          "jcr:primaryType": "nt:unstructured",
          "nodeScopeIndex": true,
          "ordered": true,
          "propertyIndex": true,
          "analyzed": true,
          "isRegexp": true,
          "name": "^string@.*$",
          "type": "String"
        },
        "stringArrayValues": {
          "jcr:primaryType": "nt:unstructured",
          "nodeScopeIndex": true,
          "propertyIndex": true,
          "analyzed": true,
          "isRegexp": true,
          "name": "^stringArray@.*$",
          "type": "String"
        },
        "longValues": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "isRegexp": true,
          "name": "^long@.*$",
          "type": "Long"
        },
        "longArrayValues": {
          "jcr:primaryType": "nt:unstructured",
          "propertyIndex": true,
          "isRegexp": true,
          "name": "^longArray@.*$",
          "type": "Long"
        },
        "dateValues": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "isRegexp": true,
          "name": "^calendar@.*$",
          "type": "Date"
        },
        "dateArrayValues": {
          "jcr:primaryType": "nt:unstructured",
          "propertyIndex": true,
          "isRegexp": true,
          "name": "^calendarArray@.*$",
          "type": "Date"
        },
        "doubleValues": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "isRegexp": true,
          "name": "^double@.*$",
          "type": "Double"
        },
        "doubleArrayValues": {
          "jcr:primaryType": "nt:unstructured",
          "propertyIndex": true,
          "isRegexp": true,
          "name": "^doubleArray@.*$",
          "type": "Double"
        },
        "booleanValues": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "isRegexp": true,
          "name": "^boolean@.*$",
          "type": "Boolean"
        },
        "booleanArrayValues": {
          "jcr:primaryType": "nt:unstructured",
          "propertyIndex": true,
          "isRegexp": true,
          "name": "^booleanArray@.*$",
          "type": "Boolean"
        },
        "metaString": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "isRegexp": true,
          "name": "^@string@.*$",
          "type": "String"
        },
        "metaStringArray": {
          "jcr:primaryType": "nt:unstructured",
          "propertyIndex": true,
          "isRegexp": true,
          "name": "^@stringArray@.*$",
          "type": "String"
        },
        "uuid": {
          "jcr:primaryType": "nt:unstructured",
          "ordered": true,
          "propertyIndex": true,
          "name": "jcr:uuid",
          "type": "String"
        }
      }
    }
  }
}

 

 

Also, it looks like AEM has some background process that creates the node indexedData/master with jcr:mixinTypes=dam:IndexedFragmentData when the content fragment is updated and/or published.

Nikita_Mitroshin_0-1695064132163.png

Then contentFragments index uses this dam:IndexedFragmentData node type to index the content.

 

I have a GraphQL query to search for the content fragments of the specific model and sort them by the lastPublishedDate property.

This is a custom property, which is set by the custom background process, a service that implements com.day.cq.replication.Preprocessor.

The problem is that in most cases my custom background process runs after AEM has already updated the dam:IndexedFragmentData node of the content fragment.

The flow is the following:

  • Content fragment has lastPublishedDate=date_X
  • This content fragment has been re-published
  • AEM takes lastPublishedDate=date_X and puts it into indexedData/master/lastPublishedDate
  • My custom background process changes the lastPublishedDate to date_Y
  • As a result, sorting by lastPublishedDate picks up the outdated value (which is date_X) so the order is wrong.
  • After some time, re-indexing occurs and the problem resolves itself, but that may take 5-10 minutes, which is not acceptable on Prod

 

Alternatively, I've tried removing the /oak:index/contentFragments index and using only /oak:index/fragments.

The JCR queries which AEM generates for the same GraphQL query, depending on the index used, are:

  • /oak:index/contentFragments 
SELECT main.* FROM [dam:IndexedFragmentData] AS main WHERE ISDESCENDANTNODE(main, '/content/dam')
AND main.[@string@model] = '/conf/test/settings/dam/cfm/models/test-model'
AND (name() = 'master')
AND (([jcr:primaryType] IS NOT NULL)
AND ([jcr:primaryType] IS NOT NULL))
ORDER BY main.[calendar@lastPublishedDate] DESC OPTION (INDEX TAG[contentFragments])
  • /oak:index/fragments 
SELECT main.* FROM [dam:Asset] AS main WHERE ISDESCENDANTNODE(main, '/content/dam')
AND main.[jcr:content/contentFragment] = true
AND main.[jcr:content/data/cq:model] = '/conf/test/settings/dam/cfm/models/test-model'
AND (((main.[jcr:path] <> '' OR main.[jcr:path] IS NULL))
AND ([jcr:primaryType] IS NOT NULL))
ORDER BY main.[jcr:content/data/master/lastPublishedDate] DESC
OPTION (INDEX TAG[fragments])

Both queries and indexes seem to perform about equally well; I've tested with ~1.5k content fragments.

Using the fragments index I don't have the delay problem described above, the data is always sorted properly.

I assume that contentFragments was designed specifically for CFs, while fragments is more generic and covers all dam:Assets; however, because of my custom background process, contentFragments does not seem to fit my case well.

 

So I'm wondering if it's fine to remove contentFragments index and rely on fragments or if there could be any other suggestions.

 

Thanks

Topics

Topics help categorize Community content and increase your ability to discover relevant content.

1 Accepted Solution

Avatar

Correct answer by
Level 2

Answering my own question

Tested the queries with a bit more data (4.5k content fragments) and results are the following

  • using /oak:index/contentFragments the query has about 170ms execution time
  • /oak:index/fragments has about 450ms

So it's better to use contentFragments index.

The solution for the background process that I've applied is to update jcr:content/indexedData/master at the same time I'm setting my custom property.

 

Hope all the above helps someone else

View solution in original post

7 Replies

Avatar

Correct answer by
Level 2

Answering my own question

Tested the queries with a bit more data (4.5k content fragments) and results are the following

  • using /oak:index/contentFragments the query has about 170ms execution time
  • /oak:index/fragments has about 450ms

So it's better to use contentFragments index.

The solution for the background process that I've applied is to update jcr:content/indexedData/master at the same time I'm setting my custom property.

 

Hope all the above helps someone else

Avatar

Level 4

I know this is old, but in case you're still monitoring this thread I'm curious how you determined under what circumstances your GraphQL query would generate one SQL2 query under the hood vs. the other. We are experiencing a strange situation where some environments when performing a GraphQL query seem to optimize for select * from [dam:Asset] and the same query on another environment optimizes for select * from [dam:IndexedFragmentData]. It's unclear what the difference is and I'm curious if you figured this out. 

Avatar

Level 2

Hi @Preston 

AEM decides which index to use based on the cfGlobalVersion property of the /content/dam node.

If the property is present and its value is 1 or greater, [dam:IndexedFragmentData] is used; otherwise [dam:Asset] is used.

This property is set by a specific procedure which is described here:

So it might be the case that this procedure was executed on one env but not another one.

I hope that helps.

Avatar

Level 4

Thanks for answering. Do you know how to set this in a local environment using the Cloud SDK? Is it the same procedure as On Prem? 

Avatar

Level 2

I think that locally you can use CRXDE to manually add cfGlobalVersion=1 to the /content/dam JCR node, and that will do the job. You can find more info about the cloud procedure here: https://experienceleague.adobe.com/en/docs/experience-manager-cloud-service/content/headless/graphql...

Avatar

Administrator

@Nikita_Mitroshin I hope you found the AEM community helpful. We look forward to your return as either a learner or a mentor. The community flourishes with SMEs like you. Please encourage your AEM peers to contribute as well. Happy AEM learning!



Kautuk Sahni