Expand my Community achievements bar.

Guidelines for the Responsible Use of Generative AI in the Experience Cloud Community.

Custom OAK index to index the data by Latin alphabet to retrieve correctly sorted data has no effect.

Avatar

Level 1

I already asked this question on Stack Overflow, but it has not been solved yet.

I paste the question here.

 

I have a set of assets which have a property "name". I want to get a dynamic number of those assets, and I should get them sorted alphabetically by that "name" property. I query them with this query:

type=dam:Asset
path=/content/dam/en/foobar/contacts/
orderby=@jcr:content/data/master/@name
orderby.sort=asc
p.limit=3

and this is working, so in a set of names:

[Paloma, Abel, José, Eduardo]

it retrieves:

Abel, Eduardo, José.

The problem is with the Spanish alphabet, in which Á is the same letter as A. So in a set of:

[Paloma, Abel, José, Álvaro, Eduardo]

it retrieves:

Abel, Eduardo, José.

Álvaro is excluded because it is not among the first 3 elements after ordering, when it should be the second. The query should retrieve:

Abel, Álvaro, Eduardo.

So, to fix that, I've created a custom oak lucene index like below:

<?xml version="1.0" encoding="UTF-8"?>
<!--
    Serialized JCR node tree whose root holds a set of index nodes.
    Only foobarCFIndexFilter is defined in full here; every other child is an
    empty element that names a sibling node without describing its content
    (presumably placeholders that preserve child node order; TODO confirm).
-->
<jcr:root xmlns:oak="http://jackrabbit.apache.org/oak/ns/1.0" xmlns:jcr="http://www.jcp.org/jcr/1.0" xmlns:nt="http://www.jcp.org/jcr/nt/1.0" xmlns:rep="internal"
    jcr:mixinTypes="[rep:AccessControllable]"
    jcr:primaryType="nt:unstructured">
    <socialLucene/>
    <workflowDataLucene/>
    <slingeventJob/>
    <jcrLanguage/>
    <versionStoreIndex/>
    <repMembers/>
    <cqReportsLucene/>
    <commerceLucene/>
    <counter/>
    <authorizables/>
    <enablementResourceName/>
    <externalPrincipalNames/>
    <cmLucene/>
    <!--
        Custom index definition of type "lucene", updated on the "async" and
        "nrt" lanes. includedPaths and queryPaths both restrict it to
        /content/dam/es/foobar and /content/dam/en/foobar, and path
        restrictions are evaluated by the index (evaluatePathRestrictions).
        NOTE(review): reindexCount and seed look like values written by the
        repository at runtime; confirm whether they belong in a packaged
        definition.
    -->
    <foobarCFIndexFilter
        jcr:primaryType="oak:QueryIndexDefinition"
        async="[async,nrt]"
        evaluatePathRestrictions="{Boolean}true"
        includedPaths="[/content/dam/es/foobar,/content/dam/en/foobar]"
        queryPaths="[/content/dam/es/foobar,/content/dam/en/foobar]"
        reindex="{Boolean}false"
        reindexCount="{Long}24"
        seed="{Long}3850652403740003290"
        type="lucene">
        <!--
            Default analyzer: a Classic tokenizer plus a Synonym filter that
            reads synonyms.txt in "solr" format.
            NOTE(review): an analyzer presumably only shapes the tokens of
            analyzed (full-text) fields and not the values used to sort
            "ordered" properties, which would explain why accent mappings
            placed here do not change "order by" results. Confirm against the
            Oak Lucene index documentation.
        -->
        <analyzers jcr:primaryType="nt:unstructured">
            <default jcr:primaryType="nt:unstructured">
                <filters jcr:primaryType="nt:unstructured">
                    <Synonym
                        jcr:primaryType="nt:unstructured"
                        format="solr"
                        synonyms="synonyms.txt">
                        <synonyms.txt/>
                    </Synonym>
                </filters>
                <tokenizer
                    jcr:primaryType="nt:unstructured"
                    name="Classic"/>
            </default>
        </analyzers>
        <!--
            Index rules. The single rule is declared for nt:base; each child
            of "properties" names one relative property path to index.
        -->
        <indexRules jcr:primaryType="nt:unstructured">
            <nt:base jcr:primaryType="nt:unstructured">
                <properties jcr:primaryType="nt:unstructured">
                    <!-- title: String property, analyzed, node-scope indexed, property indexed and flagged ordered. -->
                    <title
                        jcr:primaryType="nt:unstructured"
                        analyzed="{Boolean}true"
                        isRegexp="{Boolean}false"
                        name="jcr:content/data/master/title"
                        nodeScopeIndex="{Boolean}true"
                        ordered="{Boolean}true"
                        propertyIndex="{Boolean}true"
                        type="String"/>
                    <!-- date: property indexed and flagged ordered; no explicit type is declared. -->
                    <date
                        jcr:primaryType="nt:unstructured"
                        name="jcr:content/data/master/date"
                        ordered="{Boolean}true"
                        propertyIndex="{Boolean}true"/>
                    <!-- sectors, contentFragment and model: property indexed only. -->
                    <sectors
                        jcr:primaryType="nt:unstructured"
                        name="jcr:content/data/master/sectors"
                        propertyIndex="{Boolean}true"/>
                    <contentFragment
                        jcr:primaryType="nt:unstructured"
                        name="jcr:content/contentFragment"
                        propertyIndex="{Boolean}true"/>
                    <model
                        jcr:primaryType="nt:unstructured"
                        name="cq:model"
                        propertyIndex="{Boolean}true"/>
                    <!--
                        name: same settings as title, pointing at
                        jcr:content/data/master/name.
                        NOTE(review): the sort key presumably is the raw
                        property value, so "Á" would sort after "Z" regardless
                        of the analyzer; an accent-folded copy of the value
                        stored in a separate property may be needed. Confirm.
                    -->
                    <name
                        jcr:primaryType="nt:unstructured"
                        analyzed="{Boolean}true"
                        isRegexp="{Boolean}false"
                        name="jcr:content/data/master/name"
                        nodeScopeIndex="{Boolean}true"
                        ordered="{Boolean}true"
                        propertyIndex="{Boolean}true"
                        type="String"/>
                </properties>
            </nt:base>
        </indexRules>
    </foobarCFIndexFilter>
    <cqProjectLucene/>
    <ntFolderDamLucene/>
    <acPrincipalName/>
    <uuid/>
    <damAssetLucene/>
    <rep:policy/>
    <cqPayloadPath/>
    <nodetypeLucene/>
    <nodetype/>
    <ntBaseLucene/>
    <reference/>
    <principalName/>
    <cqTagLucene/>
    <lucene/>
    <repTokenIndex/>
    <externalId/>
    <authorizableId/>
    <cqPageLucene/>
</jcr:root>

 

In synonyms.txt I had:

á, a

Á, A

and so on. I also tried a charFilter of type Mapping that maps the equivalent characters. Using the Query Performance Diagnosis tool, I have made sure that my custom Oak index is the one my query is using. But nothing works: after a reindex, the query results are the same.

It seems that the custom index has no effect. What could be wrong?

 

2 Replies

Avatar

Community Advisor

Hi @pmasvidal 

Could you check whether your query is using your custom index?
You can check this in Explain query section of the query performance tool.

Avatar

Level 1

here is the json with the result of the Explain Query tool:

 

{
    "statement": "/jcr:root/content/dam/en/audiovisual/contacts//element(*, dam:Asset) order by jcr:content/data/master/@name",
    "language": "xpath",
    "explain": {
        "logs": [
            "Parsing xpath statement: explain /jcr:root/content/dam/en/audiovisual/contacts//element(*, dam:Asset) order by jcr:content/data/master/@name\r\n",
            "XPath > SQL2: explain select [jcr:path], [jcr:score], * from [dam:Asset] as a where isdescendantnode(a, '/content/dam/en/audiovisual/contacts') order by [jcr:content/data/master/name] /* xpath: /jcr:root/content/dam/en/audiovisual/contacts//element(*, dam:Asset) order by jcr:content/data/master/@name */\r\n",
            "cost using filter Filter(query=explain select [jcr:path], [jcr:score], * from [dam:Asset] as a where isdescendantnode(a, '/content/dam/en/audiovisual/contacts') order by [jcr:content/data/master/name] /* xpath: /jcr:root/content/dam/en/audiovisual/contacts//element(*, dam:Asset) order by jcr:content/data/master/@name */, path=/content/dam/en/audiovisual/contacts//*)\r\n",
            "cost for reference is Infinity\r\n",
            "cost for property is Infinity\r\n",
            "cost for nodeType is Infinity\r\n",
            "Applicable IndexingRule found IndexRule: nt:base\r\n",
            "Applicable IndexingRule found IndexRule: nt:base\r\n",
            "Applicable IndexingRule found IndexRule: nt:base\r\n",
            "Applicable IndexingRule found IndexRule: nt:base\r\n",
            "Applicable IndexingRule found IndexRule: nt:base\r\n",
            "Applicable IndexingRule found IndexRule: nt:base\r\n",
            "cost for [/oak:index/foobarCFIndexFilter] of type (lucene-property) with plan [lucene:foobarCFIndexFilter(/oak:index/foobarCFIndexFilter) :ancestors:/content/dam/en/audiovisual/contacts ordering:[{ propertyName : jcr:content/data/master/name, propertyType : UNDEFINED, order : ASCENDING }]] is 801,00\r\n",
            "cost for lucene-property is Infinity\r\n",
            "cost for aggregate lucene is Infinity\r\n",
            "looking for plans for paths : []\r\n",
            "cost for aggregate solr is Infinity\r\n",
            "cost for traverse is 2000.0\r\n"
        ],
        "plan": "[dam:Asset] as [a] /* lucene:foobarCFIndexFilter(/oak:index/foobarCFIndexFilter) :ancestors:/content/dam/en/audiovisual/contacts ordering:[{ propertyName : jcr:content/data/master/name, propertyType : UNDEFINED, order : ASCENDING }] where isdescendantnode([a], [/content/dam/en/audiovisual/contacts]) */",
        "propertyIndexes": [
            "foobarCFIndexFilter(/oak:index/foobarCFIndexFilter)"
        ]
    },
    "heuristics": {
        "count": 24,
        "countTime": 4,
        "executionTime": 1,
        "getNodesTime": 2,
        "totalTime": 7
    }
}

It seems that it is using my foobarCFIndexFilter.