I have a set of assets that have a property "name". I want to retrieve a dynamic number of those assets, sorted alphabetically by that "name" property. I use this query:
type=dam:Asset
path=/content/dam/en/foobar/contacts/
orderby=@jcr:content/data/master/@name
orderby.sort=asc
p.limit=3
This works. Given this set of names:
[Paloma, Abel, José, Eduardo]
it retrieves:
Abel, Eduardo, José.
The problem is with the Spanish alphabet, in which Á is the same letter as A. So, given this set:
[Paloma, Abel, José, Álvaro, Eduardo]
it retrieves:
Abel, Eduardo, José.
Álvaro is excluded because it is not among the first 3 elements after the ordering is applied, when it should be the second. It should retrieve:
Abel, Álvaro, Eduardo.
So, to fix that, I've created a custom Oak Lucene index like the one below:
<?xml version="1.0" encoding="UTF-8"?>
<jcr:root
    xmlns:oak="http://jackrabbit.apache.org/oak/ns/1.0"
    xmlns:jcr="http://www.jcp.org/jcr/1.0"
    xmlns:nt="http://www.jcp.org/jcr/nt/1.0"
    xmlns:rep="internal"
    jcr:mixinTypes="[rep:AccessControllable]"
    jcr:primaryType="nt:unstructured">

    <!-- Sibling nodes listed as empty elements; no properties are defined for them here. -->
    <socialLucene/>
    <workflowDataLucene/>
    <slingeventJob/>
    <jcrLanguage/>
    <versionStoreIndex/>
    <repMembers/>
    <cqReportsLucene/>
    <commerceLucene/>
    <counter/>
    <authorizables/>
    <enablementResourceName/>
    <externalPrincipalNames/>
    <cmLucene/>

    <!--
        Custom index definition of type "lucene", limited through includedPaths and
        queryPaths to the es and en foobar folders under /content/dam.
    -->
    <foobarCFIndexFilter
        jcr:primaryType="oak:QueryIndexDefinition"
        async="[async,nrt]"
        evaluatePathRestrictions="{Boolean}true"
        includedPaths="[/content/dam/es/foobar,/content/dam/en/foobar]"
        queryPaths="[/content/dam/es/foobar,/content/dam/en/foobar]"
        reindex="{Boolean}false"
        reindexCount="{Long}24"
        seed="{Long}3850652403740003290"
        type="lucene">

        <!--
            Default analyzer: a Synonym filter (solr format, reading synonyms.txt)
            combined with the Classic tokenizer.
            NOTE(review): analyzer chains normally act on analyzed (full-text) terms;
            confirm whether they have any effect on sorting through ordered="true"
            properties before relying on them to change the result order.
        -->
        <analyzers jcr:primaryType="nt:unstructured">
            <default jcr:primaryType="nt:unstructured">
                <filters jcr:primaryType="nt:unstructured">
                    <Synonym
                        jcr:primaryType="nt:unstructured"
                        format="solr"
                        synonyms="synonyms.txt">
                        <synonyms.txt/>
                    </Synonym>
                </filters>
                <tokenizer
                    jcr:primaryType="nt:unstructured"
                    name="Classic"/>
            </default>
        </analyzers>

        <!-- Index rules declared for nt:base; each child of "properties" configures one property. -->
        <indexRules jcr:primaryType="nt:unstructured">
            <nt:base jcr:primaryType="nt:unstructured">
                <properties jcr:primaryType="nt:unstructured">
                    <!-- title: analyzed, node-scope indexed, ordered String property. -->
                    <title
                        jcr:primaryType="nt:unstructured"
                        analyzed="{Boolean}true"
                        isRegexp="{Boolean}false"
                        name="jcr:content/data/master/title"
                        nodeScopeIndex="{Boolean}true"
                        ordered="{Boolean}true"
                        propertyIndex="{Boolean}true"
                        type="String"/>
                    <!-- date: ordered property index. -->
                    <date
                        jcr:primaryType="nt:unstructured"
                        name="jcr:content/data/master/date"
                        ordered="{Boolean}true"
                        propertyIndex="{Boolean}true"/>
                    <!-- sectors, contentFragment, model: plain property indexes. -->
                    <sectors
                        jcr:primaryType="nt:unstructured"
                        name="jcr:content/data/master/sectors"
                        propertyIndex="{Boolean}true"/>
                    <contentFragment
                        jcr:primaryType="nt:unstructured"
                        name="jcr:content/contentFragment"
                        propertyIndex="{Boolean}true"/>
                    <model
                        jcr:primaryType="nt:unstructured"
                        name="cq:model"
                        propertyIndex="{Boolean}true"/>
                    <!-- name: same settings as title; this is the property the query orders by. -->
                    <name
                        jcr:primaryType="nt:unstructured"
                        analyzed="{Boolean}true"
                        isRegexp="{Boolean}false"
                        name="jcr:content/data/master/name"
                        nodeScopeIndex="{Boolean}true"
                        ordered="{Boolean}true"
                        propertyIndex="{Boolean}true"
                        type="String"/>
                </properties>
            </nt:base>
        </indexRules>
    </foobarCFIndexFilter>

    <!-- Remaining sibling nodes, again listed as empty elements. -->
    <cqProjectLucene/>
    <ntFolderDamLucene/>
    <acPrincipalName/>
    <uuid/>
    <damAssetLucene/>
    <rep:policy/>
    <cqPayloadPath/>
    <nodetypeLucene/>
    <nodetype/>
    <ntBaseLucene/>
    <reference/>
    <principalName/>
    <cqTagLucene/>
    <lucene/>
    <repTokenIndex/>
    <externalId/>
    <authorizableId/>
    <cqPageLucene/>
</jcr:root>
In synonyms.txt I had:
á, a
Á, A
and so on. I also tried a charFilter of type Mapping that maps the equivalent characters. I have confirmed with the Query Performance Diagnosis tool that my query uses my custom Oak index. But nothing works: after reindexing, the query results are the same.