Expand my Community achievements bar.

Stopwords in AEM 6.2

Avatar

Level 1

Hello All,

I am trying to configure indexing with stop-words.  Unfortunately, stopwords are not working.  can you please help to identify the problem.

Please find below the indexing configurations(AEM 6.2 SP1 CFP4).

<elnSearchArticles jcr:primaryType="oak:QueryIndexDefinition" type="lucene" reindexCount="{Long}3" reindex="{Boolean}false" name="eln:SearchArticles" indexPath="/oak:index/elnSearchArticles" includePropertyTypes="[String]" excludedPaths="[/var,/tmp,/apps,/bin,/lib,/system,/home,/etc,/jcr:system,/content/danonecorp,/content/dam]" evaluatePathRestrictions="{Boolean}true" compatVersion="{Long}2" async="async">

    <indexRules jcr:primaryType="nt:unstructured">

        <cq:Page jcr:primaryType="nt:unstructured">

            <aggregates jcr:primaryType="nt:unstructured">

                <cq:Page jcr:primaryType="nt:unstructured">

                    <include0 jcr:primaryType="nt:unstructured" relativeNode="{Boolean}true" path="jcr:content"/>

                </cq:Page>

                <cq:PageContent jcr:primaryType="nt:unstructured">

                    <include0 jcr:primaryType="nt:unstructured" relativeNode="{Boolean}false" path="*"/>

                    <include1 jcr:primaryType="nt:unstructured" relativeNode="{Boolean}false" path="*/*"/>

                    <include2 jcr:primaryType="nt:unstructured" relativeNode="{Boolean}false" path="*/*/*"/>

                    <include3 jcr:primaryType="nt:unstructured" relativeNode="{Boolean}false" path="*/*/*/*"/>

                </cq:PageContent>

                <nt:file jcr:primaryType="nt:unstructured">

                    <include0 jcr:primaryType="nt:unstructured" relativeNode="{Boolean}false" path="jcr:content"/>

                </nt:file>

            </aggregates>

            <analyzers jcr:primaryType="nt:unstructured">

                <default jcr:primaryType="nt:unstructured">

                    <charFilters jcr:primaryType="nt:unstructured">

                        <HTMLStrip jcr:primaryType="nt:unstructured"/>

                        <Mapping jcr:primaryType="nt:unstructured"/>

                    </charFilters>

                    <filters jcr:primaryType="nt:unstructured">

                        <Lowercase jcr:primaryType="nt:unstructured"/>

                        <Stop jcr:primaryType="nt:unstructured" words="enStopwords.txt">

                            <enStopwords.txt/>

                        </Stop>

                    </filters>

                    <tokenizer jcr:primaryType="nt:unstructured" name="Classic"/>

                </default>

            </analyzers>

            <properties jcr:primaryType="nt:unstructured">

                <jcrTitle jcr:primaryType="nt:unstructured" type="String" name="jcr:content/jcr:title" useInSuggest="{Boolean}true" useInSpellcheck="{Boolean}true" propertyIndex="{Boolean}true" ordered="{Boolean}false" nodeScopeIndex="{Boolean}true" boost="{Long}10" analyzed="{Boolean}true"/>

                <pageTitle jcr:primaryType="nt:unstructured" type="String" name="jcr:content/pageTitle" useInSuggest="{Boolean}true" useInSpellcheck="{Boolean}true" propertyIndex="{Boolean}true" ordered="{Boolean}false" nodeScopeIndex="{Boolean}true" boost="{Long}9" analyzed="{Boolean}true"/>

                <jcrDescription jcr:primaryType="nt:unstructured" type="String" name="jcr:content/jcr:description" useInSuggest="{Boolean}true" useInSpellcheck="{Boolean}true" propertyIndex="{Boolean}true" ordered="{Boolean}false" nodeScopeIndex="{Boolean}true" boost="{Long}7" analyzed="{Boolean}true"/>

                <navTitle jcr:primaryType="nt:unstructured" type="String" name="jcr:content/navTitle" useInSuggest="{Boolean}true" useInSpellcheck="{Boolean}true" propertyIndex="{Boolean}true" ordered="{Boolean}false" nodeScopeIndex="{Boolean}true" analyzed="{Boolean}true"/>

                <cqTags jcr:primaryType="nt:unstructured" type="String" name="jcr:content/cq:tags" useInSuggest="{Boolean}true" useInSpellcheck="{Boolean}true" propertyIndex="{Boolean}true" ordered="{Boolean}false" nodeScopeIndex="{Boolean}true" boost="{Long}5" analyzed="{Boolean}true"/>

                <headline jcr:primaryType="nt:unstructured" type="String" name="jcr:content/par/(.*)headline(.*)" ordered="{Boolean}false" nodeScopeIndex="{Boolean}true" boost="{Long}3" analyzed="{Boolean}true" isRegexp="{Boolean}true"/>

                <text jcr:primaryType="nt:unstructured" type="String" name="jcr:content/par/(.*)text(.*)" ordered="{Boolean}false" nodeScopeIndex="{Boolean}true" boost="{Long}2" analyzed="{Boolean}true" isRegexp="{Boolean}true"/>

                <nodeName jcr:primaryType="nt:unstructured" type="String" name=":nodeName" useInSuggest="{Boolean}true" useInSpellcheck="{Boolean}true" propertyIndex="{Boolean}true" ordered="{Boolean}false" nodeScopeIndex="{Boolean}true"/>

                <breadcrumb jcr:primaryType="nt:unstructured" name="jcr:content/par/(.*)breadcrumb(.*)" nodeScopeIndex="{Boolean}false" isRegexp="{Boolean}true" excludeFromAggregation="{Boolean}true"/>

                <toolbar jcr:primaryType="nt:unstructured" name="jcr:content/par/toolbar" nodeScopeIndex="{Boolean}false" isRegexp="{Boolean}true" excludeFromAggregation="{Boolean}true"/>

                <notIndexed jcr:primaryType="nt:unstructured" type="String" name="jcr:content/robotsNoIndex" propertyIndex="{Boolean}true" nodeScopeIndex="{Boolean}false"/>

            </properties>

        </cq:Page>

    </indexRules>

</elnSearchArticles>

7 Replies

Avatar

Employee Advisor

Do you get any exception or message in your logs related to this?

Avatar

Level 1

Did not find any exception in logs.

15.01.2019 16:38:27.323 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing will be performed for following indexes: [/oak:index/elnSearchArticles]

15.01.2019 16:38:27.479 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #10000 /content/geometrixx-outdoors/en/women/shirts/palau-summer/jcr:content/par/product/wmappl_2-l/image

15.01.2019 16:38:27.569 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #20000 /content/eln-gb/cow-and-gate/en/pregnancy/weeks/33-weeks-pregnant/jcr:content/par/column_grid_1566794423/center-col/reference_copy

15.01.2019 16:38:27.644 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #30000 /content/eln-gb/cow-and-gate/en/products/foods/7-months-lunch-dinner/tasty-tomato-and-courgette-pasta/jcr:content/par/column_grid_16013088/col-1

15.01.2019 16:38:27.732 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #40000 /content/eln-hk/nutricia/zh-hk/early-life-nutrition/toddler/health-lifestyle/12-to-24-Month-Old-Baby-Development/jcr:content/par/column_grid_copy_cop_596560253/col-3/relatedcontentteaser

15.01.2019 16:38:27.842 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #50000 /content/aptawelt/en/lightbox/Aptamil-1-with-Pronutra-Recipe-Lightbox/Important-Notice/jcr:content/par/image/cq:responsive

15.01.2019 16:38:27.924 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #60000 /content/aptawelt/de/baby/stillen/richtiges-anlegen-und-stillpositionen/ganz-entspannt-anlegen-und-stillen/jcr:content/par/relatedcontentteaser_93257484/cq:responsive

15.01.2019 16:38:28.016 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #70000 /content/aptawelt/de/schwangerschaft/fruehkindliche-praegung/baustein-der-gesundheit-das-gehirn/jcr:content/par/carelineinfo

15.01.2019 16:38:28.082 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #80000 /content/eln-ru/nutriclub/ru/0-12-months/health-development/motor-development-from-0-to-6/jcr:content/par/column_grid/col-1/expander_228018206

15.01.2019 16:38:28.170 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #90000 /content/aptawelt-experten/en/1000-tage/muttermilch-und-stillen/rund-ums-stillen/stillpositionen-protected/jcr:content/par/linklist

15.01.2019 16:38:28.342 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #100000 /content/aptawelt-experten/en/event-kalender/2015/02/170818/jcr:content/par

15.01.2019 16:38:28.517 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #110000 /content/aptawelt-experten/en/event-kalender/2014/08/162710/jcr:content/par

15.01.2019 16:38:28.710 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #120000 /content/aptawelt-experten/en/event-kalender/2017/05/178898/jcr:content

15.01.2019 16:38:28.858 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #130000 /content/aptawelt-experten/de/event-kalender/2018/01/180731/jcr:content/par

15.01.2019 16:38:29.003 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #140000 /content/campaigns/geometrixx-outdoors/master/article/winter-female-under30/under30-article/jcr:content

15.01.2019 16:38:29.497 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate /oak:index/elnSearchArticles => Indexed 10000 nodes in 2.184 s ...

15.01.2019 16:38:29.753 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #150000 /content/share/eln/references/ru_RU/eln-dach-runutriclub-ru-v3/stores/10000-10500/10375

15.01.2019 16:38:30.534 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #160000 /content/share/eln/references/ru_RU/eln-dach-runutriclub-ru-v3/stores/3000-3500/3086

15.01.2019 16:38:30.654 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate /oak:index/elnSearchArticles => Indexed 20000 nodes in 1.157 s ...

15.01.2019 16:38:30.898 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #170000 /content/share/eln/references/ru_RU/eln-dach-runutriclub-ru-v3/stores/3500-4000/3923

15.01.2019 16:38:31.641 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #180000 /content/share/eln/references/ru_RU/eln-dach-runutriclub-ru-v3/stores/6000-6500/6304

15.01.2019 16:38:31.761 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate /oak:index/elnSearchArticles => Indexed 30000 nodes in 1.106 s ...

15.01.2019 16:38:31.984 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #190000 /content/share/eln/references/ru_RU/eln-dach-runutriclub-ru-v3/stores/25500-26000/25932

15.01.2019 16:38:32.684 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #200000 /content/share/eln/references/ru_RU/eln-dach-runutriclub-ru-v3/stores/21000-21500/21121

15.01.2019 16:38:32.756 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #210000 /libs/cq/personalization/components/traits/profileproperty/age/dialog/items/jsObject

15.01.2019 16:38:32.790 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #220000 /libs/cq/gui/components/projects/admin/datasource/projectstiledatasource/projectstiledatasource.jsp/jcr:content

15.01.2019 16:38:32.827 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #230000 /libs/fd/fm/gui/content/forms/abtesting/startabtesting/jcr:content/body/items/form/items/wizard/items/startabtest

15.01.2019 16:38:32.897 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #240000 /libs/social/enablement/components/cards/clientlibs/card.css/jcr:content

15.01.2019 16:38:32.932 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #250000 /libs/mcm/salesforce/components/salesforceexporter/processargs/items/arguments/items/configuration

15.01.2019 16:38:32.963 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #260000 /libs/dam/gui/components/admin/childasset/rating.jsp

15.01.2019 16:38:33.007 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Reindexing Traversed #270000 /libs/wcm/foundation/components/textimage/cq:dialog/content/items/componentstyles/items/column

15.01.2019 16:38:33.994 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.lucene.IndexCopier [COW][/oak:index/elnSearchArticles] CopyOnWrite stats : Skipped copying 0 files with total size 0 B

15.01.2019 16:38:33.995 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.IndexUpdate Indexing report

    - /oak:index/elnSearchArticles*(38936)

15.01.2019 16:38:34.049 *INFO* [aysnc-index-update-async] org.apache.jackrabbit.oak.plugins.index.AsyncIndexUpdate [async] Reindexing completed for indexes: [/oak:index/elnSearchArticles*(38936)] in 6.744 s

Avatar

Level 10

Could you check if you have enStopwords.txt under the 'Stop' node as nt:file?

+ filters (nt:unstructured) //The filters needs to be ordered

  + LowerCase

  + Stop

  - words = "stop1.txt, stop2.txt"

  + stop1.txt (nt:file)

Jackrabbit Oak – Lucene Index

Avatar

Level 1

yes .enStopwords.txt under the 'Stop' node as nt:file is available.

stopwords.PNG

Hi Rajan,

Can you make sure that enStopwords.txt is of type nt:file ?

because when I create a node of type nt:file, I dont see jcr:content under that.

yes, type is correct.  sorry can you explain this "because when I create a node of type nt:file, I dont see jcr:content under that."

fileType.PNG

Avatar

Level 1

Analyzers should be sibling of index rule and child of parent in indexing configuration.