{
  "analyzer": {
    "text": {
      "type": "custom",
      "tokenizer": "standard",
      "filter": ["homoglyph_norm", "lowercase", "mirandese_elision", "mirandese_stop"],
      "char_filter": ["nnbsp_norm"]
    },
    "plain": {
      "type": "custom",
      "char_filter": ["nnbsp_norm", "word_break_helper"],
      "tokenizer": "standard",
      "filter": ["lowercase"]
    },
    "plain_search": {
      "type": "custom",
      "char_filter": ["nnbsp_norm", "word_break_helper"],
      "tokenizer": "standard",
      "filter": ["lowercase"]
    },
    "short_text": {
      "type": "custom",
      "tokenizer": "whitespace",
      "filter": ["lowercase", "aggressive_splitting", "asciifolding_preserve", "dedup_asciifolding"]
    },
    "short_text_search": {
      "type": "custom",
      "tokenizer": "whitespace",
      "filter": ["lowercase", "aggressive_splitting"]
    },
    "source_text_plain": {
      "type": "custom",
      "tokenizer": "standard",
      "filter": ["lowercase"],
      "char_filter": ["word_break_helper_source_text"]
    },
    "source_text_plain_search": {
      "type": "custom",
      "char_filter": ["word_break_helper_source_text"],
      "tokenizer": "standard",
      "filter": ["lowercase"]
    },
    "suggest": {
      "type": "custom",
      "tokenizer": "standard",
      "filter": ["lowercase", "suggest_shingle"]
    },
    "suggest_reverse": {
      "type": "custom",
      "tokenizer": "standard",
      "filter": ["lowercase", "suggest_shingle", "reverse"]
    },
    "token_reverse": {
      "type": "custom",
      "tokenizer": "no_splitting",
      "filter": ["reverse"]
    },
    "near_match": {
      "type": "custom",
      "char_filter": ["near_space_flattener"],
      "tokenizer": "no_splitting",
      "filter": ["lowercase"]
    },
    "near_match_asciifolding": {
      "type": "custom",
      "char_filter": ["near_space_flattener"],
      "tokenizer": "no_splitting",
      "filter": ["truncate_keyword", "lowercase", "asciifolding"]
    },
    "prefix": {
      "type": "custom",
      "char_filter": ["near_space_flattener"],
      "tokenizer": "prefix",
      "filter": ["lowercase"]
    },
    "prefix_asciifolding": {
      "type": "custom",
      "char_filter": ["near_space_flattener"],
      "tokenizer": "prefix",
      "filter": ["lowercase", "asciifolding"]
    },
    "word_prefix": {
      "type": "custom",
      "tokenizer": "standard",
      "filter": ["lowercase", "prefix_ngram_filter"]
    },
    "keyword": {
      "type": "custom",
      "tokenizer": "no_splitting",
      "filter": ["truncate_keyword"]
    },
    "lowercase_keyword": {
      "type": "custom",
      "tokenizer": "no_splitting",
      "filter": ["truncate_keyword", "lowercase"]
    },
    "trigram": {
      "type": "custom",
      "tokenizer": "trigram",
      "filter": ["lowercase"]
    },
    "text_search": {
      "type": "custom",
      "tokenizer": "standard",
      "filter": ["homoglyph_norm", "lowercase", "mirandese_elision", "mirandese_stop"],
      "char_filter": ["nnbsp_norm"]
    }
  },
  "filter": {
    "suggest_shingle": {
      "type": "shingle",
      "min_shingle_size": 2,
      "max_shingle_size": 3,
      "output_unigrams": true
    },
    "lowercase": {
      "type": "lowercase"
    },
    "aggressive_splitting": {
      "type": "word_delimiter",
      "stem_english_possessive": false,
      "preserve_original": false
    },
    "prefix_ngram_filter": {
      "type": "edgeNGram",
      "max_gram": 255
    },
    "asciifolding": {
      "type": "asciifolding",
      "preserve_original": false
    },
    "asciifolding_preserve": {
      "type": "asciifolding",
      "preserve_original": true
    },
    "truncate_keyword": {
      "type": "truncate",
      "length": 5000
    },
    "remove_empty": {
      "type": "length",
      "min": 1
    },
    "mirandese_elision": {
      "type": "elision",
      "articles_case": true,
      "articles": ["l", "d", "qu"]
    },
    "mirandese_stop": {
      "type": "stop",
      "stopwords": [
        "de", "a", "la", "l", "que", "quei", "i", "an",
        "ne", "en", "un", "pa", "para", "cun", "n\u00f3", "nan",
        "nun", "ua", "\u0169a", "ls", "los", "se", "na", "por",
        "mais", "m\u00e1s", "las", "cumo", "mas", "al", "el", "sou",
        "sue", "\u00f3", "u", "ou", "quando", "muito", "mui", "mi",
        "mos", "nos", "y\u00e1", "you", "tamien", "solo", "pul", "pula",
        "ant\u00e9", "at\u00e9", "esso", "isso", "eilha", "antre", "a\u00e7puis", "adepuis",
        "adespuis", "apuis", "\u00e7puis", "depuis", "sien", "sin", "mesmo", "miesmo",
        "als", "sous", "quien", "nas", "me", "mi", "esse", "eilhes",
        "tu", "essa", "nun", "nien", "nin", "sues", "miu", "mie",
        "nua", "n\u0169a", "puls", "eilhas", "qual", "n\u00f3s", "le", "deilhes",
        "essas", "esses", "pulas", "este", "del", "tu", "ti", "te",
        "b\u00f3s", "bos", "les", "mius", "mies", "tou", "tue", "tous",
        "tues", "nuosso", "nuossa", "nuossos", "nuossas", "deilha", "deilhas", "esta",
        "estes", "estas", "aquel", "aqueilha", "aqueilhes", "aqueilhas", "esto", "isto",
        "aqueilho", "aquilho", "stou", "st\u00e1", "stamos", "stan", "stube", "stubo",
        "stubimos", "stub\u00edrun", "staba", "st\u00e1bamos", "st\u00e1ban", "stubira", "stub\u00edramos", "steia",
        "st\u00e9iamos", "stemos", "st\u00e9ian", "sten", "stubisse", "stub\u00edssemos", "stub\u00edssen", "stubir",
        "stubirmos", "stub\u00edren", "hei", "hai", "h\u00e1", "hemos", "han", "hoube",
        "houbimos", "houb\u00edrun", "houbira", "houb\u00edramos", "haba", "haia", "h\u00e1bamos", "h\u00e1iamos",
        "h\u00e1ban", "h\u00e1ian", "houbisse", "habisse", "houb\u00edssemos", "hab\u00edssemos", "houb\u00edssen", "hab\u00edssen",
        "houbir", "houbirmos", "houb\u00edren", "sou", "somos", "son", "sano", "era",
        "\u00e9ramos", "\u00e9ran", "fui", "fui", "fumos", "f\u00farun", "fura", "f\u00faramos",
        "seia", "s\u00e9iamos", "s\u00e9ian", "fusse", "f\u00fassemos", "f\u00fassen", "fur", "furmos",
        "f\u00faren", "serei", "ser\u00e1", "seremos", "seran", "serano", "serie", "seriemos",
        "serien", "tengo", "ten", "tenemos", "t\u00e9nen", "tenie", "teniemos", "tenien",
        "tube", "tubo", "tubimos", "tub\u00edrun", "tubira", "tub\u00edramos", "tenga", "t\u00e9ngamos",
        "t\u00e9ngan", "tubisse", "tub\u00edssemos", "tub\u00edssen", "tubir", "tubirmos", "tub\u00edren", "tenerei",
        "tener\u00e1", "teneremos", "teneran", "teneria", "teneriemos", "tenerien", "ye", "fui",
        "ten", "ser", "hai", "h\u00e1", "st\u00e1", "era", "tener", "stan",
        "tenie", "f\u00faran", "t\u00e9nen", "habie", "seia", "ser\u00e1", "tengo", "fusse"
      ]
    },
    "dedup_asciifolding": {
      "type": "unique",
      "only_on_same_position": true
    }
  },
  "tokenizer": {
    "prefix": {
      "type": "edgeNGram",
      "max_gram": 255
    },
    "no_splitting": {
      "type": "keyword"
    },
    "trigram": {
      "type": "nGram",
      "min_gram": 3,
      "max_gram": 3
    }
  },
  "char_filter": {
    "near_space_flattener": {
      "type": "mapping",
      "mappings": [
        "'=>\\u0020",
        "\\u2019=>\\u0020",
        "\\u02BC=>\\u0020",
        "_=>\\u0020",
        "-=>\\u0020"
      ]
    },
    "nnbsp_norm": {
      "type": "mapping",
      "mappings": [
        "\\u202F=>\\u0020"
      ]
    },
    "word_break_helper": {
      "type": "mapping",
      "mappings": [
        "_=>\\u0020",
        ".=>\\u0020",
        "(=>\\u0020",
        ")=>\\u0020"
      ]
    },
    "word_break_helper_source_text": {
      "type": "mapping",
      "mappings": [
        "_=>\\u0020",
        ".=>\\u0020",
        "(=>\\u0020",
        ")=>\\u0020",
        ":=>\\u0020"
      ]
    },
    "dotted_I_fix": {
      "type": "mapping",
      "mappings": [
        "\u0130=>I"
      ]
    }
  }
}