SEARU.ORG
当前位置:SEARU.ORG > Linux 新闻 > 正文

ES中如何使用逗号来分词

使用软件版本:elasticsearch-2.2.0

1. setting:

curl -XPOST 'http://localhost:9200/data' -d '{
    "settings": {
        "analysis": {
            "analyzer": {
                "comma": {
                    "type": "pattern",
                    "pattern": ","
                }
            }
        }
    }
}
'

return:

{"acknowledged":true}

2. view index:

curl -XGET 'http://localhost:9200/data'

return:

{
   "data": {
      "aliases": {},
      "mappings": {},
      "settings": {
         "index": {
            "creation_date": "1456931889151",
            "analysis": {
               "analyzer": {
                  "comma": {
                     "pattern": ",",
                     "type": "pattern"
                  }
               }
            },
            "number_of_shards": "5",
            "number_of_replicas": "1",
            "uuid": "aXyFMRzKQ0m_Ex8N2yJeSA",
            "version": {
               "created": "2020099"
            }
         }
      },
      "warmers": {}
   }
}

3. mapping:

curl -XPOST 'http://localhost:9200/data/_mapping/record' -d '{
    "properties": {
        "id": { "type": "string", "index": "not_analyzed" },
        "number": { "type": "string", "analyzer": "comma", "search_analyzer": "comma" }
    }
}
'

return:

{"acknowledged":true}

4. view index:

curl -XGET 'http://localhost:9200/data'

return:

{
   "data": {
      "aliases": {},
      "mappings": {
         "record": {
            "properties": {
               "id": {
                  "type": "string",
                  "index": "not_analyzed"
               },
               "number": {
                  "type": "string",
                  "analyzer": "comma"
               }
            }
         }
      },
      "settings": {
         "index": {
            "creation_date": "1456972030705",
            "analysis": {
               "analyzer": {
                  "comma": {
                     "pattern": ",",
                     "type": "pattern"
                  }
               }
            },
            "number_of_shards": "5",
            "number_of_replicas": "1",
            "uuid": "A9Z76U9DR0OBqn29smtq8w",
            "version": {
               "created": "2020099"
            }
         }
      },
      "warmers": {}
   }
}

5. verify analyze:

curl -XGET 'http://127.0.0.1:9200/data/_analyze?analyzer=comma&text=2,3,4,5,100-100'

return:

{
   "tokens": [
      {
         "token": "2",
         "start_offset": 0,
         "end_offset": 1,
         "type": "word",
         "position": 0
      },
      {
         "token": "3",
         "start_offset": 2,
         "end_offset": 3,
         "type": "word",
         "position": 101
      },
      {
         "token": "4",
         "start_offset": 4,
         "end_offset": 5,
         "type": "word",
         "position": 202
      },
      {
         "token": "5",
         "start_offset": 6,
         "end_offset": 7,
         "type": "word",
         "position": 303
      },
      {
         "token": "100-100",
         "start_offset": 8,
         "end_offset": 15,
         "type": "word",
         "position": 404
      }
   ]
}

6. post data:

curl -XPOST 'http://localhost:9200/data/record' -d '{
    "id" : "001CV",
    "number" : "2,3,4,5,100-100,1010"
}
'

return:

{
   "_index": "data",
   "_type": "record",
   "_id": "AVM3kt-GiEDWd2i_MREb",
   "_version": 1,
   "_shards": {
      "total": 2,
      "successful": 1,
      "failed": 0
   },
   "created": true
}

7. post data:

curl -XPOST 'http://localhost:9200/data/record' -d '{
    "id" : "002CV",
    "number" : "9999,8888"
}
'

return:

{
   "_index": "data",
   "_type": "record",
   "_id": "AVM3k7vIiEDWd2i_MREc",
   "_version": 1,
   "_shards": {
      "total": 2,
      "successful": 1,
      "failed": 0
   },
   "created": true
}

8. post data:

curl -XPOST 'http://localhost:9200/data/record' -d '{
    "id" : "002CV",
    "number" : "2,8888"
}
'

return:

{
   "_index": "data",
   "_type": "record",
   "_id": "AVM3mCGMiEDWd2i_MREh",
   "_version": 1,
   "_shards": {
      "total": 2,
      "successful": 1,
      "failed": 0
   },
   "created": true
}

9. search data:

curl -XGET 'http://localhost:9200/data/record/_search?q=number:9999'

return:

{
   "took": 41,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 1,
      "max_score": 0.19178301,
      "hits": [
         {
            "_index": "data",
            "_type": "record",
            "_id": "AVM3k7vIiEDWd2i_MREc",
            "_score": 0.19178301,
            "_source": {
               "id": "002CV",
               "number": "9999,8888"
            }
         }
      ]
   }
}

10. search data:

curl -XGET 'http://localhost:9200/data/record/_search?q=number:2'

return:

{
   "took": 2,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 2,
      "max_score": 0.37158427,
      "hits": [
         {
            "_index": "data",
            "_type": "record",
            "_id": "AVM3mCGMiEDWd2i_MREh",
            "_score": 0.37158427,
            "_source": {
               "id": "002CV",
               "number": "2,8888"
            }
         },
         {
            "_index": "data",
            "_type": "record",
            "_id": "AVM3kt-GiEDWd2i_MREb",
            "_score": 0.22295055,
            "_source": {
               "id": "001CV",
               "number": "2,3,4,5,100-100,1010"
            }
         }
      ]
   }
}

11. search data:

curl -XGET 'http://localhost:9200/data/record/_search?q=number:8888,100-100'

return:

{
   "took": 3,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 3,
      "max_score": 0.22097087,
      "hits": [
         {
            "_index": "data",
            "_type": "record",
            "_id": "AVM3mCGMiEDWd2i_MREh",
            "_score": 0.22097087,
            "_source": {
               "id": "002CV",
               "number": "2,8888"
            }
         },
         {
            "_index": "data",
            "_type": "record",
            "_id": "AVM3kt-GiEDWd2i_MREb",
            "_score": 0.13258252,
            "_source": {
               "id": "001CV",
               "number": "2,3,4,5,100-100,1010"
            }
         },
         {
            "_index": "data",
            "_type": "record",
            "_id": "AVM3k7vIiEDWd2i_MREc",
            "_score": 0.028130025,
            "_source": {
               "id": "002CV",
               "number": "9999,8888"
            }
         }
      ]
   }
}

12. search data:

curl -XPOST 'http://localhost:9200/data/record/_search' -d '{
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "number": "2"
          }
        }
      ],
      "must_not": [
        {
          "term": {
            "number": "8888"
          }
        }
      ]
    }
  }
}'

return:

{
   "took": 3,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 1,
      "max_score": 0.22295055,
      "hits": [
         {
            "_index": "data",
            "_type": "record",
            "_id": "AVM3kt-GiEDWd2i_MREb",
            "_score": 0.22295055,
            "_source": {
               "id": "001CV",
               "number": "2,3,4,5,100-100,1010"
            }
         }
      ]
   }
}

未经允许不得转载:SEARU.ORG » ES中如何使用逗号来分词

赞 (0)
分享到:更多 ()

评论 0