一、Date类型简介
elasticsearch通过JSON格式承载数据,而JSON本身没有与Date对应的数据类型;不过elasticsearch可以通过以下三种方式处理JSON承载的日期数据:
- 符合特定格式化的日期字符串;
- 基于milliseconds-since-the-epoch的一个长整型数字;
- 基于seconds-since-the-epoch的一个长整型数字(默认格式不包含秒级时间戳,需要在mapping的format中显式指定epoch_second);
索引数据的时候,elasticsearch内部会基于UTC时间,将传入的数据转化为基于milliseconds-since-the-epoch的一个长整型数字;查询数据的时候,elasticsearch内部会将查询转化为range查询;
二、测试数据准备
创建mapping,设置create_date的type为date
PUT my_date_index { "mappings": { "_doc": { "properties": { "create_date": { "type": "date" } } } } }
索引以下三个document
PUT my_date_index/_doc/1 { "create_date": "2015-01-01" } PUT my_date_index/_doc/2 { "create_date": "2015-01-01T12:10:30Z" } PUT my_date_index/_doc/3 { "create_date": 1420070400001 }
三、日期查询的诡异之处
我们希望可以通过以下查询命中2015-01-01的记录
POST my_date_index/_search { "query": { "term": { "create_date": "2015-01-01" } } }
查看执行结果发现命中了三条数据
{ "took" : 0, "timed_out" : false, "_shards" : { "total" : 5, "successful" : 5, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : 3, "max_score" : 1.0, "hits" : [ { "_index" : "my_date_index", "_type" : "_doc", "_id" : "2", "_score" : 1.0, "_source" : { "create_date" : "2015-01-01T12:10:30Z" } }, { "_index" : "my_date_index", "_type" : "_doc", "_id" : "1", "_score" : 1.0, "_source" : { "create_date" : "2015-01-01" } }, { "_index" : "my_date_index", "_type" : "_doc", "_id" : "3", "_score" : 1.0, "_source" : { "create_date" : 1420070400001 } } ] } }
通过以下profile结果可以看到,elasticsearch内部确实将查询重写为一个范围查询create_date:[1420070400000 TO 1420156799999],即2015-01-01T00:00:00.000Z到2015-01-01T23:59:59.999Z
POST my_date_index/_search { "profile": "true", "query": { "term": { "create_date": "2015-01-01" } } } { "id" : "[eD2KQtMGSla7jzJQBQVAfQ][my_date_index][0]", "searches" : [ { "query" : [ { "type" : "IndexOrDocValuesQuery", "description" : "create_date:[1420070400000 TO 1420156799999]", "time_in_nanos" : 2101, "breakdown" : { "score" : 0, "build_scorer_count" : 0, "match_count" : 0, "create_weight" : 2100, "next_doc" : 0, "match" : 0, "create_weight_count" : 1, "next_doc_count" : 0, "score_count" : 0, "build_scorer" : 0, "advance" : 0, "advance_count" : 0 } } ], "rewrite_time" : 2200, "collector" : [ { "name" : "CancellableCollector", "reason" : "search_cancelled", "time_in_nanos" : 700, "children" : [ { "name" : "SimpleTopScoreDocCollector", "reason" : "search_top_hits", "time_in_nanos" : 200 } ] } ] } ], "aggregations" : [ ] }
接下来我们来分析一下Date数据类型的term查询
我们可以看到termQuery查询直接调用了rangeQuery,并将传入的日期参数作为range的两个范围值;
DateFieldType
/**
 * Builds the query used for a term (exact value) lookup on this date field.
 *
 * <p>The lookup is delegated to {@code rangeQuery}: {@code value} is passed as both the
 * lower and the upper term, and both ends are inclusive, so a term lookup on a date
 * field is executed as a range query.
 *
 * @param value   the date value to match; forwarded unchanged as both range bounds
 * @param context the query shard context, forwarded to {@code rangeQuery}; may be null
 * @return the range query, wrapped in a {@code BoostQuery} when {@code boost()} is not 1
 */
@Override
public Query termQuery(Object value, @Nullable QueryShardContext context) {
    // Same value for both bounds, both inclusive; time zone and forced parser are left null.
    Query query = rangeQuery(value, value, true, true, ShapeRelation.INTERSECTS, null, null, context);
    // Only wrap when a non-default boost is configured.
    if (boost() != 1f) {
        query = new BoostQuery(query, boost());
    }
    return query;
}
rangeQuery中会调用parseToMilliseconds计算查询的两个范围值
DateFieldType
/**
 * Builds a range query over this date field from two date terms.
 *
 * <p>Each non-null term is converted to a {@code long} by {@code parseToMilliseconds};
 * a null term leaves that side open ({@code Long.MIN_VALUE} / {@code Long.MAX_VALUE}).
 * An exclusive bound is moved one unit inward ({@code ++l} / {@code --u}) before the
 * {@code LongPoint} range query is created from {@code l} and {@code u}.
 *
 * @param lowerTerm        lower bound, or null for no lower bound
 * @param upperTerm        upper bound, or null for no upper bound
 * @param includeLower     whether the lower bound itself is part of the range
 * @param includeUpper     whether the upper bound itself is part of the range
 * @param relation         requested relation; {@code ShapeRelation.DISJOINT} is rejected
 * @param timeZone         time zone forwarded to {@code parseToMilliseconds}; may be null
 * @param forcedDateParser parser to use instead of this field's {@code dateMathParser}; may be null
 * @param context          query shard context forwarded to {@code parseToMilliseconds}
 * @return a {@code LongPoint} range query, wrapped in an {@code IndexOrDocValuesQuery}
 *         together with a doc-values range query when {@code hasDocValues()} is true
 * @throws IllegalArgumentException if {@code relation} is {@code ShapeRelation.DISJOINT}
 */
@Override
public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, ShapeRelation relation, @Nullable DateTimeZone timeZone, @Nullable DateMathParser forcedDateParser, QueryShardContext context) {
    failIfNotIndexed();
    if (relation == ShapeRelation.DISJOINT) {
        throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] does not support DISJOINT ranges");
    }
    // A parser supplied by the caller takes precedence over the field's own parser.
    DateMathParser parser = forcedDateParser == null ? dateMathParser : forcedDateParser;
    long l, u;
    if (lowerTerm == null) {
        l = Long.MIN_VALUE;
    } else {
        // NOTE(review): the third argument is presumably the round-up flag of
        // parseToMilliseconds (its signature is not shown here) - confirm. It is
        // !includeLower for the lower bound and includeUpper for the upper bound.
        l = parseToMilliseconds(lowerTerm, !includeLower, timeZone, parser, context);
        if (includeLower == false) {
            // Exclusive lower bound: start one unit after the parsed value.
            ++l;
        }
    }
    if (upperTerm == null) {
        u = Long.MAX_VALUE;
    } else {
        u = parseToMilliseconds(upperTerm, includeUpper, timeZone, parser, context);
        if (includeUpper == false) {
            // Exclusive upper bound: stop one unit before the parsed value.
            --u;
        }
    }
    Query query = LongPoint.newRangeQuery(name(), l, u);
    if (hasDocValues()) {
        // Pair the points query with a doc-values query over the same bounds.
        Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u);
        query = new IndexOrDocValuesQuery(query, dvQuery);
    }
    return query;
}
通过以下代码可以看到,解析时输入字符串中出现的字段会覆盖基准时间中对应位置的数字:左边界以new MutableDateTime(1970, 1, 1, 0, 0, 0, 0, DateTimeZone.UTC)为基准,右边界以new MutableDateTime(1970, 1, 1, 23, 59, 59, 999, DateTimeZone.UTC)为基准;所以我们在查询中输入2015-01-01时,只有年月日被覆盖,左边界的时间部分保持00:00:00.000,右边界的时间部分保持23:59:59.999,相当于查询这一天之内的所有记录;
JodaDateMathParser private long parseDateTime(String value, DateTimeZone timeZone, boolean roundUpIfNoTime) { DateTimeFormatter parser = dateTimeFormatter.parser; if (timeZone != null) { parser = parser.withZone(timeZone); } try { MutableDateTime date; // We use 01/01/1970 as a base date so that things keep working with date // fields that are filled with times without dates if (roundUpIfNoTime) { date = new MutableDateTime(1970, 1, 1, 23, 59, 59, 999, DateTimeZone.UTC); } else { date = new MutableDateTime(1970, 1, 1, 0, 0, 0, 0, DateTimeZone.UTC); } final int end = parser.parseInto(date, value, 0); if (end < 0) { int position = ~end; throw new IllegalArgumentException("Parse failure at index [" + position + "] of [" + value + "]"); } else if (end != value.length()) { throw new IllegalArgumentException("Unrecognized chars at the end of [" + value + "]: [" + value.substring(end) + "]"); } return date.getMillis(); } catch (IllegalArgumentException e) { throw new ElasticsearchParseException("failed to parse date field [{}] with format [{}]", e, value, dateTimeFormatter.pattern()); } }
一般我们使用的日期都是精确到秒,那么只要我们将输入数据精确到秒基本上就可以命中记录;如果还是命中多个记录,那么就需要将数据的精度提高到毫秒,并且查询输入的时候也需要带上毫秒;
POST my_date_index/_search { "query": { "term": { "create_date": "2015-01-01T12:10:30Z" } } } { "took" : 10, "timed_out" : false, "_shards" : { "total" : 5, "successful" : 5, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : 1, "max_score" : 1.0, "hits" : [ { "_index" : "my_date_index", "_type" : "_doc", "_id" : "2", "_score" : 1.0, "_source" : { "create_date" : "2015-01-01T12:10:30Z" } } ] } }
四、自定义时间字符串的解析格式
elasticsearch中date默认的日期格式是表征epoch_millis的长整型数字或者符合strict_date_optional_time格式的字符串;
/**
 * Default formatter for date fields, created from the pattern
 * {@code strict_date_optional_time||epoch_millis}.
 * NOTE(review): {@code ||} presumably separates alternative formats - confirm
 * against the {@code DateFormatter.forPattern} documentation.
 */
public static final DateFormatter DEFAULT_DATE_TIME_FORMATTER = DateFormatter.forPattern("strict_date_optional_time||epoch_millis");
strict_date_optional_time
strict限制时间字符串中的年、月、日部分必须分别是4、2、2位数字,不足的部分在前边补0,并且各部分之间用-分隔,例如2023-01-23;
date_optional_time则表示字符串可以不包含时间部分,但是必须包含日期部分;
strict_date_optional_time支持的完整的时间格式如下
date-opt-time     = date-element ['T' [time-element] [offset]]
date-element      = std-date-element | ord-date-element | week-date-element
std-date-element  = yyyy ['-' MM ['-' dd]]
ord-date-element  = yyyy ['-' DDD]
week-date-element = xxxx '-W' ww ['-' e]
time-element      = HH [minute-element] | [fraction]
minute-element    = ':' mm [second-element] | [fraction]
second-element    = ':' ss [fraction]
fraction          = ('.' | ',') digit+
我们使用2015/01/01搜索的时候,elasticsearch无法解析就会报错
POST my_date_index/_search { "profile": "true", "query": { "term": { "create_date": "2015/01/01" } } } { "error": { "root_cause": [ { "type": "parse_exception", "reason": "failed to parse date field [2015/01/01] with format [strict_date_optional_time||epoch_millis]" } ], "type": "search_phase_execution_exception", "reason": "all shards failed", "phase": "query", "grouped": true, "failed_shards": [ { "shard": 0, "index": "my_date_index", "node": "eD2KQtMGSla7jzJQBQVAfQ", "reason": { "type": "query_shard_exception", "reason": "failed to create query: {\n \"term\" : {\n \"create_date\" : {\n \"value\" : \"2015/01/01\",\n \"boost\" : 1.0\n }\n }\n}", "index_uuid": "9MTRkZcMTnK8GgK9vKwUuA", "index": "my_date_index", "caused_by": { "type": "parse_exception", "reason": "failed to parse date field [2015/01/01] with format [strict_date_optional_time||epoch_millis]", "caused_by": { "type": "illegal_argument_exception", "reason": "Unrecognized chars at the end of [2015/01/01]: [/01/01]" } } } } ], "caused_by": { "type": "parse_exception", "reason": "failed to parse date field [2015/01/01] with format [strict_date_optional_time||epoch_millis]", "caused_by": { "type": "illegal_argument_exception", "reason": "Unrecognized chars at the end of [2015/01/01]: [/01/01]" } } }, "status": 400 }
我们可以在mapping或者在搜索的时候指定format
POST my_date_index/_search { "query": { "range" : { "create_date" : { "gte": "2015/01/01", "lte": "2015/01/01", "format": "yyyy/MM/dd" } } } } { "took" : 1, "timed_out" : false, "_shards" : { "total" : 5, "successful" : 5, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : 3, "max_score" : 1.0, "hits" : [ { "_index" : "my_date_index", "_type" : "_doc", "_id" : "2", "_score" : 1.0, "_source" : { "create_date" : "2015-01-01T12:10:30Z" } }, { "_index" : "my_date_index", "_type" : "_doc", "_id" : "1", "_score" : 1.0, "_source" : { "create_date" : "2015-01-01" } }, { "_index" : "my_date_index", "_type" : "_doc", "_id" : "3", "_score" : 1.0, "_source" : { "create_date" : 1420070400001 } } ] } }