diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-08-25 18:09:18 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-08-25 18:15:53 -0700 |
commit | 55f9b88ba1d63db75ef9cde3cf94e5c98526ad2f (patch) | |
tree | 68d31143996afce99dfa346042c1123dd944f766 | |
parent | a66caf9b7ae476f526a706d31ed5ef4f8d361b00 (diff) | |
download | es-public-proxy-55f9b88ba1d63db75ef9cde3cf94e5c98526ad2f.tar.gz es-public-proxy-55f9b88ba1d63db75ef9cde3cf94e5c98526ad2f.zip |
significant increase in parse and test coverage
24 files changed, 737 insertions, 40 deletions
@@ -1,18 +1,31 @@ -TODO: see what other requests the default python and javascript client libraries use - -## basics - -- config: TOML, env, args -- filter requests by method and endpoint -- filter query parameters -- parse request bodies (queries) -- method/body for denied requests -- async streaming responses +x filter requests by method and endpoint +x parse query parameters (typed) +x re-serialize query parameters into upstream request URL +x method/body for denied requests +- create full request objects in tests +- bad request tests (parse error, disabled query types, etc) +- "completeness" over all the below query types and parameters +- real fatcat+scholar request tests - minimize tokio feature flags +- see what other requests the default python and javascript client libraries use +- put ProxyConfig in Box? less cloning? +- config: TOML, env, args + => separate parse schema from options schema + => example file +- refactor unwrap() into error handling +- package/release + => generate .deb file + => Makefile + => manpage + => example config? + => README + => push git repo +- test in with https://search.qa.fatcat.wiki and observable (javascript) + +parse method+path (index) -> which validation method +method+path+params+body -> status, upstream request -factoring: -- validate query method (method, path, query, body) ## general endpoints @@ -29,7 +42,7 @@ factoring: ## per-index endpoints - basic info; mapping - (?) + GET /<index>/_mapping - count GET /<index>/_count - get document @@ -40,6 +53,7 @@ factoring: - search GET /<index>/_search POST /<index>/_search +- mapping later: @@ -65,31 +79,34 @@ fulltext: (bare str allowed) value (str) - multi_match -- query_string +x query_string - simple_query_string term-level: -- range +x exists + field (str) +x ids + values (array of str) +x prefix +x range <field> gt, gte, lt, lte: str or number -- term +- NOT regex (?) +x term <field> value: str or number -- terms +x terms + does not support "lookup" from other indices <field> (array of str or number) -- wildcard +x wildcard <field> value (str) boost (float, optional) rewrite (str, optional) -- exists - field (str) -- ids - values (array of str) -- match_all +x match_all boost (float, optional) -- match_none +x match_none boost (float, optional) @@ -100,6 +117,10 @@ TODO: ## additional stuff -- HTTP content-encoding: gzip -- content-type header; always JSON? +- headers + - HTTP content-encoding: gzip + - content-type header; always JSON? + - set "Via" header in responses (indicating proxy version/context) - https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html +- logging +- switch from 'url' to 'percent-encoding' (?) diff --git a/src/parse.rs b/src/parse.rs index 0bd1eeb..2a4c0a5 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -12,46 +12,85 @@ pub struct ApiRequest { #[derive(Serialize, Deserialize, Debug, Default)] #[serde(deny_unknown_fields)] pub struct UrlQueryParams { + #[serde(skip_serializing_if="Option::is_none")] pub allow_no_indices: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] pub allow_partial_search_results: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] pub batched_reduce_size: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] pub ccs_minimize_roundtrips: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] pub docvalue_fields: Option<String>, // array of strings, comma-separated + #[serde(skip_serializing_if="Option::is_none")] pub expand_wildcards: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] pub explain: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] pub from: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] pub ignore_throttled: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] pub ignore_unavailable: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] pub max_concurrent_shard_requests: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] pub pre_filter_shard_size: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] pub preference: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] pub q: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] pub request_cache: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] pub rest_total_hits_as_int: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] pub routing: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] pub scroll: Option<String>, // string is "time value" + #[serde(skip_serializing_if="Option::is_none")] pub search_type: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] pub seq_no_primary_term: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] pub size: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] pub sort: Option<String>, // array of strings, comma-separated + #[serde(skip_serializing_if="Option::is_none")] pub _source: Option<bool>, // TODO: bool or string + #[serde(skip_serializing_if="Option::is_none")] pub _source_excludes: Option<String>, // array of strings, comma-separated + #[serde(skip_serializing_if="Option::is_none")] pub _source_includes: Option<String>, // array of strings, comma-separated + #[serde(skip_serializing_if="Option::is_none")] pub stats: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] pub stored_fields: Option<String>, // array of strings, comma-separated + #[serde(skip_serializing_if="Option::is_none")] pub suggest_field: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] pub suggest_text: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] pub terminate_after: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] pub timeout: Option<String>, // string is "time units" + #[serde(skip_serializing_if="Option::is_none")] pub track_scores: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] pub track_total_hits: Option<bool>, // XXX: bool or integer + #[serde(skip_serializing_if="Option::is_none")] pub typed_keys: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] pub version: Option<bool>, // additional generic params + #[serde(skip_serializing_if="Option::is_none")] pub human: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] pub pretty: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] pub filter_path: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] pub error_trace: Option<bool>, } @@ -59,74 +98,142 @@ pub struct UrlQueryParams { #[derive(Serialize, Deserialize, Debug)] #[serde(deny_unknown_fields)] pub struct SearchBody { + #[serde(skip_serializing_if="Option::is_none")] pub query: Option<ApiQuery>, + #[serde(skip_serializing_if="Option::is_none")] pub highlight: Option<ApiHighlight>, + #[serde(skip_serializing_if="Option::is_none")] pub collapse: Option<ApiCollapse>, + #[serde(skip_serializing_if="Option::is_none")] pub post_filter: Option<ApiQuery>, // TODO: leaf query only? + #[serde(skip_serializing_if="Option::is_none")] pub rescore: Option<ApiRescore>, // TODO: single or an array of rescore objects // script_fields disabled + #[serde(skip_serializing_if="Option::is_none")] + pub aggs: Option<HashMap<String, ApiAggregation>>, + #[serde(skip_serializing_if="Option::is_none")] + pub aggregations: Option<HashMap<String, ApiAggregation>>, // https://www.elastic.co/guide/en/elasticsearch/reference/current/sort-search-results.html + #[serde(skip_serializing_if="Option::is_none")] pub sort: Option<Vec<SortElement>>, + #[serde(skip_serializing_if="Option::is_none")] pub slice: Option<ApiSlice>, + #[serde(skip_serializing_if="Option::is_none")] pub stored_fields: Option<String>, // array of strings, or "_none_" // overlap with URL query parameters + #[serde(skip_serializing_if="Option::is_none")] pub docvalue_fields: Option<Vec<DocValOrString>>, + #[serde(skip_serializing_if="Option::is_none")] pub explain: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] pub from: Option<u32>, - pub min_score: Option<f64>, + #[serde(skip_serializing_if="Option::is_none")] + pub min_score: Option<Num>, + #[serde(skip_serializing_if="Option::is_none")] pub seq_no_primary_term: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] pub size: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] pub _source: Option<bool>, // XXX: bool, string, or object + #[serde(skip_serializing_if="Option::is_none")] pub terminate_after: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] pub timeout: Option<String>, // string is "time units" } #[derive(Serialize, Deserialize, Debug)] #[serde(deny_unknown_fields)] pub struct ScrollBody { - pub scroll_id: String, + pub scroll_id: StringOrArray, + #[serde(skip_serializing_if="Option::is_none")] pub scroll: Option<String>, } #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] pub struct ApiSlice { id: u32, max: u32, + #[serde(skip_serializing_if="Option::is_none")] field: Option<String>, } #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] pub struct ApiRescore{ + #[serde(skip_serializing_if="Option::is_none")] pub query: Option<ApiQuery>, + #[serde(skip_serializing_if="Option::is_none")] pub window_size: Option<u32>, } // TODO: could revert to having query types as an enum, with flattening #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] pub struct ApiQuery { // compound queries #[serde(rename = "bool")] + #[serde(skip_serializing_if="Option::is_none")] bool_query: Option<BoolQuery>, + #[serde(skip_serializing_if="Option::is_none")] boosting: Option<BoostingQuery>, + #[serde(skip_serializing_if="Option::is_none")] constant_score: Option<ConstantScoreQuery>, // fulltext (leaf) queries - - // term-level (leaf) queries #[serde(rename = "match")] - match_query: Option<HashMap<String, QueryFieldOrString>>, + #[serde(skip_serializing_if="Option::is_none")] + match_query: Option<HashMap<String, MatchQueryOrString>>, + #[serde(skip_serializing_if="Option::is_none")] match_phrase: Option<HashMap<String, QueryFieldOrString>>, - query_string: Option<QueryField>, + #[serde(skip_serializing_if="Option::is_none")] + multi_match: Option<MultiMatchQuery>, + #[serde(skip_serializing_if="Option::is_none")] + query_string: Option<QueryStringQuery>, + #[serde(skip_serializing_if="Option::is_none")] + simple_query_string: Option<QueryStringQuery>, + + // term-level (leaf) queries + #[serde(skip_serializing_if="Option::is_none")] + exists: Option<SimpleFieldOrString>, + #[serde(skip_serializing_if="Option::is_none")] + match_all: Option<SimpleBoost>, + #[serde(skip_serializing_if="Option::is_none")] + match_none: Option<SimpleBoost>, + #[serde(skip_serializing_if="Option::is_none")] + ids: Option<IdsQuery>, + #[serde(skip_serializing_if="Option::is_none")] + wildcard: Option<HashMap<String, TermQueryOrString>>, // also works for wildcard + #[serde(skip_serializing_if="Option::is_none")] + prefix: Option<HashMap<String, TermQueryOrString>>, // also works for prefix query + #[serde(skip_serializing_if="Option::is_none")] + range: Option<HashMap<String, RangeQuery>>, + #[serde(skip_serializing_if="Option::is_none")] + term: Option<HashMap<String, TermQueryOrString>>, + #[serde(skip_serializing_if="Option::is_none")] + // TODO: boost in terms query + terms: Option<HashMap<String, Vec<String>>>, // other + #[serde(skip_serializing_if="Option::is_none")] nested: Option<NestedQuery>, + #[serde(skip_serializing_if="Option::is_none")] rescore_query: Option<Box<ApiQuery>>, + + // fields as part of a rescore query + #[serde(skip_serializing_if="Option::is_none")] + score_mode: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + query_weight: Option<Num>, + #[serde(skip_serializing_if="Option::is_none")] + rescore_query_weight: Option<Num>, } #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] pub struct ApiHighlight{ // TODO: fields could also be an array of strings? fields: HashMap<String, HighlightField>, @@ -136,13 +243,19 @@ pub struct ApiHighlight{ } #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] #[serde(untagged)] pub enum SortMapValue { String(String), - Object { order: String, mode: Option<String> }, + Object { + order: String, + #[serde(skip_serializing_if="Option::is_none")] + mode: Option<String>, + }, } #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] #[serde(untagged)] pub enum SortElement{ String(String), @@ -150,13 +263,208 @@ pub enum SortElement{ } #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] #[serde(untagged)] pub enum DocValOrString { String(String), - Object {field: String, format: Option<String>}, + Object { + field: String, + #[serde(skip_serializing_if="Option::is_none")] + format: Option<String>, + }, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +#[serde(untagged)] +pub enum MatchQueryOrString { + Object(MatchQuery), + String(String), +} + +// https://www.elastic.co/guide/en/elasticsearch/reference/7.9/query-dsl-match-query.html +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +pub struct MatchQuery { + query: String, + + #[serde(flatten)] + options: MatchOptions, +} + +// https://www.elastic.co/guide/en/elasticsearch/reference/7.9/query-dsl-multi-match-query.html +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +pub struct MultiMatchQuery { + query: String, + fields: Vec<String>, + #[serde(skip_serializing_if="Option::is_none")] + #[serde(rename="type")] + query_type: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + tie_breaker: Option<Num>, + #[serde(flatten)] + options: MatchOptions, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +pub struct MatchOptions { + #[serde(skip_serializing_if="Option::is_none")] + analyzer: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + auto_generate_synonyms_phrase_query: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] + fuzziness: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + max_expansions: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] + prefix_length: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] + fuzzy_transpositions: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] + fuzzy_rewrite: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + lenient: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] + operator: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + minimum_should_match: Option<StringOrNum>, + #[serde(skip_serializing_if="Option::is_none")] + zero_terms_query: Option<String>, +} + +// https://www.elastic.co/guide/en/elasticsearch/reference/7.9/query-dsl-query-string-query.html +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +pub struct QueryStringQuery { + query: String, + #[serde(skip_serializing_if="Option::is_none")] + #[serde(rename="type")] + query_type: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + default_field: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + allow_leading_wildcard: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] + analyze_wildcard: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] + analyzer: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + auto_generate_synonyms_phrase_query: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] + boost: Option<Num>, + #[serde(skip_serializing_if="Option::is_none")] + default_operator: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + enable_position_increments: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] + fields: Option<Vec<String>>, + #[serde(skip_serializing_if="Option::is_none")] + fuzziness: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + fuzzy_max_expansions: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] + fuzzy_prefix_length: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] + fuzzy_transpositions: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] + lenient: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] + max_determinized_states: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] + minimum_should_match: Option<StringOrNum>, + #[serde(skip_serializing_if="Option::is_none")] + quote_analyzer: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + phrase_slop: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] + quote_field_suffix: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + rewrite: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + time_zone: Option<String>, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +pub struct SimpleBoost { + #[serde(skip_serializing_if="Option::is_none")] + boost: Option<Num>, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +pub struct IdsQuery { + values: Vec<String>, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +#[serde(untagged)] +pub enum TermQueryOrString { + String(String), + Object(TermQuery), +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +pub struct TermQuery { + value: String, + #[serde(skip_serializing_if="Option::is_none")] + rewrite: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + boost: Option<Num>, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +#[serde(untagged)] +pub enum StringOrNum { + String(String), + Int(u64), + Float(f64), +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +#[serde(untagged)] +pub enum Num { + Int(u64), + Float(f64), +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +#[serde(untagged)] +pub enum StringOrArray { + String(String), + Array(Vec<String>), +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +pub struct RangeQuery { + #[serde(skip_serializing_if="Option::is_none")] + gt: Option<StringOrNum>, + #[serde(skip_serializing_if="Option::is_none")] + gte: Option<StringOrNum>, + #[serde(skip_serializing_if="Option::is_none")] + lt: Option<StringOrNum>, + #[serde(skip_serializing_if="Option::is_none")] + lte: Option<StringOrNum>, + #[serde(skip_serializing_if="Option::is_none")] + format: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + relation: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + timezone: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + boost: Option<Num>, } #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] #[serde(untagged)] pub enum QueryFieldOrString { Object(QueryField), @@ -164,77 +472,128 @@ pub enum QueryFieldOrString { } #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] pub struct QueryField{ query: String, + #[serde(skip_serializing_if="Option::is_none")] fuzziness: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] slop: Option<u32>, - boost: Option<f64>, + #[serde(skip_serializing_if="Option::is_none")] + boost: Option<Num>, } #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] pub struct BoolQuery { + #[serde(skip_serializing_if="Option::is_none")] must: Option<Box<ApiQuery>>, + #[serde(skip_serializing_if="Option::is_none")] filter: Option<Box<ApiQuery>>, + #[serde(skip_serializing_if="Option::is_none")] should: Option<Box<ApiQuery>>, + #[serde(skip_serializing_if="Option::is_none")] must_not: Option<Box<ApiQuery>>, - minimum_should_match: Option<u32>, - boost: Option<f64>, + #[serde(skip_serializing_if="Option::is_none")] + minimum_should_match: Option<StringOrNum>, + #[serde(skip_serializing_if="Option::is_none")] + boost: Option<Num>, } #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] pub struct NestedQuery { path: String, query: Box<ApiQuery>, + #[serde(skip_serializing_if="Option::is_none")] score_mode: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] ignore_unmapped: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] + inner_hits: Option<InnerHitsOneOrMore>, } #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] pub struct BoostingQuery { positive: Box<ApiQuery>, negative: Box<ApiQuery>, - negative_boost: f64, + negative_boost: Num, } #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] pub struct ConstantScoreQuery { filter: Box<ApiQuery>, - boost: Option<f64>, + #[serde(skip_serializing_if="Option::is_none")] + boost: Option<Num>, } // https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] pub struct HighlightField{ + #[serde(skip_serializing_if="Option::is_none")] boundary_chars: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] boundary_max_scan: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] boundary_scanner: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] boundary_scanner_locale: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] encoder: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] force_source: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] fragmenter: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] fragment_offset: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] fragment_size: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] highlight_query: Option<ApiQuery>, + #[serde(skip_serializing_if="Option::is_none")] matched_fields: Option<Vec<String>>, + #[serde(skip_serializing_if="Option::is_none")] no_match_size: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] number_of_fragments: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] order: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] phrase_limit: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] pre_tags: Option<Vec<String>>, + #[serde(skip_serializing_if="Option::is_none")] post_tags: Option<Vec<String>>, + #[serde(skip_serializing_if="Option::is_none")] require_field_match: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] tags_schema: Option<String>, #[serde(rename = "type")] + #[serde(skip_serializing_if="Option::is_none")] highlight_type: Option<String>, } #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +#[serde(untagged)] +pub enum SimpleFieldOrString { + String(String), + Object { field: String }, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] pub struct ApiCollapse{ field: String, + #[serde(skip_serializing_if="Option::is_none")] inner_hits: Option<InnerHitsOneOrMore>, } #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] #[serde(untagged)] pub enum InnerHitsOneOrMore { Single(InnerHits), @@ -242,9 +601,118 @@ pub enum InnerHitsOneOrMore { } #[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] pub struct InnerHits { + #[serde(skip_serializing_if="Option::is_none")] from: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] size: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] sort: Option<Vec<SortElement>>, + #[serde(skip_serializing_if="Option::is_none")] name: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + collapse: Option<Box<ApiCollapse>>, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +pub struct ApiAggregation { + // bucket type aggregations + #[serde(skip_serializing_if="Option::is_none")] + nested: Option<NestedAggregation>, + #[serde(skip_serializing_if="Option::is_none")] + filter: Option<ApiQuery>, + #[serde(skip_serializing_if="Option::is_none")] + histogram: Option<SimpleAggregation>, + #[serde(skip_serializing_if="Option::is_none")] + terms: Option<TermsAggregation>, + #[serde(skip_serializing_if="Option::is_none")] + significant_terms: Option<SimpleAggregation>, + + // metrics type aggregations + #[serde(skip_serializing_if="Option::is_none")] + avg: Option<SimpleAggregation>, + #[serde(skip_serializing_if="Option::is_none")] + min: Option<SimpleAggregation>, + #[serde(skip_serializing_if="Option::is_none")] + max: Option<SimpleAggregation>, + #[serde(skip_serializing_if="Option::is_none")] + sum: Option<SimpleAggregation>, + #[serde(skip_serializing_if="Option::is_none")] + value_count: Option<SimpleAggregation>, + #[serde(skip_serializing_if="Option::is_none")] + stats: Option<SimpleAggregation>, + #[serde(skip_serializing_if="Option::is_none")] + percentiles: Option<SimpleAggregation>, + + // nested aggregations + #[serde(skip_serializing_if="Option::is_none")] + aggs: Option<HashMap<String, Box<ApiAggregation>>>, + #[serde(skip_serializing_if="Option::is_none")] + aggregations: Option<HashMap<String, Box<ApiAggregation>>>, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +pub struct NestedAggregation{ + path: String, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +pub struct SimpleAggregation{ + field: String, + #[serde(skip_serializing_if="Option::is_none")] + interval: Option<StringOrNum>, + #[serde(skip_serializing_if="Option::is_none")] + missing: Option<StringOrNum>, + #[serde(skip_serializing_if="Option::is_none")] + keyed: Option<bool>, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +pub struct DateHistogramAggregation{ + field: String, + #[serde(skip_serializing_if="Option::is_none")] + fixed_interval: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + calendar_interval: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + format: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + time_zone: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + offset: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + order: Option<HashMap<String, String>>, + #[serde(skip_serializing_if="Option::is_none")] + keyed: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] + missing: Option<StringOrNum>, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(deny_unknown_fields)] +pub struct TermsAggregation { + field: String, + #[serde(skip_serializing_if="Option::is_none")] + size: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] + shard_size: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] + min_doc_count: Option<u32>, + #[serde(skip_serializing_if="Option::is_none")] + show_term_doc_count_error: Option<bool>, + #[serde(skip_serializing_if="Option::is_none")] + order: Option<HashMap<String, String>>, + #[serde(skip_serializing_if="Option::is_none")] + include: Option<StringOrArray>, + #[serde(skip_serializing_if="Option::is_none")] + exclude: Option<StringOrArray>, + #[serde(skip_serializing_if="Option::is_none")] + execution_hint: Option<String>, + #[serde(skip_serializing_if="Option::is_none")] + missing: Option<StringOrNum>, } diff --git a/tests/files/GET_range.txt b/tests/files/GET_range.txt new file mode 100644 index 0000000..a5321c3 --- /dev/null +++ b/tests/files/GET_range.txt @@ -0,0 +1,12 @@ +GET /some-index/_search +{ + "query": { + "range": { + "age": { + "gte": 10, + "lte": 20, + "boost": 2.0 + } + } + } +} diff --git a/tests/files/GET_search_agg_filter.txt b/tests/files/GET_search_agg_filter.txt new file mode 100644 index 0000000..538380a --- /dev/null +++ b/tests/files/GET_search_agg_filter.txt @@ -0,0 +1,28 @@ +GET /shirts/_search +{ + "query": { + "bool": { + "filter": { + "term": { "brand": "gucci" } + } + } + }, + "aggs": { + "colors": { + "terms": { "field": "color" } + }, + "color_red": { + "filter": { + "term": { "color": "red" } + }, + "aggs": { + "models": { + "terms": { "field": "model" } + } + } + } + }, + "post_filter": { + "term": { "color": "red" } + } +} diff --git a/tests/files/GET_search_agg_filter_simple.txt b/tests/files/GET_search_agg_filter_simple.txt new file mode 100644 index 0000000..71a9777 --- /dev/null +++ b/tests/files/GET_search_agg_filter_simple.txt @@ -0,0 +1,11 @@ +POST /sales/_search?size=0 +{ + "aggs": { + "t_shirts": { + "filter": { "term": { "type": "t-shirt" } }, + "aggs": { + "avg_price": { "avg": { "field": "price" } } + } + } + } +} diff --git a/tests/files/GET_search_agg_histogram.txt b/tests/files/GET_search_agg_histogram.txt new file mode 100644 index 0000000..4689ad1 --- /dev/null +++ b/tests/files/GET_search_agg_histogram.txt @@ -0,0 +1,13 @@ +POST /sales/_search?size=0 +{ + "aggs": { + "quantity": { + "histogram": { + "field": "quantity", + "interval": 10, + "keyed": true, + "missing": 0 + } + } + } +} diff --git a/tests/files/GET_search_agg_max.txt b/tests/files/GET_search_agg_max.txt new file mode 100644 index 0000000..1377afc --- /dev/null +++ b/tests/files/GET_search_agg_max.txt @@ -0,0 +1,11 @@ +POST /sales/_search +{ + "aggs" : { + "grade_max" : { + "max" : { + "field" : "grade", + "missing": 10 + } + } + } +} diff --git a/tests/files/GET_search_agg_nested.txt b/tests/files/GET_search_agg_nested.txt new file mode 100644 index 0000000..8e5d1c1 --- /dev/null +++ b/tests/files/GET_search_agg_nested.txt @@ -0,0 +1,16 @@ +GET /products/_search +{ + "query": { + "match": { "name": "led tv" } + }, + "aggs": { + "resellers": { + "nested": { + "path": "resellers" + }, + "aggs": { + "min_price": { "min": { "field": "resellers.price" } } + } + } + } +} diff --git a/tests/files/GET_search_exists.txt b/tests/files/GET_search_exists.txt new file mode 100644 index 0000000..fe7d722 --- /dev/null +++ b/tests/files/GET_search_exists.txt @@ -0,0 +1,8 @@ +GET /some-index/_search +{ + "query": { + "exists": { + "field": "user" + } + } +} diff --git a/tests/files/GET_search_fatcat_preservation_year.txt b/tests/files/GET_search_fatcat_preservation_year.txt new file mode 100644 index 0000000..539677b --- /dev/null +++ b/tests/files/GET_search_fatcat_preservation_year.txt @@ -0,0 +1,2 @@ +GET /fatcat_release/_search +{"query": {"bool": {"filter": [{"term": {"container_id": "yh4zdhfsobdolesogv6czydwqi"}}, {"range": {"release_year": {"gte": 1771, "lte": 2020}}}]}}, "aggs": {"year_preservation": {"composite": {"size": 1500, "sources": [{"year": {"histogram": {"field": "release_year", "interval": 1}}}, {"preservation": {"terms": {"field": "preservation"}}}]}}}, "from": 0, "size": 0} diff --git a/tests/files/GET_search_fatcat_scholar_basic.txt b/tests/files/GET_search_fatcat_scholar_basic.txt new file mode 100644 index 0000000..c60e62f --- /dev/null +++ b/tests/files/GET_search_fatcat_scholar_basic.txt @@ -0,0 +1,2 @@ +GET /scholar_fulltext/_search +{"query": {"bool": {"filter": [{"terms": {"type": ["article-journal", "paper-conference", "chapter"]}}, {"terms": {"access_type": ["wayback", "ia_file", "ia_sim"]}}], "must": [{"boosting": {"positive": {"bool": {"must": [{"query_string": {"query": "coffee", "default_operator": "AND", "analyze_wildcard": true, "allow_leading_wildcard": false, "lenient": true, "quote_field_suffix": ".exact", "fields": ["title^5", "biblio_all^3", "abstracts.body^2", "fulltext.body", "everything"]}}], "should": [{"terms": {"access_type": ["ia_sim", "ia_file", "wayback"]}}]}}, "negative": {"bool": {"should": [{"bool": {"must_not": [{"exists": {"field": "title"}}]}}, {"bool": {"must_not": [{"exists": {"field": "year"}}]}}, {"bool": {"must_not": [{"exists": {"field": "type"}}]}}, {"bool": {"must_not": [{"exists": {"field": "stage"}}]}}, {"bool": {"must_not": [{"exists": {"field": "biblio.container_ident"}}]}}]}}, "negative_boost": 0.5}}]}}, "collapse": {"field": "collapse_key", "inner_hits": {"name": "more_pages", "size": 0}}, "from": 0, "size": 15, "highlight": {"fields": {"abstracts.body": {"number_of_fragments": 2, "fragment_size": 300}, "fulltext.body": {"number_of_fragments": 2, "fragment_size": 300}, "fulltext.acknowledgment": {"number_of_fragments": 2, "fragment_size": 300}, "fulltext.annex": {"number_of_fragments": 2, "fragment_size": 300}}}} diff --git a/tests/files/GET_search_ids.txt b/tests/files/GET_search_ids.txt new file mode 100644 index 0000000..f22c8b1 --- /dev/null +++ b/tests/files/GET_search_ids.txt @@ -0,0 +1,8 @@ +GET /some-index/_search +{ + "query": { + "ids" : { + "values" : ["1", "4", "100"] + } + } +} diff --git a/tests/files/GET_search_match_all.txt b/tests/files/GET_search_match_all.txt new file mode 100644 index 0000000..1d9622c --- /dev/null +++ b/tests/files/GET_search_match_all.txt @@ -0,0 +1,6 @@ +GET /some-index/_search +{ + "query": { + "match_all": { "boost" : 1.2 } + } +} diff --git a/tests/files/GET_search_match_none.txt b/tests/files/GET_search_match_none.txt new file mode 100644 index 0000000..12f50fc --- /dev/null +++ b/tests/files/GET_search_match_none.txt @@ -0,0 +1,6 @@ +GET /some-index/_search +{ + "query": { + "match_none": {} + } +} diff --git a/tests/files/GET_search_multi_match_tie.txt b/tests/files/GET_search_multi_match_tie.txt new file mode 100644 index 0000000..54f71db --- /dev/null +++ b/tests/files/GET_search_multi_match_tie.txt @@ -0,0 +1,11 @@ +GET /some-index/_search +{ + "query": { + "multi_match" : { + "query": "brown fox", + "type": "best_fields", + "fields": [ "subject", "message" ], + "tie_breaker": 0.3 + } + } +} diff --git a/tests/files/GET_search_prefix.txt b/tests/files/GET_search_prefix.txt new file mode 100644 index 0000000..3e35556 --- /dev/null +++ b/tests/files/GET_search_prefix.txt @@ -0,0 +1,10 @@ +GET /some-index/_search +{ + "query": { + "prefix": { + "user.id": { + "value": "ki" + } + } + } +} diff --git a/tests/files/GET_search_querystring.txt b/tests/files/GET_search_querystring.txt new file mode 100644 index 0000000..1c6e31e --- /dev/null +++ b/tests/files/GET_search_querystring.txt @@ -0,0 +1,14 @@ +GET /some-index/_search +{ + "query": { + "query_string": { + "fields": [ + "title", + "content" + ], + "query": "this OR that OR thus", + "type": "cross_fields", + "minimum_should_match": 2 + } + } +} diff --git a/tests/files/GET_search_simplequerystring.txt b/tests/files/GET_search_simplequerystring.txt new file mode 100644 index 0000000..2ae5507 --- /dev/null +++ b/tests/files/GET_search_simplequerystring.txt @@ -0,0 +1,10 @@ +GET /some-index/_search +{ + "query": { + "simple_query_string" : { + "query": "\"fried eggs\" +(eggplant | potato) -frittata", + "fields": ["title^5", "body"], + "default_operator": "and" + } + } +} diff --git a/tests/files/GET_search_term.txt b/tests/files/GET_search_term.txt new file mode 100644 index 0000000..149587f --- /dev/null +++ b/tests/files/GET_search_term.txt @@ -0,0 +1,11 @@ +GET /some-value/_search +{ + "query": { + "term": { + "user.id": { + "value": "kimchy", + "boost": 1.0 + } + } + } +} diff --git a/tests/files/GET_search_term_short.txt b/tests/files/GET_search_term_short.txt new file mode 100644 index 0000000..937a7b5 --- /dev/null +++ b/tests/files/GET_search_term_short.txt @@ -0,0 +1,8 @@ +GET /my-index-000001/_search?pretty=true +{ + "query": { + "match": { + "full_text": "Quick Brown Foxes!" + } + } +} diff --git a/tests/files/GET_search_wildcard.txt b/tests/files/GET_search_wildcard.txt new file mode 100644 index 0000000..6eb199d --- /dev/null +++ b/tests/files/GET_search_wildcard.txt @@ -0,0 +1,12 @@ +GET /some-index/_search +{ + "query": { + "wildcard": { + "user.id": { + "value": "ki*y", + "boost": 1.0, + "rewrite": "constant_score" + } + } + } +} diff --git a/tests/files/POST_search_inner.txt b/tests/files/POST_search_inner.txt.disabled index 56c5acc..56c5acc 100644 --- a/tests/files/POST_search_inner.txt +++ b/tests/files/POST_search_inner.txt.disabled diff --git a/tests/files/scroll/DELETE_scroll_multi.txt b/tests/files/scroll/DELETE_scroll_multi.txt new file mode 100644 index 0000000..43ba8e2 --- /dev/null +++ b/tests/files/scroll/DELETE_scroll_multi.txt @@ -0,0 +1,7 @@ +DELETE /_search/scroll +{ + "scroll_id" : [ + "DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAAAD4WYm9laVYtZndUQlNsdDcwakFMNjU1QQ==", + "DnF1ZXJ5VGhlbkZldGNoBQAAAAAAAAABFmtSWWRRWUJrU2o2ZExpSGJCVmQxYUEAAAAAAAAAAxZrUllkUVlCa1NqNmRMaUhiQlZkMWFBAAAAAAAAAAIWa1JZZFFZQmtTajZkTGlIYkJWZDFhQQAAAAAAAAAFFmtSWWRRWUJrU2o2ZExpSGJCVmQxYUEAAAAAAAAABBZrUllkUVlCa1NqNmRMaUhiQlZkMWFB" + ] +} diff --git a/tests/parse_es_requests.rs b/tests/parse_es_requests.rs index d453392..8e84148 100644 --- a/tests/parse_es_requests.rs +++ b/tests/parse_es_requests.rs @@ -36,7 +36,9 @@ fn parse_search_bodies() { if let Some(body) = parts.body { println!("parsing: {}", path.display()); println!("BODY: {}", body); - let _parsed: SearchBody = serde_json::from_str(&body).unwrap(); + let parsed: SearchBody = serde_json::from_str(&body).unwrap(); + let raw_val: serde_json::Value = serde_json::from_str(&body).unwrap(); + assert_eq!(raw_val, serde_json::to_value(parsed).unwrap()); } } } |