--- a/mozautoeslib/eslib.py
+++ b/mozautoeslib/eslib.py
@@ -1,13 +1,13 @@
import logging
# logging.basicConfig(filename='mozautoeslib.log', level=logging.DEBUG)
from pyes import *
-from pyes.facets import QueryFacet
+from pyes.facets import QueryFacet, StatisticalFacet
class ESLib(object):
"""Class with convenience methods for making common types of
ElasticSearch queries.
"""
def __init__(self, server, index, doc_type=None):
"""Initialize an ESLib object with server address, index, and doc_type.
@@ -75,19 +75,29 @@ class ESLib(object):
else:
boolquery.add_must_not(query)
def _make_bool_query(self, include={}, exclude={}, sort=None):
"""Generate a simple bool query to include fields in 'include', and
exclude fields in 'exclude'.
"""
+        if not include and not exclude:
+            # No constraints: match every document. 'sort' is attached here
+            # too, so it is not silently dropped on this early return.
+            matchall = MatchAllQuery()
+            if sort:
+                matchall.sort = sort
+            return matchall
+
boolquery = BoolQuery()
-self._add_fieldlist_to_boolquery(boolquery, include, True)
-self._add_fieldlist_to_boolquery(boolquery, exclude, False)
+        if include:
+            self._add_fieldlist_to_boolquery(boolquery, include, True)
+        if exclude:
+            self._add_fieldlist_to_boolquery(boolquery, exclude, False)
if sort:
boolquery.sort = sort
return boolquery
def ORQuery(self, ORItems, size=10000, doc_type=None, useFieldQueries=False):
"""Return a list of hits that match any of the combination of terms
specified in the ORItems list of dicts.
@@ -252,16 +257,51 @@ class ESLib(object):
indexes=[self.read_index],
doc_types=self.doc_type)
if 'facets' in result:
return result['facets']
raise Exception("Key 'facets' not found in response data")
+    def statisticalQuery(self, include={}, exclude={}, doc_type=None, fields=None):
+        """Return the statistical facets computed for the given fields.
+
+        One StatisticalFacet, named after its field, is requested for every
+        entry in 'fields' and evaluated over the documents selected by
+        'include' and 'exclude'. The 'facets' part of the response is
+        returned.
+
+        If 'doc_type' is given it replaces self.doc_type. When 'fields' is
+        None or empty, {} is returned without running a search. Raises
+        Exception if the response contains no 'facets' key.
+        """
+        if doc_type:
+            self.doc_type = doc_type
+
+        # The default fields=None cannot be iterated over, and with no
+        # fields there are no facets to request.
+        if not fields:
+            return {}
+
+        masterquery = self._make_bool_query(include, exclude)
+        q = Search(query=masterquery, size=0)
+
+        for field in fields:
+            q.facet.facets.append(StatisticalFacet(name=field, field=field))
+
+        result = self.connection.search(query=q,
+                                        indexes=[self.read_index],
+                                        doc_types=self.doc_type)
+
+        if 'facets' in result:
+            return result['facets']
+
+        raise Exception("Key 'facets' not found in response data")
+
def frequency(self, include={}, exclude={}, frequency_fields=[], size=30000, doc_type=None):
"""Return a count of the 'size' most frequent terms that are produced
by a query.
For example, the following produces a list of the top 50 bugs
in a date range:
result = eslib.frequency(include = {