--- a/mozautoeslib/eslib.py
+++ b/mozautoeslib/eslib.py
@@ -125,18 +125,16 @@ class ESLib(object):
doc_types=self.doc_type)
if result and result['hits'] and result['hits']['hits']:
# partially flatten the data
for hit in result['hits']['hits']:
if not '_source' in hit:
raise Exception("Key ['_source'] not found in response hit")
resultlist.append(hit['_source'])
- else:
- raise Exception("Key ['hits']['hits'] not found in response data")
return resultlist
def query(self, include={}, exclude={}, size=None, doc_type=None, sort=None,
withSource=False):
"""Return a list of hits which match all the fields in 'include',
but none of the fields in 'exclude', up to a maximum of 'size' hits,
or all hits when 'size' is None.
@@ -171,33 +169,37 @@ class ESLib(object):
if not 'count' in count:
raise Exception("Key ['count'] not found in count response data")
query_size = count['count']
# there's no data to return, so don't bother searching
if query_size == 0:
return []
- q = Search(query=boolquery, sort=sort, size=query_size)
- result = self.connection.search(query=q,
- indexes=[self.read_index],
- doc_types=self.doc_type)
- #print json.dumps(result, indent=2)
+ chunk_size = 2000
+ for x in range(0,(query_size-1)/chunk_size + 1):
+ start = x * chunk_size
+ this_size = query_size - x*chunk_size if query_size - x*chunk_size < chunk_size else chunk_size
+ if this_size > 0:
+ q = Search(query=boolquery, sort=sort, size=this_size, start=start)
+ result = self.connection.search(query=q,
+ indexes=[self.read_index],
+ doc_types=self.doc_type)
- if result and result['hits'] and result['hits']['hits']:
- # partially flatten the data
- for hit in result['hits']['hits']:
- if withSource:
- resultlist.append(hit)
+ if result and result['hits'] and result['hits']['hits']:
+ # partially flatten the data
+ for hit in result['hits']['hits']:
+ if withSource:
+ resultlist.append(hit)
+ else:
+ if not '_source' in hit:
+ raise Exception("Key ['_source'] not found in response hit")
+ resultlist.append(hit['_source'])
else:
- if not '_source' in hit:
- raise Exception("Key ['_source'] not found in response hit")
- resultlist.append(hit['_source'])
- else:
- raise Exception("Key ['hits']['hits'] not found in response data")
+ raise Exception("Key ['hits']['hits'] not found in response data")
return resultlist
def aggregates(self, include={}, exclude={}, aggregate_by={}, doc_type=None):
"""Return a count of hits that match all possible combinations of fields
in aggregate_by.
Example: