The code below is rather long and might be just for our use case but the basic idea is the following:
Find out date interval length and additional needed filtering to never run into the "more the 2'000" limit. In my case I could have weekly date range filter but would need to apply some additional filters
Then run it like this:
report_id = '00O4…'
sf = SalesforceReport(user, pass, token, report_id)
it = sf.iterate_over_dates_and_filters(datetime.date(2020,2,1),
'Invoice__c.InvoiceDate__c', 'Opportunity.CustomField__c',
[('a', 'startswith'), ('b', 'startswith'), …])
for row in it:
# do something with the dict
The iterator goes through every week (if you need daily iterators or monthly then you'd need to change the code, but the change should be minimal) since 2020-02-01 and applies the filter CustomField__c.startswith('a'), then CustomField__c.startswith('b'), … and acts as a generator so you don't need to mess with the filter cycling yourself.
The iterator throws an Exception if there's a query which returns more than 2000 rows, just to be sure that the data is not incomplete.
One warning here: SF has a limit of max 500 queries per hour. Say if you have one year with 52 weeks and 10 additional filters you'd already run into that limit.
Here's the class (relies on simple_salesforce
)
import simple_salesforce
import json
import datetime
"""
helper class to iterate over salesforce report data
and manouvering around the 2000 max limit
"""
class SalesforceReport(simple_salesforce.Salesforce):
def __init__(self, username, password, security_token, report_id):
super(SalesforceReport, self).__init__(username=username, password=password, security_token=security_token)
self.report_id = report_id
self._fetch_describe()
def _fetch_describe(self):
url = f'{self.base_url}analytics/reports/{self.report_id}/describe'
result = self._call_salesforce('GET', url)
self.filters = dict(result.json()['reportMetadata'])
def apply_report_filter(self, column, operator, value, replace=True):
"""
adds/replaces filter, example:
apply_report_filter('Opportunity.InsertionId__c', 'startsWith', 'hbob').
For date filters use apply_standard_date_filter.
column: needs to correspond to a column in your report, AND the report
needs to have this filter configured (so in the UI the filter
can be applied)
operator: equals, notEqual, lessThan, greaterThan, lessOrEqual,
greaterOrEqual, contains, notContain, startsWith, includes
see https://sforce.co/2Tb5SrS for up to date list
value: value as a string
replace: if set to True, then if there's already a restriction on column
this restriction will be replaced, otherwise it's added additionally
"""
filters = self.filters['reportFilters']
if replace:
filters = [f for f in filters if not f['column'] == column]
filters.append(dict(
column=column,
isRunPageEditable=True,
operator=operator,
value=value))
self.filters['reportFilters'] = filters
def apply_standard_date_filter(self, column, startDate, endDate):
"""
replace date filter. The date filter needs to be available as a filter in the
UI already
Example: apply_standard_date_filter('Invoice__c.InvoiceDate__c', d_from, d_to)
column: needs to correspond to a column in your report
startDate, endDate: instance of datetime.date
"""
self.filters['standardDateFilter'] = dict(
column=column,
durationValue='CUSTOM',
startDate=startDate.strftime('%Y-%m-%d'),
endDate=endDate.strftime('%Y-%m-%d')
)
def query_report(self):
"""
return generator which yields one report row as dict at a time
"""
url = self.base_url + f"analytics/reports/query"
result = self._call_salesforce('POST', url, data=json.dumps(dict(reportMetadata=self.filters)))
r = result.json()
columns = r['reportMetadata']['detailColumns']
if not r['allData']:
raise Exception('got more than 2000 rows! Quitting as data would be incomplete')
for row in r['factMap']['T!T']['rows']:
values = []
for c in row['dataCells']:
t = type(c['value'])
if t == str or t == type(None) or t == int:
values.append(c['value'])
elif t == dict and 'amount' in c['value']:
values.append(c['value']['amount'])
else:
print(f"don't know how to handle {c}")
values.append(c['value'])
yield dict(zip(columns, values))
def iterate_over_dates_and_filters(self, startDate, date_column, filter_column, filter_tuples):
"""
return generator which iterates over every week and applies the filters
each for column
"""
date_runner = startDate
while True:
print(date_runner)
self.apply_standard_date_filter(date_column, date_runner, date_runner + datetime.timedelta(days=6))
for val, op in filter_tuples:
print(val)
self.apply_report_filter(filter_column, op, val)
for row in self.query_report():
yield row
date_runner += datetime.timedelta(days=7)
if date_runner > datetime.date.today():
break