From 2a734c61fe74febc90c26bb62a1766609eda4a08 Mon Sep 17 00:00:00 2001 From: Harmon Date: Mon, 11 Nov 2019 06:22:05 -0600 Subject: [PATCH 1/2] Add API.search_30_day and API.search_full_archive --- docs/api.rst | 99 +++++++++++++++++++++++++++++++++++++++++++++++- tweepy/api.py | 28 ++++++++++++++ tweepy/models.py | 20 ++++++---- 3 files changed, 137 insertions(+), 10 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 6c9e9c0bc..e5c283c5f 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -713,8 +713,8 @@ Saved Searches Methods :rtype: :class:`SavedSearch` object -Help Methods ------------- +Search Methods +-------------- .. method:: API.search(q, [geocode], [lang], [locale], [result_type], \ [count], [until], [since_id], [max_id], \ @@ -768,6 +768,101 @@ Help Methods :rtype: :class:`SearchResults` object +.. method:: API.search_30_day(environment_name, query, [tag], [fromDate], \ + [toDate], [maxResults], [next]) + + Premium search that provides Tweets posted within the last 30 days. + + :param environment_name: The (case-sensitive) label associated with your + search developer environment, as displayed at + https://developer.twitter.com/en/account/environments. + :param query: The equivalent of one premium rule/filter, with up to 1,024 + characters (256 with Sandbox dev environments). + This parameter should include ALL portions of the rule/filter, including + all operators, and portions of the rule should not be separated into + other parameters of the query. + :param tag: Tags can be used to segregate rules and their matching data into + different logical groups. If a rule tag is provided, the rule tag is + included in the 'matching_rules' attribute. + It is recommended to assign rule-specific UUIDs to rule tags and maintain + desired mappings on the client side. + :param fromDate: The oldest UTC timestamp (from most recent 30 days) from + which the Tweets will be provided. Timestamp is in minute granularity and + is inclusive (i.e. 
12:00 includes the 00 minute). + Specified: Using only the fromDate with no toDate parameter will deliver + results for the query going back in time from now( ) until the fromDate. + Not Specified: If a fromDate is not specified, the API will deliver all + of the results for 30 days prior to now( ) or the toDate (if specified). + If neither the fromDate or toDate parameter is used, the API will deliver + all results for the most recent 30 days, starting at the time of the + request, going backwards. + :param toDate: The latest, most recent UTC timestamp to which the Tweets + will be provided. Timestamp is in minute granularity and is not inclusive + (i.e. 11:59 does not include the 59th minute of the hour). + Specified: Using only the toDate with no fromDate parameter will deliver + the most recent 30 days of data prior to the toDate. + Not Specified: If a toDate is not specified, the API will deliver all of + the results from now( ) for the query going back in time to the fromDate. + If neither the fromDate or toDate parameter is used, the API will deliver + all results for the entire 30-day index, starting at the time of the + request, going backwards. + :param maxResults: The maximum number of search results to be returned by a + request. A number between 10 and the system limit (currently 500, 100 for + Sandbox environments). By default, a request response will return 100 + results. + :param next: This parameter is used to get the next 'page' of results. The + value used with the parameter is pulled directly from the response + provided by the API, and should not be modified. + + +.. method:: API.search_full_archive(environment_name, query, [tag], \ + [fromDate], [toDate], [maxResults], [next]) + + Premium search that provides Tweets from as early as 2006, starting with the + first Tweet posted in March 2006. 
+ + :param environment_name: The (case-sensitive) label associated with your + search developer environment, as displayed at + https://developer.twitter.com/en/account/environments. + :param query: The equivalent of one premium rule/filter, with up to 1,024 + characters (256 with Sandbox dev environments). + This parameter should include ALL portions of the rule/filter, including + all operators, and portions of the rule should not be separated into + other parameters of the query. + :param tag: Tags can be used to segregate rules and their matching data into + different logical groups. If a rule tag is provided, the rule tag is + included in the 'matching_rules' attribute. + It is recommended to assign rule-specific UUIDs to rule tags and maintain + desired mappings on the client side. + :param fromDate: The oldest UTC timestamp (from most recent 30 days) from + which the Tweets will be provided. Timestamp is in minute granularity and + is inclusive (i.e. 12:00 includes the 00 minute). + Specified: Using only the fromDate with no toDate parameter will deliver + results for the query going back in time from now( ) until the fromDate. + Not Specified: If a fromDate is not specified, the API will deliver all + of the results for 30 days prior to now( ) or the toDate (if specified). + If neither the fromDate or toDate parameter is used, the API will deliver + all results for the most recent 30 days, starting at the time of the + request, going backwards. + :param toDate: The latest, most recent UTC timestamp to which the Tweets + will be provided. Timestamp is in minute granularity and is not inclusive + (i.e. 11:59 does not include the 59th minute of the hour). + Specified: Using only the toDate with no fromDate parameter will deliver + the most recent 30 days of data prior to the toDate. + Not Specified: If a toDate is not specified, the API will deliver all of + the results from now( ) for the query going back in time to the fromDate. 
+ If neither the fromDate or toDate parameter is used, the API will deliver + all results for the entire index, starting at the time of the + request, going backwards. + :param maxResults: The maximum number of search results to be returned by a + request. A number between 10 and the system limit (currently 500, 100 for + Sandbox environments). By default, a request response will return 100 + results. + :param next: This parameter is used to get the next 'page' of results. The + value used with the parameter is pulled directly from the response + provided by the API, and should not be modified. + + List Methods ------------ diff --git a/tweepy/api.py index c8551a3b6..eb442e1f1 100644 --- a/tweepy/api.py +++ b/tweepy/api.py @@ -1279,6 +1279,34 @@ def search(self): 'max_id', 'until', 'result_type', 'count', 'include_entities'] ) + + def search_30_day(self, environment_name, *args, **kwargs): + """ :reference: https://developer.twitter.com/en/docs/tweets/search/api-reference/premium-search + :allowed_param: 'query', 'tag', 'fromDate', 'toDate', 'maxResults', + 'next' + """ + return bind_api( + api=self, + path='/tweets/search/30day/{}.json'.format(environment_name), + payload_type='status', payload_list=True, + allowed_param=['query', 'tag', 'fromDate', 'toDate', 'maxResults', + 'next'], + require_auth=True + )(*args, **kwargs) + + def search_full_archive(self, environment_name, *args, **kwargs): + """ :reference: https://developer.twitter.com/en/docs/tweets/search/api-reference/premium-search + :allowed_param: 'query', 'tag', 'fromDate', 'toDate', 'maxResults', + 'next' + """ + return bind_api( + api=self, + path='/tweets/search/fullarchive/{}.json'.format(environment_name), + payload_type='status', payload_list=True, + allowed_param=['query', 'tag', 'fromDate', 'toDate', 'maxResults', + 'next'], + require_auth=True + )(*args, **kwargs) @property def reverse_geocode(self): diff --git a/tweepy/models.py index 
7c33d8f52..5cea45299 100644 --- a/tweepy/models.py +++ b/tweepy/models.py @@ -61,14 +61,18 @@ def parse_list(cls, api, json_list): """ results = ResultSet() - # Handle map parameter for statuses/lookup - if isinstance(json_list, dict) and 'id' in json_list: - for _id, obj in json_list['id'].items(): - if obj: - results.append(cls.parse(api, obj)) - else: - results.append(cls.parse(api, {'id': int(_id)})) - return results + if isinstance(json_list, dict): + # Handle map parameter for statuses/lookup + if 'id' in json_list: + for _id, obj in json_list['id'].items(): + if obj: + results.append(cls.parse(api, obj)) + else: + results.append(cls.parse(api, {'id': int(_id)})) + return results + # Handle premium search + if 'results' in json_list: + json_list = json_list['results'] for obj in json_list: if obj: From cb3f22af66ddf9ddca3b57ff0d6d24a51902ff42 Mon Sep 17 00:00:00 2001 From: Harmon Date: Thu, 5 Dec 2019 20:12:19 -0600 Subject: [PATCH 2/2] Add cursor support for API.search_30_day and API.search_full_archive Adds pagination decorator and NextIterator --- tweepy/api.py | 4 +++- tweepy/binder.py | 10 +++++++++- tweepy/cursor.py | 24 ++++++++++++++++++++++++ tweepy/parsers.py | 4 +++- 4 files changed, 39 insertions(+), 3 deletions(-) diff --git a/tweepy/api.py b/tweepy/api.py index eb442e1f1..1836ee33a 100644 --- a/tweepy/api.py +++ b/tweepy/api.py @@ -7,7 +7,7 @@ import six -from tweepy.binder import bind_api +from tweepy.binder import bind_api, pagination from tweepy.error import TweepError from tweepy.parsers import ModelParser, Parser from tweepy.utils import list_to_csv @@ -1280,6 +1280,7 @@ def search(self): 'include_entities'] ) + @pagination(mode='next') def search_30_day(self, environment_name, *args, **kwargs): """ :reference: https://developer.twitter.com/en/docs/tweets/search/api-reference/premium-search :allowed_param: 'query', 'tag', 'fromDate', 'toDate', 'maxResults', @@ -1294,6 +1295,7 @@ def search_30_day(self, environment_name, *args, **kwargs): 
require_auth=True )(*args, **kwargs) + @pagination(mode='next') def search_full_archive(self, environment_name, *args, **kwargs): """ :reference: https://developer.twitter.com/en/docs/tweets/search/api-reference/premium-search :allowed_param: 'query', 'tag', 'fromDate', 'toDate', 'maxResults', diff --git a/tweepy/binder.py b/tweepy/binder.py index 846cfbf35..88f98a492 100644 --- a/tweepy/binder.py +++ b/tweepy/binder.py @@ -234,7 +234,8 @@ def execute(self): raise TweepError(error_msg, resp, api_code=api_error_code) # Parse the response payload - self.return_cursors = self.return_cursors or 'cursor' in self.session.params + self.return_cursors = (self.return_cursors or + 'cursor' in self.session.params or 'next' in self.session.params) result = self.parser.parse(self, resp.text, return_cursors=self.return_cursors) # Store result into cache if one is available. @@ -266,3 +267,10 @@ def _call(*args, **kwargs): _call.pagination_mode = 'page' return _call + + +def pagination(mode): + def decorator(method): + method.pagination_mode = mode + return method + return decorator diff --git a/tweepy/cursor.py b/tweepy/cursor.py index 2a3d950ea..fe76fc53f 100644 --- a/tweepy/cursor.py +++ b/tweepy/cursor.py @@ -17,6 +17,8 @@ def __init__(self, method, *args, **kwargs): self.iterator = DMCursorIterator(method, *args, **kwargs) elif method.pagination_mode == 'id': self.iterator = IdIterator(method, *args, **kwargs) + elif method.pagination_mode == "next": + self.iterator = NextIterator(method, *args, **kwargs) elif method.pagination_mode == 'page': self.iterator = PageIterator(method, *args, **kwargs) else: @@ -201,6 +203,28 @@ def prev(self): return self.method(page=self.current_page, *self.args, **self.kwargs) +class NextIterator(BaseIterator): + + def __init__(self, method, *args, **kwargs): + BaseIterator.__init__(self, method, *args, **kwargs) + self.next_token = self.kwargs.pop('next', None) + self.page_count = 0 + + def next(self): + if self.next_token == -1 or (self.limit 
and self.page_count == self.limit): + raise StopIteration + data = self.method(next=self.next_token, return_cursors=True, *self.args, **self.kwargs) + self.page_count += 1 + if isinstance(data, tuple): + data, self.next_token = data + else: + self.next_token = -1 + return data + + def prev(self): + raise TweepError('This method does not allow backwards pagination') + + class ItemIterator(BaseIterator): def __init__(self, page_iterator): diff --git a/tweepy/parsers.py b/tweepy/parsers.py index 7d09f636d..a047fc4a3 100644 --- a/tweepy/parsers.py +++ b/tweepy/parsers.py @@ -50,7 +50,9 @@ def parse(self, method, payload, return_cursors=False): raise TweepError('Failed to parse JSON payload: %s' % e) if return_cursors and isinstance(json, dict): - if 'next_cursor' in json: + if 'next' in json: + return json, json['next'] + elif 'next_cursor' in json: if 'previous_cursor' in json: cursors = json['previous_cursor'], json['next_cursor'] return json, cursors