-
-
Notifications
You must be signed in to change notification settings - Fork 2.7k
Guest Token fetching for twint run in AWS using proxy #1146
base: master
Are you sure you want to change the base?
Conversation
…er so that Twitter does not block requests from AWS
Example Usage:
|
Hello, I got this error with your new update:
|
I fixed this issue by replacing
|
I get the same error after using your fork of twint:
import twint
# Build a twint configuration for looking up the 'nytimes' account.
c = twint.Config()
c.Username = 'nytimes'
# Proxy settings: an HTTP proxy listening on 127.0.0.1:7890.
c.Proxy_host = '127.0.0.1'
c.Proxy_port = '7890'
c.Proxy_type = 'http'
twint.run.Lookup(c)
Error:
I am sure my http proxy works well, because I have run |
Check whether your local proxy is working by using this code. If it is not, use other proxies.
|
I cannot access Twitter using this code, regardless of which proxy I use. However, I can visit Twitter in Firefox with any of my proxies. |
This commit raises an error when I test it. Here is my code (token.py):
import re
import time
import logging as logme
import requests
class TokenExpiryException(Exception):
    """Exception carrying a message about an expired token."""

    def __init__(self, msg):
        """Store *msg* as the exception message."""
        super().__init__(msg)
class RefreshTokenException(Exception):
    """Exception raised when a guest token cannot be retrieved."""

    def __init__(self, msg):
        """Store *msg* as the exception message."""
        super().__init__(msg)
class Token:
    """Retrieve a guest token from the HTML served at ``self.url``.

    The token is written to ``config.Guest_token``. When the config
    provides a proxy type, host and port, requests are sent through
    that proxy.
    """

    def __init__(self, config):
        """Create the HTTP session and derive proxy settings from *config*.

        Args:
            config: Object exposing ``Proxy_type``, ``Proxy_host`` and
                ``Proxy_port``; ``Guest_token`` is assigned on it later.
        """
        self._session = requests.Session()
        self._session.headers.update(
            {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0'})
        self.config = config
        self._proxies = self._set_proxies()
        self._retries = 5
        self._timeout = 10
        self.url = 'https://twitter.com'

    def _set_proxies(self) -> dict:
        """Build the ``proxies`` mapping for Requests from the config.

        Requests picks a proxy by the scheme of the *target* URL, so the
        mapping is keyed by ``http`` and ``https``; the proxy's own type
        (e.g. ``http`` or ``socks5``) is the scheme of the proxy URL.

        Returns:
            ``{}`` when any of proxy type, host or port is missing,
            otherwise a mapping routing both HTTP and HTTPS traffic
            through ``<type>://<host>:<port>``.
        """
        settings = [self.config.Proxy_type, self.config.Proxy_host, self.config.Proxy_port]
        if not all(settings):
            logme.debug("No proxy in config")
            return {}
        proxy_type = self.config.Proxy_type.lower()
        proxy_url = f"{proxy_type}://{self.config.Proxy_host}:{self.config.Proxy_port}"
        # self.url is HTTPS, so an 'https' entry is required for the proxy
        # to be used at all, whatever the proxy's own protocol is.
        return {'http': proxy_url, 'https': proxy_url}

    def _request(self):
        """GET ``self.url`` with retries and exponential back-off.

        Returns:
            The response of the first request that completes without a
            ``requests`` exception.

        Raises:
            RefreshTokenException: If all ``self._retries + 1`` attempts
                fail; ``config.Guest_token`` is reset to ``None`` first.
        """
        send_kwargs = {'allow_redirects': True, 'timeout': self._timeout}
        if self._proxies:
            # NOTE(review): certificate verification is disabled for proxied
            # requests (kept from the original behaviour); this exposes the
            # connection to interception and should be revisited.
            send_kwargs.update(proxies=self._proxies, verify=False)

        last_error = None
        for attempt in range(self._retries + 1):
            # The request is newly prepared on each retry because of potential cookie updates.
            req = self._session.prepare_request(requests.Request('GET', self.url))
            logme.debug(f'Retrieving {req.url}')
            try:
                r = self._session.send(req, **send_kwargs)
            except requests.exceptions.RequestException as exc:
                last_error = exc
                if attempt < self._retries:
                    retrying = ', retrying'
                    level = logme.WARNING
                else:
                    retrying = ''
                    level = logme.ERROR
                logme.log(level, f'Error retrieving {req.url}: {exc!r}{retrying}')
            else:
                logme.debug(f'{req.url} retrieved successfully')
                return r
            if attempt < self._retries:
                # TODO : might wanna tweak this back-off timer
                sleep_time = 2.0 * 2 ** attempt
                logme.info(f'Waiting {sleep_time:.0f} seconds')
                time.sleep(sleep_time)

        # Only reached when every attempt raised.
        msg = f'{self._retries + 1} requests to {self.url} failed, giving up.'
        logme.fatal(msg)
        self.config.Guest_token = None
        raise RefreshTokenException(msg) from last_error

    def refresh(self):
        """Fetch the page and store the guest token on the config.

        Raises:
            RefreshTokenException: If the request fails or the token
                pattern is not present in the returned HTML; in the
                latter case ``config.Guest_token`` is set to ``None``.
        """
        logme.debug('Retrieving guest token')
        res = self._request()
        match = re.search(r'\("gt=(\d+);', res.text)
        if not match:
            self.config.Guest_token = None
            raise RefreshTokenException('Could not find the Guest token in HTML')
        logme.debug('Found guest token in HTML')
        self.config.Guest_token = match.group(1)
The last commit is correct. You can check the changes in the PR. |
Twitter uses HTTPS, so change the code to this and add an HTTPS proxy entry; then it works. But twint still has a bug.
|
Use the proxy host, port, and proxy type from the config to route requests through a proxy server, so that Twitter does not block requests from AWS.