Skip to content

Commit

Permalink
variety bug fix (imWildCat#142)
Browse files Browse the repository at this point in the history
  • Loading branch information
PaleNeutron committed Aug 20, 2021
1 parent 665bfc4 commit 81e0dee
Show file tree
Hide file tree
Showing 15 changed files with 191 additions and 41 deletions.
126 changes: 126 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Build-context exclusions for `docker build`.
#
# NOTE: unlike .gitignore, .dockerignore patterns are matched relative to the
# root of the build context. A bare `*.pyc` or `__pycache__/` therefore only
# matches at the top level. Patterns for artefacts that turn up at any depth
# are prefixed with `**/`, which matches zero or more directories.

# Byte-compiled / optimized / DLL files
**/__pycache__/
**/*.py[cod]
**/*$py.class
**/*.DS_Store

# C extensions
**/*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
scylla/assets/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
**/*.log
.static_storage/
.media/
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/build/

# PyBuilder
target/

# Jupyter Notebook
**/.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# idea
.idea/

# vscode
.vscode

out/

**/*.db

# node
**/node_modules

**/*.bak

package-lock.json
scylla.db-journal

28 changes: 19 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,23 +1,33 @@
# NOTE(review): this section looks like a rendered diff whose +/- markers were
# lost, so superseded and replacement instructions sit side by side (see the
# consecutive FROM lines and the python3.6 / python3.9 COPY pair below).
# Confirm against the repository before building from this text as-is.
FROM python:3.9-slim as build
# Stage "node-build": installs the npm dependencies listed in package.json,
# copies the build context into /root and runs `make assets-build`.
# The python-build stage later reads /root/scylla/assets from this stage.
FROM node:lts-buster as node-build
WORKDIR /root
COPY package.json .
RUN npm install
COPY . .
RUN make assets-build

# NOTE(review): this instruction is identical to the first RUN of the
# python-build stage below; presumably it is the pre-change copy of that line.
RUN apt-get update && apt-get install -y g++ gcc libxslt-dev make libcurl4-openssl-dev build-essential

# Stage "python-build": installs the compiler toolchain and development
# headers, then the Python requirements, then the project itself via setup.py.
FROM python:3.9-slim as python-build
RUN apt-get update && apt-get install -y g++ gcc libxslt-dev make libcurl4-openssl-dev build-essential
RUN apt-get install -y libssl-dev
WORKDIR /root

RUN mkdir -p /var/www/scylla
WORKDIR /var/www/scylla

# NOTE(review): `pip install scylla` sits next to a local `setup.py install`
# further down; presumably only one of the two belongs to the resulting file.
RUN pip install scylla
# Bring in the assets produced by the node-build stage.
COPY --from=node-build /root/scylla/assets ./scylla/assets
# requirements.txt is copied and installed before the full source is copied.
COPY requirements.txt .
RUN pip install -r requirements.txt
# Presumably populates /root/.cache/ms-playwright, which the prod stage copies.
RUN python -m playwright install
COPY . .
RUN python setup.py install

# Stage "prod": the runtime image. docker-compose.yml selects it by name with
# `target: prod`.
FROM python:3.9-slim
FROM python:3.9-slim as prod

LABEL maintainer="WildCat <[email protected]>"

RUN apt-get update && apt-get install -y libxslt-dev libssl-dev libcurl4-openssl-dev

# Two near-identical package lists follow; the second differs only by the
# leading libgbm-dev. Presumably these are the shared libraries needed by the
# browser that Playwright drives (TODO confirm).
RUN apt-get install -y gconf-service libasound2 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 libgcc1 libgconf-2-4 libgdk-pixbuf2.0-0 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils wget
RUN apt-get install -y libgbm-dev gconf-service libasound2 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 libgcc1 libgconf-2-4 libgdk-pixbuf2.0-0 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils wget

# NOTE(review): the first COPY names a python3.6 path and the `build` stage,
# while every FROM in view uses python:3.9. It looks like the pre-change line
# that the python3.9 / python-build COPY below replaces.
COPY --from=build /usr/local/lib/python3.6/site-packages/ /usr/local/lib/python3.6/site-packages/
# Carry the installed Python packages and the Playwright cache directory over
# from the python-build stage.
COPY --from=python-build /usr/local/lib/python3.9/site-packages/ /usr/local/lib/python3.9/site-packages/
COPY --from=python-build /root/.cache/ms-playwright /root/.cache/ms-playwright

# /var/www/scylla is both the working directory and a declared volume.
WORKDIR /var/www/scylla
VOLUME /var/www/scylla
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile-alpine
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ WORKDIR /var/www/scylla

RUN pip install scylla

FROM python:3.9-alpine
FROM python:3.9-alpine as prod

LABEL maintainer="WildCat <[email protected]>"

RUN apk add --update --no-cache libxslt-dev

COPY --from=build /usr/local/lib/python3.6/site-packages/ /usr/local/lib/python3.6/site-packages/
COPY --from=build /usr/local/lib/python3.9/site-packages/ /usr/local/lib/python3.9/site-packages/

WORKDIR /var/www/scylla
VOLUME /var/www/scylla
Expand Down
12 changes: 12 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Compose definition for a single `scylla` service.
version: '3.8'

services:
  scylla:
    # Build from the Dockerfile in this directory, stopping at the stage
    # named `prod`.
    build:
      context: .
      target: prod
    # Bind-mount the host's /var/www/scylla onto the same path in the container.
    volumes:
      - /var/www/scylla:/var/www/scylla
    # Publish container ports 8899 and 8081 on the same host ports.
    ports:
      - "8899:8899"
      - "8081:8081"
16 changes: 8 additions & 8 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
tornado==5.0.2
peewee==3.2.2
tornado==5.1.1
peewee==3.14.4
requests==2.26.0
pycurl==7.43.0.1
sanic==20.12.2
sanic-cors==0.10.0.post3
schedule==0.5.0
six==1.11.0
playwright==1.9.2
pycurl==7.44.0
sanic==21.6.2
sanic-cors
schedule==1.1.0
six==1.16.0
playwright==1.13.1
pyquery==1.4.3
2 changes: 1 addition & 1 deletion scylla/providers/free_proxy_list_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def parse(self, document: PyQuery) -> [ProxyIP]:
ip_list: [ProxyIP] = []

for ip_row in document.find('#proxylisttable tbody tr'):
ip_row: PyQuery = ip_row
ip_row: PyQuery = PyQuery(ip_row)
ip_address: str = ip_row.find('td:nth-child(1)').text()
port: str = ip_row.find('td:nth-child(2)').text()

Expand Down
2 changes: 1 addition & 1 deletion scylla/providers/ipaddress_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def parse(self, document: PyQuery) -> [ProxyIP]:
ip_list: [ProxyIP] = []

for ip_row in document.find('.proxylist tbody tr'):
ip_row: PyQuery = ip_row
ip_row: PyQuery = PyQuery(ip_row)
ip_port: str = ip_row.find('td:nth-child(1)').text()
ip_address, port = ip_port.split(":")

Expand Down
15 changes: 7 additions & 8 deletions scylla/providers/proxy_list_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from scylla.database import ProxyIP
from scylla.worker import Worker
from .base_provider import BaseProvider

import urllib.parse

class ProxyListProvider(BaseProvider):

Expand Down Expand Up @@ -35,14 +35,13 @@ def parse(self, document: PyQuery) -> [ProxyIP]:
def urls(self) -> [str]:
ret = []
first_url = 'http://proxy-list.org/english/index.php?p=1'
sub = first_url[0:first_url.rfind('/')] # http://proxy-list.org/english
first_page = self.w.get_html(first_url, False)

ret.append(first_url)
for a in first_page.find('#content div.content div.table-menu a.item'):
relative_path = a.attrs['href']
absolute_url = sub + relative_path[relative_path.find('/'):]
ret.append(absolute_url)
if first_page:
ret.append(first_url)
for a in first_page.find('#content div.content div.table-menu a.item'):
relative_path = a.attrib['href']
absolute_url = urllib.parse.urljoin(first_url, relative_path)
ret.append(absolute_url)
return ret


Expand Down
2 changes: 1 addition & 1 deletion scylla/providers/proxy_scraper_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def parse(self, document: PyQuery) -> [ProxyIP]:
ip_list: [ProxyIP] = []

text = document.html()
json_object = json.load(text)
json_object = json.loads(text)
if not json_object or type(json_object['usproxy']) != list:
return ip_list

Expand Down
11 changes: 6 additions & 5 deletions scylla/providers/proxylists_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,12 @@ def urls(self) -> [str]:
ret = set([])
country_url = 'http://www.proxylists.net/countries.html'
country_page = self.w.get_html(country_url, False)
for a in country_page.find('a'):
relative_path = a.attrs['href']
if self.country_patten.match(relative_path) :
ret.update(self.gen_url_for_country(self.country_patten.findall(relative_path)[0]))
break
if country_page:
for a in country_page.find('a'):
relative_path = a.attrib['href']
if self.country_patten.match(relative_path) :
ret.update(self.gen_url_for_country(self.country_patten.findall(relative_path)[0]))
break
return list(ret)

def gen_url_for_country(self, country) -> [str]:
Expand Down
2 changes: 1 addition & 1 deletion scylla/providers/spys_me_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def parse(self, document: PyQuery) -> [ProxyIP]:

text = document.html()

ip_port_str_list = re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{2,5}', text.decode('utf-8'))
ip_port_str_list = re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{2,5}', text)

for ip_port in ip_port_str_list:

Expand Down
6 changes: 3 additions & 3 deletions scylla/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def fetch_ips(q: Queue, validator_queue: Queue):

while True:
try:
provider: BaseProvider = q.get()
provider: BaseProvider = q.get()()

provider_name = provider.__class__.__name__

Expand All @@ -32,7 +32,7 @@ def fetch_ips(q: Queue, validator_queue: Queue):
try:
html = worker.get_html(url, render_js=provider.should_render_js())
except Exception as e:
logger.error("worker.get_html failed: ", e)
logger.error("worker.get_html failed: %s", e)
continue

if html:
Expand Down Expand Up @@ -157,7 +157,7 @@ def feed_providers(self):
logger.debug('feed {} providers...'.format(len(all_providers)))

for provider in all_providers:
self.worker_queue.put(provider())
self.worker_queue.put(provider)

def stop(self):
self.worker_queue.close()
Expand Down
2 changes: 1 addition & 1 deletion scylla/web/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

base_path = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir))

app.static('/assets/*', base_path + '/assets')
app.static('/assets', base_path + '/assets')
app.static('/', base_path + '/assets/index.html')
app.static('/*', base_path + '/assets/index.html')

Expand Down
2 changes: 1 addition & 1 deletion tests/requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@ pytest==6.2.2
pytest-cov==2.11.1
pytest-forked==1.3.0
pytest-mock==3.5.1
pytest-sanic==1.7.0
pytest-sanic==1.8.1
pytest-xdist==2.2.1
typing==3.7.4.3
2 changes: 2 additions & 0 deletions tests/web/server_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
import pytest
from requests import Response
from sanic.websocket import WebSocketProtocol
from sanic import Sanic

from scylla.web.server import app
from ..database_test import create_test_ip, delete_test_ip, delete_test_ips, gen_random_ip, ProxyIP

COUNTRIES = ['CN', 'US', 'GB']

Sanic.test_mode = True

@pytest.fixture
def web_app():
Expand Down

0 comments on commit 81e0dee

Please sign in to comment.