Skip to content
This repository was archived by the owner on Jun 10, 2024. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions pyspider/libs/samples/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- encoding: utf-8 -*-
import datetime
from jinja2 import Template


def get_sample_task():
    """Return the ``default_task`` object from the sibling ``task`` module.

    The import happens inside the function, so ``task`` is only loaded the
    first time a sample task is requested.
    """
    from .task import default_task as sample_task
    return sample_task


def get_sample_handler(project, start_url=None, date=None):
    """Render the sample handler script for a project.

    The source of ``pyspider.libs.samples.handler`` is read with
    ``inspect.getsource`` and rendered as a Jinja2 template using the
    ``DATE``, ``PROJECT_NAME`` and ``START_URL`` variables.

    :param project: value substituted for ``PROJECT_NAME``.
    :param start_url: value substituted for ``START_URL``; any falsy value
        is replaced by the literal placeholder ``'__START_URL__'``.
    :param date: value substituted for ``DATE``; when ``None`` the current
        local time formatted as ``%Y-%m-%d %H:%M:%S`` is used.
    :return: the rendered template output.
    """
    from pyspider.libs.samples import handler
    import inspect

    template = Template(inspect.getsource(handler))
    if date is None:
        date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return template.render(
        DATE=date,
        PROJECT_NAME=project,
        START_URL=start_url or '__START_URL__',
    )
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on __DATE__
# Project: __PROJECT_NAME__
# Created on {{DATE}}
# Project: {{PROJECT_NAME}}

from pyspider.libs.base_handler import *

Expand All @@ -12,7 +12,7 @@ class Handler(BaseHandler):

@every(minutes=24 * 60)
def on_start(self):
self.crawl('__START_URL__', callback=self.index_page)
self.crawl('{{START_URL}}', callback=self.index_page)

@config(age=10 * 24 * 60 * 60)
def index_page(self, response):
Expand Down
10 changes: 10 additions & 0 deletions pyspider/libs/samples/task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# -*- encoding: utf-8 -*-

# Sample task: ``taskid`` and ``url`` carry the same ``data:`` URL and the
# task is processed by the ``on_start`` callback; ``project`` is left empty.
default_task = dict(
    taskid='data:,on_start',
    project='',
    url='data:,on_start',
    process=dict(callback='on_start'),
)
1 change: 1 addition & 0 deletions pyspider/processor/project_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import inspect
import traceback
import linecache

from pyspider.libs import utils
from pyspider.libs.log import SaveLogHandler, LogFormatter
logger = logging.getLogger("processor")
Expand Down
2 changes: 1 addition & 1 deletion pyspider/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ def result_worker(ctx, result_cls, get_object=False):
@click.option('--password', envvar='WEBUI_PASSWORD',
help='password of lock -ed projects')
@click.option('--need-auth', is_flag=True, default=False, help='need username and password')
@click.option('--webui-instance', default='pyspider.webui.app.app', callback=load_cls,
@click.option('--webui-instance', default='pyspider.webui.app', callback=load_cls,
help='webui Flask Application instance to be used.')
@click.option('--process-time-limit', default=30, help='script process time limit in debug')
@click.pass_context
Expand Down
130 changes: 129 additions & 1 deletion pyspider/webui/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,132 @@
# http://binux.me
# Created on 2014-02-22 23:20:40

from . import app, index, debug, task, result, login
import os
import sys

import logging
from importlib import import_module
from flask import current_app
from flask import Blueprint, Response
from werkzeug.exceptions import Unauthorized

from pyspider.libs import utils
from pyspider.processor.project_module import ProjectFinder
from .app import QuitableFlask
from pyspider.fetcher import tornado_fetcher
from ._compat import builtins, urljoin, reraise

# Short alias for os.path, used below by base_dir and full_path().
path = os.path

# Directory that contains this file; full_path() joins names onto it.
base_dir = path.dirname(__file__)

# Logger registered under the name "webui".
logger = logging.getLogger("webui")


def full_path(p):
    """Return ``p`` joined onto the module-level ``base_dir``."""
    joined = path.join(base_dir, p)
    return joined

# On Windows (os.name == 'nt') explicitly register ".css" as "text/css"
# with the mimetypes module.
# NOTE(review): presumably works around a wrong system-provided MIME type
# for .css files on Windows -- confirm.
if os.name == 'nt':
    import mimetypes
    mimetypes.add_type("text/css", ".css", True)


def _fetch(url):
    """Fetch ``url`` with a freshly built ``tornado_fetcher.Fetcher``.

    A new ``Fetcher(None, None, async=False)`` is created on every call and
    the result of its ``fetch(url)`` is returned unchanged.

    ``async`` is a reserved keyword from Python 3.7 on, so spelling it as a
    literal keyword argument makes this whole module fail to compile there.
    Passing it through ``**`` unpacking hands the very same keyword to
    ``Fetcher`` while keeping this module parseable on newer interpreters.

    :param url: passed straight through to ``Fetcher.fetch``.
    :return: whatever ``Fetcher.fetch`` returns.
    """
    fetcher = tornado_fetcher.Fetcher(None, None, **{'async': False})
    return fetcher.fetch(url)


def init_config(app):
    """Load the default webui settings into ``app.config``.

    Sets the ``fetch`` callable, empty placeholders for ``taskdb``,
    ``projectdb`` and ``scheduler_rpc``, an empty ``queues`` dict, a
    ``process_time_limit`` of 30 and the 401 ``login_response`` that asks
    for HTTP Basic credentials.
    """
    defaults = {
        'fetch': _fetch,
        'taskdb': None,
        'projectdb': None,
        'scheduler_rpc': None,
        'queues': {},
        'process_time_limit': 30,
    }
    defaults['login_response'] = Response(
        "need auth.",
        401,
        {'WWW-Authenticate': 'Basic realm="Login Required"'},
    )
    app.config.update(defaults)


def init_jinja(app):
    """Configure the Jinja environment of ``app``.

    * ``#`` becomes the line-statement prefix,
    * every name in ``builtins.__dict__`` is added to the template globals,
    * ``utils.format_date`` is registered as the ``format_date`` filter.
    """
    env = app.jinja_env
    env.line_statement_prefix = '#'
    env.globals.update(builtins.__dict__)

    register_filter = app.template_filter('format_date')
    register_filter(utils.format_date)


def init_session(app):
    """Assign ``app.secret_key`` 24 random bytes from ``os.urandom``.

    A new key is generated on every call.
    """
    random_key = os.urandom(24)
    app.secret_key = random_key


def init_view(app):
    """Import the view modules and register their blueprints on ``app``.

    For each name in the fixed list, ``.view.<name>`` is imported relative
    to this package and its ``bp`` attribute is looked up. The attribute is
    registered only when it is a truthy ``Blueprint`` instance; any other
    value is skipped without an error.
    """
    for view_name in ('debug', 'task', 'index', 'bench_test', 'result'):
        view_module = import_module('.view.%s' % view_name, __name__)
        blueprint = view_module.bp
        if not blueprint or not isinstance(blueprint, Blueprint):
            continue
        app.register_blueprint(blueprint)


def cdn_url_handler(error, endpoint, kwargs):
    """Build URLs for the ``cdn`` endpoint and re-raise ``error`` otherwise.

    :param error: the URL build error being handled.
    :param endpoint: name of the endpoint that could not be built.
    :param kwargs: URL values; ``'path'`` is popped for the ``cdn`` endpoint.
    :return: ``path`` joined onto the ``cdn`` value of the current app's
        config (default ``'//cdnjscn.b0.upaiyun.com/libs/'``).
    :raises: ``error`` for every endpoint other than ``cdn``.
    """
    if endpoint == 'cdn':
        asset_path = kwargs.pop('path')
        # Alternative CDN bases kept for reference:
        #   'http://cdn.staticfile.org/'
        #   '//cdnjs.cloudflare.com/ajax/libs/'
        cdn_base = current_app.config.get('cdn', '//cdnjscn.b0.upaiyun.com/libs/')
        return urljoin(cdn_base, asset_path)

    exc_type, exc_value, tb = sys.exc_info()
    if exc_value is not error:
        raise error
    # ``error`` is the exception currently being handled: re-raise it
    # together with its original traceback.
    reraise(exc_type, exc_value, tb)


def init_url_handler(app):
    """Install ``cdn_url_handler`` as ``app.handle_url_build_error``."""
    setattr(app, 'handle_url_build_error', cdn_url_handler)


def init_login(app):
    """Attach the login manager to ``app`` and register the auth check.

    A ``before_request`` hook is added. When the ``need_auth`` config value
    is truthy (it is treated as ``True`` when unset) and the current user
    is not active, the hook returns the configured ``login_response``;
    otherwise it returns ``None``.
    """
    from .login import login_manager
    login_manager.init_app(app)

    from ._compat import login

    def before_request():
        settings = current_app.config
        auth_required = settings.get('need_auth', True)
        if auth_required and not login.current_user.is_active():
            return settings['login_response']

    app.before_request(before_request)


def init_project_import(app):
    """Append a ``ProjectFinder`` built from ``app.config['projectdb']``
    to ``sys.meta_path``.
    """
    project_db = app.config['projectdb']
    sys.meta_path.append(ProjectFinder(project_db))


def init_webdav(app):
    """Call ``.webdav.init_webdav(app)`` if that module can be imported.

    An ``ImportError`` raised while importing or while running the imported
    function is logged as a warning and otherwise ignored.
    """
    try:
        from .webdav import init_webdav as setup_webdav
        setup_webdav(app)
    except ImportError as import_error:
        logger.warning('WebDav interface not enabled: %r', import_error)


def create_app():
    """Create a ``QuitableFlask`` app and run every initializer on it.

    The static and template folders are resolved with ``full_path``. The
    initializers run in a fixed order: config, jinja, session, views, URL
    handler, login and project import. ``init_webdav`` is not called here.

    :return: the initialised application.
    """
    app = QuitableFlask(
        __name__,
        static_folder=full_path('static'),
        template_folder=full_path('templates'),
    )
    initializers = (
        init_config,
        init_jinja,
        init_session,
        init_view,
        init_url_handler,
        init_login,
        init_project_import,
    )
    for initialize in initializers:
        initialize(app)
    return app


# Module-level application instance, built when this package is imported.
app = create_app()
16 changes: 16 additions & 0 deletions pyspider/webui/_compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-

from six import reraise
from six.moves import builtins
from six.moves.urllib.parse import urljoin
from six import iteritems, itervalues

try:
import flask_login as login
except ImportError:
from flask.ext import login

try:
from urllib import urlencode
except ImportError:
from urllib.parse import urlencode
57 changes: 7 additions & 50 deletions pyspider/webui/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,12 @@
# http://binux.me
# Created on 2014-02-22 23:17:13

import os
import sys
import logging
logger = logging.getLogger("webui")

from six import reraise
from six.moves import builtins
from six.moves.urllib.parse import urljoin
from flask import Flask
from pyspider.fetcher import tornado_fetcher
from werkzeug.wsgi import DispatcherMiddleware

if os.name == 'nt':
import mimetypes
mimetypes.add_type("text/css", ".css", True)
logger = logging.getLogger("webui")


class QuitableFlask(Flask):
Expand Down Expand Up @@ -56,15 +48,13 @@ def run(self, host=None, port=None, debug=None, **options):
application = DebuggedApplication(application, True)

try:
from .webdav import dav_app
except ImportError as e:
logger.warning('WebDav interface not enabled: %r', e)
dav_app = None
if dav_app:
from werkzeug.wsgi import DispatcherMiddleware
from .webdav import init_webdav
dev_app = init_webdav(self)
application = DispatcherMiddleware(application, {
'/dav': dav_app
'/dav': dev_app
})
except ImportError as e:
pass

container = tornado.wsgi.WSGIContainer(application)
self.http_server = tornado.httpserver.HTTPServer(container)
Expand All @@ -82,36 +72,3 @@ def quit(self):
self.ioloop.add_callback(self.http_server.stop)
self.ioloop.add_callback(self.ioloop.stop)
self.logger.info('webui exiting...')


# Module-level application instance; static and template folders are
# resolved relative to the directory containing this file.
app = QuitableFlask('webui',
                    static_folder=os.path.join(os.path.dirname(__file__), 'static'),
                    template_folder=os.path.join(os.path.dirname(__file__), 'templates'))
# Random 24-byte secret key, regenerated each time this module is imported.
app.secret_key = os.urandom(24)
# '#' is the Jinja line-statement prefix; all builtins become template globals.
app.jinja_env.line_statement_prefix = '#'
app.jinja_env.globals.update(builtins.__dict__)

# Default settings. 'fetch' builds a new Fetcher for every call.
# NOTE(review): `async` is a reserved keyword from Python 3.7 on, so the
# keyword argument below does not parse on those versions.
app.config.update({
    'fetch': lambda x: tornado_fetcher.Fetcher(None, None, async=False).fetch(x),
    'taskdb': None,
    'projectdb': None,
    'scheduler_rpc': None,
    'queues': dict(),
    'process_time_limit': 30,
})


def cdn_url_handler(error, endpoint, kwargs):
    """Build URLs for the ``cdn`` endpoint and re-raise ``error`` otherwise.

    :param error: the URL build error being handled.
    :param endpoint: name of the endpoint that could not be built.
    :param kwargs: URL values; ``'path'`` is popped for the ``cdn`` endpoint.
    :return: ``path`` joined onto the ``cdn`` value of ``app.config``
        (default ``'//cdnjscn.b0.upaiyun.com/libs/'``).
    :raises: ``error`` for every endpoint other than ``cdn``.
    """
    if endpoint == 'cdn':
        asset_path = kwargs.pop('path')
        # Alternative CDN bases kept for reference:
        #   'http://cdn.staticfile.org/'
        #   '//cdnjs.cloudflare.com/ajax/libs/'
        cdn_base = app.config.get('cdn', '//cdnjscn.b0.upaiyun.com/libs/')
        return urljoin(cdn_base, asset_path)

    exc_type, exc_value, tb = sys.exc_info()
    if exc_value is not error:
        raise error
    # ``error`` is the exception currently being handled: re-raise it
    # together with its original traceback.
    reraise(exc_type, exc_value, tb)
app.handle_url_build_error = cdn_url_handler
31 changes: 0 additions & 31 deletions pyspider/webui/bench_test.py

This file was deleted.

Loading