Scrapyd 改进第二步: Web Interface 添加 STOP 和 START 超链接, 一键调用 Scrapyd API

2021-03-29 02:24

阅读:389

<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">

标签:running   效果   home   避免   runner   load   sele   collect   root   

0.提出问题

Scrapyd 提供的启动和取消任务的 API 如下。在《Scrapyd 改进第一步: Web Interface 添加 charset=UTF-8, 避免查看 log 出现中文乱码》的基础上,准备在 Jobs 页面上进一步添加 START 和 STOP 超链接。

 

http://scrapyd.readthedocs.io/en/stable/api.html#schedule-json

Example request:

$ curl http://localhost:6800/schedule.json -d project=myproject -d spider=somespider

Example response:

{"status": "ok", "jobid": "6487ec79947edab326d6db28a2d86511e8247444"}

http://scrapyd.readthedocs.io/en/stable/api.html#cancel-json

Example request:

$ curl http://localhost:6800/cancel.json -d project=myproject -d job=6487ec79947edab326d6db28a2d86511e8247444

Example response:

{"status": "ok", "prevstate": "running"}

 

1.解决思路

尝试直接通过浏览器地址栏 GET 请求页面 http://localhost:6800/schedule.json?project=myproject&spider=somespider

返回提示需要使用 POST 请求

{"node_name": "pi-desktop", "status": "error", "message": "Expected one of [b'HEAD', b'object', b'POST']"}

那就继续通过 URL 查询参数传参,再由页面中的 JS 发起异步 POST 请求。

 

2.修改 Scrapyd 代码

/site-packages/scrapyd/website.py

改动位置:

(1) table 添加最后两列,分别用于 UTF-8 和 STOP/START 超链接,见红色代码

    def render(self, txrequest):
        cols = 10 ######## 8
        s = "

Scrapyd"
        s += ""
        s += "

Jobs

" s += "

Go back

" s += "" s += ""if self.local_items: s += "" #cols = 9 ######## cols += 1 ########

 

(2) 有两处需要添加 UTF-8 超链接,分别对应 Running 和 Finished,见红色代码

前面 Running 部分添加 UTF-8 超链接后继续添加 STOP 超链接,见蓝色代码

            s += "
" % (p.project, p.spider, p.job) s += "" % (p.project, p.spider, p.job) ######## s += "" % (p.project, p.job) ########

 

后面 Finished 部分添加 UTF-8 超链接后继续添加 START 超链接,见蓝色代码

            s += "
" % (p.project, p.spider, p.job) s += "" % (p.project, p.spider, p.job) ######## s += "" % (p.project, p.spider) ########

(3) 完整代码

技术分享图片技术分享图片
from datetime import datetime

import socket

from twisted.web import resource, static
from twisted.application.service import IServiceCollection

from scrapy.utils.misc import load_object

from .interfaces import IPoller, IEggStorage, ISpiderScheduler

from six.moves.urllib.parse import urlparse

class Root(resource.Resource):
    """Root resource of the Scrapyd web interface.

    Registers the home page, the optional ``logs`` and ``items`` static file
    trees, the jobs page and every resource listed in the ``services``
    config section as children of this resource.
    """

    def __init__(self, config, app):
        """Build the resource tree from *config*.

        Args:
            config: configuration object; read through ``get``,
                ``getboolean`` and ``items``.
            app: application object on which the poller, scheduler, egg
                storage and launcher are looked up.
        """
        resource.Resource.__init__(self)
        self.debug = config.getboolean('debug', False)
        self.runner = config.get('runner')
        logsdir = config.get('logs_dir')
        itemsdir = config.get('items_dir')
        # Items are only served when items_dir is a plain path or a file:// URL.
        local_items = itemsdir and (urlparse(itemsdir).scheme.lower() in ['', 'file'])
        self.app = app
        self.nodename = config.get('node_name', socket.gethostname())
        self.putChild(b'', Home(self, local_items))
        if logsdir:
            self.putChild(b'logs', static.File(logsdir.encode('ascii', 'ignore'), 'text/plain'))
        if local_items:
            self.putChild(b'items', static.File(itemsdir, 'text/plain'))
        self.putChild(b'jobs', Jobs(self, local_items))
        services = config.items('services', ())
        for servName, servClsName in services:
            # Each entry maps a URL path segment to a dotted class path.
            servCls = load_object(servClsName)
            self.putChild(servName.encode('utf-8'), servCls(self))
        self.update_projects()

    def update_projects(self):
        """Ask both the poller and the scheduler to refresh their projects."""
        self.poller.update_projects()
        self.scheduler.update_projects()

    @property
    def launcher(self):
        """Service registered under the name ``launcher`` on the application."""
        app = IServiceCollection(self.app, self.app)
        return app.getServiceNamed('launcher')

    @property
    def scheduler(self):
        """Application component providing ``ISpiderScheduler``."""
        return self.app.getComponent(ISpiderScheduler)

    @property
    def eggstorage(self):
        """Application component providing ``IEggStorage``."""
        return self.app.getComponent(IEggStorage)

    @property
    def poller(self):
        """Application component providing ``IPoller``."""
        return self.app.getComponent(IPoller)


class Home(resource.Resource):
    """Landing page listing the available projects and navigation links."""

    def __init__(self, root, local_items):
        """Keep a reference to the root resource and the items flag.

        Args:
            root: root resource; its ``scheduler`` is used to list projects.
            local_items: truthy when the "Items" link should be shown.
        """
        resource.Resource.__init__(self)
        self.root = root
        self.local_items = local_items

    def render_GET(self, txrequest):
        """Return the home page HTML encoded as UTF-8 bytes."""
        context = {
            'projects': ', '.join(self.root.scheduler.list_projects())
        }
        s = """
<html>
<head><title>Scrapyd</title></head>
<body>
<h1>Scrapyd</h1>
<p>Available projects: <b>%(projects)s</b></p>
<ul>
<li><a href="/jobs">Jobs</a></li>
""" % context
        if self.local_items:
            s += '<li><a href="/items/">Items</a></li>'
        s += """
<li><a href="/logs/">Logs</a></li>
<li><a href="http://scrapyd.readthedocs.io/en/stable/">Documentation</a></li>
</ul>

<h2>How to schedule a spider?</h2>

<p>To schedule a spider you need to use the API (this web UI is only for
monitoring)</p>

<p>Example using curl:</p>
<p><code>curl http://localhost:6800/schedule.json -d project=default -d spider=somespider</code></p>

<p>For more information about the API, see the <a href="http://scrapyd.readthedocs.io/en/stable/">Scrapyd documentation</a></p>
</body>
</html>
""" % context
        return s.encode('utf-8')


class Jobs(resource.Resource):
    """Jobs page: pending, running and finished jobs in one HTML table.

    Compared with the stock page, every running row gets a UTF-8 log link and
    a STOP link, and every finished row gets a UTF-8 log link and a START
    link. STOP/START open ``/logs/scrapyd.html``, which issues the POST
    request to ``cancel.json`` / ``schedule.json``.
    """

    def __init__(self, root, local_items):
        """Keep a reference to the root resource and the items flag.

        Args:
            root: root resource; its ``poller`` and ``launcher`` are read.
            local_items: truthy when the "Items" column should be shown.
        """
        resource.Resource.__init__(self)
        self.root = root
        self.local_items = local_items

    def render(self, txrequest):
        """Return the jobs table as UTF-8 bytes and set the response headers."""
        cols = 10  # 8 stock columns + UTF-8 + STOP/START
        s = "<html><head><title>Scrapyd</title></head>"
        s += "<body>"
        s += "<h1>Jobs</h1>"
        s += "<p><a href='..'>Go back</a></p>"
        s += "<table border='1'>"
        s += ("<tr><th>Project</th><th>Spider</th><th>Job</th><th>PID</th>"
              "<th>Start</th><th>Runtime</th><th>Finish</th><th>Log</th>"
              # Header cells for the two added columns keep rows aligned.
              "<th>UTF-8</th><th>STOP/START</th>")
        if self.local_items:
            s += "<th>Items</th>"
            cols += 1
        s += "</tr>"

        s += "<tr><th colspan='%s' style='background-color: #ddd'>Pending</th></tr>" % cols
        for project, queue in self.root.poller.queues.items():
            for m in queue.list():
                s += "<tr>"
                s += "<td>%s</td>" % project
                s += "<td>%s</td>" % str(m['name'])
                s += "<td>%s</td>" % str(m['_job'])
                s += "</tr>"

        s += "<tr><th colspan='%s' style='background-color: #ddd'>Running</th></tr>" % cols
        for p in self.root.launcher.processes.values():
            s += "<tr>"
            for a in ['project', 'spider', 'job', 'pid']:
                s += "<td>%s</td>" % getattr(p, a)
            s += "<td>%s</td>" % p.start_time.replace(microsecond=0)
            s += "<td>%s</td>" % (datetime.now().replace(microsecond=0) - p.start_time.replace(microsecond=0))
            s += "<td></td>"  # still running: no finish time yet
            s += "<td><a href='/logs/%s/%s/%s.log'>Log</a></td>" % (p.project, p.spider, p.job)
            # NOTE(review): the UTF-8 viewer page name is not shown in this
            # article (it comes from the previous step) -- confirm the path.
            s += "<td><a href='/logs/UTF-8.html?project=%s&spider=%s&job=%s' target='_blank'>UTF-8</a></td>" % (p.project, p.spider, p.job)
            s += "<td><a href='/logs/scrapyd.html?opt=cancel&project=%s&job_or_spider=%s' target='_blank'>STOP</a></td>" % (p.project, p.job)
            if self.local_items:
                s += "<td><a href='/items/%s/%s/%s.jl'>Items</a></td>" % (p.project, p.spider, p.job)
            s += "</tr>"

        s += "<tr><th colspan='%s' style='background-color: #ddd'>Finished</th></tr>" % cols
        for p in self.root.launcher.finished:
            s += "<tr>"
            for a in ['project', 'spider', 'job']:
                s += "<td>%s</td>" % getattr(p, a)
            s += "<td></td>"  # finished processes have no PID cell
            s += "<td>%s</td>" % p.start_time.replace(microsecond=0)
            s += "<td>%s</td>" % (p.end_time.replace(microsecond=0) - p.start_time.replace(microsecond=0))
            s += "<td>%s</td>" % p.end_time.replace(microsecond=0)
            s += "<td><a href='/logs/%s/%s/%s.log'>Log</a></td>" % (p.project, p.spider, p.job)
            # NOTE(review): same UTF-8 viewer page as above -- confirm the path.
            s += "<td><a href='/logs/UTF-8.html?project=%s&spider=%s&job=%s' target='_blank'>UTF-8</a></td>" % (p.project, p.spider, p.job)
            s += "<td><a href='/logs/scrapyd.html?opt=schedule&project=%s&job_or_spider=%s' target='_blank'>START</a></td>" % (p.project, p.spider)
            if self.local_items:
                s += "<td><a href='/items/%s/%s/%s.jl'>Items</a></td>" % (p.project, p.spider, p.job)
            s += "</tr>"

        s += "</table>"
        s += "</body>"
        s += "</html>"

        # Encode first: Content-Length must count bytes, not characters, and
        # header values must be strings rather than ints.
        body = s.encode('utf-8')
        txrequest.setHeader('Content-Type', 'text/html; charset=utf-8')
        txrequest.setHeader('Content-Length', str(len(body)))
        return body
    /site-packages/scrapyd/website.py

     

    3.新建 scrapyd.html

    根据 http://scrapyd.readthedocs.io/en/stable/config.html 确定 Scrapyd 所使用的 logs_dir,在该目录下添加如下文件

    技术分享图片技术分享图片
    <html>
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>scrapyd</title>
    </head>
    
    <body>
    <p>仅用于内网环境下执行 scrapyd API</p>
    <div id="result"></div>
    
    <script>
    /**
     * Parse a URL query string into a plain object of key/value pairs.
     *
     * Keys and values are percent-decoded, so "project=my%20proj" yields
     * {project: "my proj"}. A key without "=value" maps to undefined.
     *
     * @param {string} url Query string, with or without the leading "?".
     * @returns {Object} Map from parameter name to its value.
     */
    function parseQueryString(url) {
        // Malformed escapes are kept verbatim instead of aborting the parse.
        function decode(text) {
            try {
                return decodeURIComponent(text);
            } catch (e) {
                return text;
            }
        }

        var urlParams = {};
        url.replace(
            new RegExp("([^?=&]+)(=([^&]*))?", "g"),
            function($0, $1, $2, $3) {
                urlParams[decode($1)] = ($3 === undefined) ? undefined : decode($3);
            }
        );
        return urlParams;
    }
    
    /**
     * POST a form to the Scrapyd JSON API and show the response in #result.
     *
     * @param {string} opt API endpoint name without ".json"; "cancel" sends
     *     the third argument as "job", anything else sends it as "spider".
     * @param {string} project Project name.
     * @param {string} job_or_spider Job id (cancel) or spider name (schedule).
     */
    function curl(opt, project, job_or_spider) {
        console.log(opt);
        console.log(project);
        console.log(job_or_spider);
        var formdata = new FormData();
        formdata.append('project', project);
        if (opt == 'cancel') {
            formdata.append('job', job_or_spider);
        } else {
            formdata.append('spider', job_or_spider);
        }

        var req = new XMLHttpRequest();
        req.onreadystatechange = function() {
            var result = document.querySelector('#result');
            if (this.readyState == 4) {
                if (this.status == 200) {
                    // The response is JSON text; never interpret it as HTML.
                    result.textContent = this.responseText;
                } else {
                    alert('status code: ' + this.status);
                }
            } else {
                // Show the intermediate readyState as a progress indicator.
                result.textContent = this.readyState;
            }
        };
        // Third argument true = asynchronous request.
        req.open('POST', 'http://127.0.0.1:6800/' + opt + '.json', true);
        req.send(formdata);
    }
    
    // Run the requested API call as soon as the page loads; only the two
    // supported operations are accepted from the query string.
    var kwargs = parseQueryString(location.search);
    if (kwargs.opt == 'cancel' || kwargs.opt == 'schedule') {
        curl(kwargs.opt, kwargs.project, kwargs.job_or_spider);
    }
    </script>
    </body>
    </html>
    scrapyd.html

    可以根据需要,将其中 req.open() 调用里 URL 的 127.0.0.1 修改为运行 Scrapyd 的实际内网主机 ip 地址。

     

    4.实现效果

    技术分享图片

     

    (1) 点击 STOP 超链接

    技术分享图片

     

     

    (2) 返回 Jobs 页面

    技术分享图片

     

     

    (3) 点击 START 超链接

    技术分享图片

     

     

    (4) 返回 Jobs 页面

    技术分享图片

     

    Scrapyd 改进第二步: Web Interface 添加 STOP 和 START 超链接, 一键调用 Scrapyd API

    标签:running   效果   home   避免   runner   load   sele   collect   root   

    原文地址:https://www.cnblogs.com/my8100/p/scrapyd_2_add_stop_start.html

    上一篇:js 数据类型

    下一篇:c# 基本值类型及其默认值


    评论


    亲,登录后才可以留言!

    热门文章

    推荐文章

    最新文章

    置顶文章