
获取粉丝数目的爬虫类(FanCountSpider.java)
package com.web;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
/**
 * WebMagic {@link PageProcessor} that looks for the markup {@code id="fan">DIGITS<} in the raw
 * text of a downloaded page and stores the parsed number in {@link #count}.
 */
public class FanCountSpider implements PageProcessor {
    /*
     * Expected target URL: https://blog.csdn.net/ + fanName + /article/list/
     */
    private static final String TAG = FanCountSpider.class.getSimpleName();

    /**
     * Matches {@code id="fan">DIGITS<} and captures the digits in group 1. Compiled once as a
     * constant so the regex is not recompiled for every processed page.
     */
    private static final Pattern FAN_COUNT_PATTERN = Pattern.compile("id=\"fan\">(\\d+)<");

    /** Count parsed from the most recently processed page; stays 0 until a page is processed. */
    Integer count = 0;

    private final Site site = Site
            .me()
            .setDomain("blog.csdn.net")
            // Sleep fairly long between requests to make testing easier.
            .setSleepTime(1000)
            .setUserAgent(
                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");

    /**
     * Parses the fan count out of the page's raw text and records it in {@link #count}.
     *
     * @param page the downloaded page handed over by WebMagic
     */
    @Override
    public void process(Page page) {
        count = getCount(page.getRawText());
    }

    /**
     * Extracts the number from the first {@code id="fan">DIGITS<} occurrence in the given text.
     *
     * @param line text to search (typically the raw page source); may be {@code null}
     * @return the parsed count, or 0 when the text is {@code null}, contains no match, or the
     *         digit run does not fit into an {@code int}
     */
    public Integer getCount(String line) {
        if (line == null) {
            return 0;
        }
        Matcher matcher = FAN_COUNT_PATTERN.matcher(line);
        if (!matcher.find()) {
            return 0;
        }
        try {
            return Integer.parseInt(matcher.group(1));
        } catch (NumberFormatException ignored) {
            // The digits overflow int; treat it like "no usable count" instead of
            // letting the exception escape from process().
            return 0;
        }
    }

    /**
     * @return the crawl configuration (domain, sleep time and user agent) used by this processor
     */
    @Override
    public Site getSite() {
        return site;
    }
}
测试类(T2.java)
/*
* 文件名称: T2.java
* 项目描述: ALIPAY 交易系统
* 公司名称: 杭州市阿里科技股份有限公司
* 版权所有: (C) 2022-2023
*/
package com.web;
import us.codecraft.webmagic.Spider;
/**
 * Command-line entry point that crawls {@code https://blog.csdn.net/<username>/article/list/}
 * with a {@link FanCountSpider} and prints the count the spider extracted.
 *
 * <p>Usage: {@code java com.web.T2 <username>}
 *
 * @author globalcoding
 * @since 2022/5/9 15:28
 */
public class T2 {
    /** Fallback username used when no command-line argument is supplied. */
    private static final String DEFAULT_USERNAME = "";

    /**
     * Builds the article-list URL for the requested user, runs the spider on it with a single
     * thread and prints the resulting count to standard output.
     *
     * @param args optional; {@code args[0]} is the username whose page should be crawled
     */
    public static void main(String[] args) {
        String username = DEFAULT_USERNAME;
        if (args != null && args.length > 0 && args[0] != null) {
            username = args[0].trim();
        }
        if (username.isEmpty()) {
            // Without a username the URL would be "https://blog.csdn.net//article/list/",
            // so skip the pointless request and tell the user what is missing.
            System.err.println("Usage: java com.web.T2 <username>");
            return;
        }

        String fanUrl = "https://blog.csdn.net/" + username + "/article/list/";
        FanCountSpider fanCountSpider = new FanCountSpider();
        // NOTE(review): relies on Spider.run() blocking until the crawl has finished so that
        // count is populated before it is printed - confirm against the WebMagic docs.
        Spider.create(fanCountSpider).addUrl(fanUrl).thread(1).run();
        System.out.println(fanCountSpider.count);
    }
}
欢迎分享,转载请注明来源:内存溢出
微信扫一扫
支付宝扫一扫
评论列表(0条)