java获取行政区划编码(省市区县居委5级)

java获取行政区划编码(省市区县居委5级),第1张

数据来源:国家统计局统计用区划代码和城乡划分代码 http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/

由于时间关系,只是简单粗糙地写了一份代码,可自行优化。复制下面的代码即可直接运行:

环境:JDK 8;除 jsoup 外,代码还依赖 Hutool、Lombok 和 commons-lang3。



<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.11.3</version>
</dependency>

import cn.hutool.http.HttpUtil;
import lombok.var;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.select.Elements;

/**
 * 抓取统计局区域编码
 */
public class TestArea {


    public static void main(String[] args) throws InterruptedException {
        test();
    }

    public static void test() throws InterruptedException {
        var url = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/index.html";

        String baseUrl = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2021/";


        String html = HttpUtil.get(url);

        var htmlDoc = Jsoup.parse(html);

        var selectClasses = htmlDoc.getElementsByClass("provincetr");


        for (int i = 0; i < selectClasses.size(); i++) {

            var provideCodes = selectClasses.get(i).children();


            //1.省份 provincetr
            for (int provideCodeIndex = 0; provideCodeIndex < provideCodes.size(); provideCodeIndex++) {
                var provideCodeUrl = provideCodes.get(provideCodeIndex).select("a").attr("href");
                var provideName = provideCodes.get(provideCodeIndex).select("a").text();

                System.out.println("省份 = " + provideName);

                if (!StringUtils.isBlank(provideCodeUrl)) {
                    var provideCode = provideCodeUrl.split("\.")[0];

                    String gotoCityHtml = HttpUtil.get(baseUrl + provideCodeUrl);
                    var cityHtmlDoc = Jsoup.parse(gotoCityHtml);
                    Elements selectCityClass = cityHtmlDoc.select(".citytr");

                    Thread.sleep(2000);
                    //2.城市  citytr
                    for (int cityIndex = 0; cityIndex < selectCityClass.size(); cityIndex++) {
                        var gotoCountyUrl = selectCityClass.get(cityIndex).select("a").attr("href");
                        var cityName = selectCityClass.get(cityIndex).select("a").text();

                        System.out.println("城市 = " + cityName);

                        if (StringUtils.isBlank(gotoCountyUrl)) {
                            continue;
                        }
                        String countytr = HttpUtil.get(baseUrl + gotoCountyUrl);
                        var countytrDoc = Jsoup.parse(countytr);
                        Elements countyClass = countytrDoc.select(".countytr");

                        Thread.sleep(2000);

                        //3.县区  countytr
                        for (int county = 0; county < countyClass.size(); county++) {
                            var gotoTownUrl = countyClass.get(county).select("a").attr("href");
                            var countyName = countyClass.get(county).select("a").text();
                            System.out.println("县区 = " + countyName);


                            if (StringUtils.isBlank(gotoTownUrl)) {
                                continue;
                            }


                            String towntr = HttpUtil.get(baseUrl + provideCode + "/" + gotoTownUrl);
                            var townDoc = Jsoup.parse(towntr);
                            Elements townClass = townDoc.select(".towntr");
                            var gotoTownCode = gotoTownUrl.split("/")[0];
                            Thread.sleep(2000);

                            //4.街道。镇 towntr
                            for (int town = 0; town < townClass.size(); town++) {
                                //towntr
                                var gotoVillageHref = townClass.get(town).select("a").attr("href");
                                var townName = townClass.get(town).select("a").text();
                                System.out.println("街道。镇 = " + townName);

                                if (StringUtils.isBlank(gotoVillageHref)) {
                                    continue;
                                }


                                //居委会
                                String villageStr = HttpUtil.get(baseUrl + provideCode + "/" + gotoTownCode + "/" + gotoVillageHref);
                                var villageDoc = Jsoup.parse(villageStr);
                                Elements villagetr = villageDoc.select(".villagetr");

                                for (int villageIndex = 0; villageIndex < villagetr.size(); villageIndex++) {
                                    var tds = villagetr.get(villageIndex).select("td");//[0].text();
                                    var text = tds.get(0).text();
                                    var text1 = tds.get(1).text();
                                    var text2 = tds.get(2).text();
                                    System.out.println(text + " " + text1 + " " + text2);
                                }

                                System.out.println("---------");
                                Thread.sleep(2000);


                            }


                        }


                    }


                }

            }



        }




    }

}

获取最新的省市区县字典数据代码
免责声明:爬取数据造成任何问题,概不负责,本文只做技术分享和学习。

欢迎分享,转载请注明来源:内存溢出

原文地址:https://54852.com/langs/942019.html

(0)
打赏 微信扫一扫微信扫一扫 支付宝扫一扫支付宝扫一扫
上一篇 2022-05-17
下一篇2022-05-17

发表评论

登录后才能评论

评论列表(0条)

    保存