公司有个项目需要五级行政区划数据,但没有现成的数据,于是写了一段代码,从国家统计局网站(stats.gov.cn)抓取。在此记录一下。
1.引入 Maven 依赖 jsoup,用于解析 HTML
<!-- jsoup --> <dependency><groupId>org.jsoup</groupId><artifactId>jsoup</artifactId><version>1.11.3</version> </dependency>
2.Java代码实现
@GetMapping("/hh")public void hh(){Division d=new Division();final String url = "https://www.stats.gov.cn/sj/tjbz/tjyqhdmhcxhfdm/2023/";String provinceurl = "https://www.stats.gov.cn/sj/tjbz/tjyqhdmhcxhfdm/2023/index.html"; // 需要爬取的目标网站地址try {Document document = Jsoup.connect(provinceurl).get(); // 获取该网页的文档对象String title = document.title(); // 获取页面标题//省Elements provincetable=document.body().select("tr.provincetr").select("a[href]");for (Element province : provincetable) {String provinceHref = province.attr("href"); // 获取链接地址String provinceText = province.text(); // 获取链接文字d.setProvincialCode(provinceHref.replace(".html",""));d.setProvincialName(provinceText);String cityurl=url+provinceHref;//System.out.println("cityurl = " + cityurl);Document citytabledocument = Jsoup.connect(cityurl).get();//市Elements citytable=citytabledocument.body().select("table.citytable").select("a[href]");//System.out.println("citytable = " + citytable);for(int i=0;i<citytable.size()/2;i++){d.setMunicipalCode(citytable.get(i).text());i=i+1;d.setMunicipalName(citytable.get(i).text());String cityHref = citytable.get(i).attr("href"); // 获取链接地址String countyurl=url+cityHref;System.out.println("countyurl = " + countyurl);Document countytableocument = Jsoup.connect(countyurl).get();//区Elements countytable=countytableocument.body().select("table.countytable").select("a[href]");for(int j=0;j<countytable.size()/2;j++){d.setDistrictCode(countytable.get(j).text());j=j+1;d.setDistrictName(countytable.get(j).text());String countyHref = countytable.get(j).attr("href"); // 获取链接地址String townturl = url + provinceHref.replace(".html", "") + "/" + countyHref;Document townttableocument = Jsoup.connect(townturl).get();//镇Elements towntable = townttableocument.body().select("table.towntable").select("a[href]");for(int k=0;k<towntable.size()/2;k++){d.setStreetTownCode(towntable.get(k).text());k=k+1;d.setStreetTownName(towntable.get(k).text());String towntHref = towntable.get(k).attr("href"); // 获取链接地址String 
villageurl = townturl.substring(0, townturl.length() - 11) + towntHref;System.out.println("villageurl = " + villageurl);Document villagetabledocument = Jsoup.connect(villageurl).get();//村Elements villagetable = villagetabledocument.body().select("table.villagetable").select("tr.villagetr");for (Element village : villagetable) {String villageText = village.text(); // 获取链接文字String[] vi = villageText.split(" ");System.out.println("统计用区划代码: " + vi[0]);System.out.println("城乡分类代码: " + vi[1]);System.out.println("名称: " + vi[2]);d.setCommunityVillageCode(vi[0]);d.setUrbanRural( vi[1]);d.setCommunityVillageName(vi[2]);System.out.println("d.toString() = " + d.toString());System.out.println("vi = " + vi);divisionService.insertDivision1(d);}//我想让他跑慢点,你可以自己调try {Thread.sleep(2000);} catch (InterruptedException e) {throw new RuntimeException(e);}}}}}} catch (IOException e) {e.printStackTrace();}}
3.用到的实体类
import com.ruoyi.common.utils.StringUtils;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import com.ruoyi.common.annotation.Excel;
import com.ruoyi.common.core.domain.BaseEntity;/*** 行政区划对象 division** @author liphui* @date 2023-11-17*/
public class Division extends BaseEntity
{private static final long serialVersionUID = 1L;/** 省级代码 */@Excel(name = "省级代码")private String provincialCode;/** 省级名称 */@Excel(name = "省级名称")private String provincialName;/** 市级代码 */@Excel(name = "市级代码")private String municipalCode;/** 市级名称 */@Excel(name = "市级名称")private String municipalName;/** 区县代码 */@Excel(name = "区县代码")private String districtCode;/** 区县名称 */@Excel(name = "区县名称")private String districtName;/** 街镇乡代码 */@Excel(name = "街镇乡代码")private String streetTownCode;/** 街镇乡名称 */@Excel(name = "街镇乡名称")private String streetTownName;/** 社区村级代码 */@Excel(name = "社区村级代码")private String communityVillageCode;/** 社区村级名称 */@Excel(name = "社区村级名称")private String communityVillageName;/** 城乡分类 */@Excel(name = "城乡分类")private String urbanRural;public void setProvincialCode(String provincialCode){this.provincialCode = provincialCode;}public String getProvincialCode(){return provincialCode;}public void setProvincialName(String provincialName){this.provincialName = provincialName;}public String getProvincialName(){return provincialName;}public void setMunicipalCode(String municipalCode){this.municipalCode = municipalCode;}public String getMunicipalCode(){return municipalCode;}public void setMunicipalName(String municipalName){this.municipalName = municipalName;}public String getMunicipalName(){return municipalName;}public void setDistrictCode(String districtCode){this.districtCode = districtCode;}public String getDistrictCode(){return districtCode;}public void setDistrictName(String districtName){this.districtName = districtName;}public String getDistrictName(){return districtName;}public void setStreetTownCode(String streetTownCode){this.streetTownCode = streetTownCode;}public String getStreetTownCode(){return streetTownCode;}public void setStreetTownName(String streetTownName){this.streetTownName = streetTownName;}public String getStreetTownName(){return streetTownName;}public void setCommunityVillageCode(String communityVillageCode){this.communityVillageCode = 
communityVillageCode;}public String getCommunityVillageCode(){return communityVillageCode;}public void setCommunityVillageName(String communityVillageName){this.communityVillageName = communityVillageName;}public String getCommunityVillageName(){return communityVillageName;}public void setUrbanRural(String urbanRural){this.urbanRural = urbanRural;}public String getUrbanRural(){return urbanRural;}public String getDivisionName(){StringBuilder stringBuilder = new StringBuilder();if (StringUtils.isNotEmpty(this.provincialName)){stringBuilder.append(this.provincialName);}else {return stringBuilder.toString();}if (StringUtils.isNotEmpty(this.municipalName)){stringBuilder.append(",").append(this.municipalName);}else {return stringBuilder.toString();}if (StringUtils.isNotEmpty(this.districtName)){stringBuilder.append(",").append(this.districtName);}else {return stringBuilder.toString();}if (StringUtils.isNotEmpty(this.streetTownName)){stringBuilder.append(",").append(this.streetTownName);}else {return stringBuilder.toString();}if (StringUtils.isNotEmpty(this.communityVillageName)){stringBuilder.append(",").append(this.communityVillageName);}else {return stringBuilder.toString();}return stringBuilder.toString();}public String getDivisionCode(){StringBuilder stringBuilder = new StringBuilder();if (StringUtils.isNotEmpty(this.provincialCode)){stringBuilder.append(this.provincialCode);}else {return stringBuilder.toString();}if (StringUtils.isNotEmpty(this.municipalCode)){stringBuilder.append(",").append(this.municipalCode);}else {return stringBuilder.toString();}if (StringUtils.isNotEmpty(this.districtCode)){stringBuilder.append(",").append(this.districtCode);}else {return stringBuilder.toString();}if (StringUtils.isNotEmpty(this.streetTownCode)){stringBuilder.append(",").append(this.streetTownCode);}else {return stringBuilder.toString();}if (StringUtils.isNotEmpty(this.communityVillageCode)){stringBuilder.append(",").append(this.communityVillageCode);}else {return 
stringBuilder.toString();}return stringBuilder.toString();}@Overridepublic String toString() {return new ToStringBuilder(this,ToStringStyle.MULTI_LINE_STYLE).append("provincialCode", getProvincialCode()).append("provincialName", getProvincialName()).append("municipalCode", getMunicipalCode()).append("municipalName", getMunicipalName()).append("districtCode", getDistrictCode()).append("districtName", getDistrictName()).append("streetTownCode", getStreetTownCode()).append("streetTownName", getStreetTownName()).append("communityVillageCode", getCommunityVillageCode()).append("communityVillageName", getCommunityVillageName()).append("urbanRural", getUrbanRural()).toString();}
}
其他的代码不贴了,就是数据入库。