目录
- 场景描述
- 一.引入依赖
- 二.调用接口响应回来的html
- 三.测试代码
场景描述
调用外部接口时,返回的数据是 HTML 格式,因此需要先对 HTML 进行解析,才能提取出想要的数据。这里使用 jsoup 来完成解析。
一.引入依赖
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.11.3</version>
</dependency>
二.调用接口响应回来的html
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312" />
<title>无标题文档</title>
<style type="text/css">
body, div, dl, dt, dd, ul, ol, li, h1, h2, h3, h4, h5, h6, pre, form, fieldset, input, textarea, p, blockquote, th, td {font-family: "微软雅黑"!important;
}
</style>
<script type="text/javascript" src="https://www.ikun.com.cn/statics/js/jquery-1.7.2.js"></script>
</head>
<body>
<link rel="stylesheet" href="https://www.ikun.com.cn/statics/css/list.css" />
<link rel="stylesheet" href="https://www.ikun.com.cn/statics/css/base.css" />
<!--main-->
<div class="w870" style="background:#fff"><div class="suipin"> <a href="https://www.ikun.com.cn">首页</a>><b class="mowei"><a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=lists&catid=194">智库</a> > <a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=lists&catid=201">港口</a> > <a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=lists&catid=226">ikun日记</a> > 列表</b> </div><!--最新下载--><table width="100%" class="mtzktab" id="mtzktab"><thead><tr class="biaoti"><th width="70%" align="left">ikun日记</th><!-- <th width="15%">点击</th> --><th width="30%">发布时间</th></tr></thead><tbody><tr><td><div class="dian">.</div><a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=show&catid=226&id=235904" target="_blank">ikun日记(第468期)</a></td><!-- <td align="center">0</td> --><td align="center">2023-08-21</td></tr><tr><td><div class="dian">.</div><a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=show&catid=226&id=235638" target="_blank">ikun日记(第467期)</a></td><!-- <td align="center">3</td> --><td align="center">2023-08-14</td></tr><tr><td><div class="dian">.</div><a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=show&catid=226&id=235402" target="_blank">ikun日记(第466期)</a></td><!-- <td align="center">10</td> --><td align="center">2023-08-07</td></tr><tr><td><div class="dian">.</div><a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=show&catid=226&id=235224" target="_blank">ikun日记(第465期)</a></td><!-- <td align="center">4</td> --><td align="center">2023-07-31</td></tr><tr><td><div class="dian">.</div><a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=show&catid=226&id=235047" target="_blank">ikun日记(第464期)</a></td><!-- <td align="center">6</td> --><td align="center">2023-07-24</td></tr><tr><td><div class="dian">.</div><a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=show&catid=226&id=235043" target="_blank">ikun日记(第463期)</a></td><!-- <td align="center">2</td> --><td 
align="center">2023-07-17</td></tr><tr><td><div class="dian">.</div><a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=show&catid=226&id=234716" target="_blank">ikun日记(第462期)</a></td><!-- <td align="center">5</td> --><td align="center">2023-07-10</td></tr><tr><td><div class="dian">.</div><a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=show&catid=226&id=234535" target="_blank">ikun日记(第461期)</a></td><!-- <td align="center">9</td> --><td align="center">2023-07-03</td></tr><tr><td><div class="dian">.</div><a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=show&catid=226&id=234531" target="_blank">ikun日记(第460期)</a></td><!-- <td align="center">1</td> --><td align="center">2023-06-19</td></tr><tr><td><div class="dian">.</div><a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=show&catid=226&id=234100" target="_blank">ikun日记(第459期)</a></td><!-- <td align="center">6</td> --><td align="center">2023-06-12</td></tr><tr><td><div class="dian">.</div><a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=show&catid=226&id=233842" target="_blank">ikun日记(第458期)</a></td><!-- <td align="center">3</td> --><td align="center">2023-06-05</td></tr><tr><td><div class="dian">.</div><a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=show&catid=226&id=233838" target="_blank">ikun日记(第457期)</a></td><!-- <td align="center">1</td> --><td align="center">2023-05-29</td></tr><tr><td><div class="dian">.</div><a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=show&catid=226&id=233551" target="_blank">ikun日记(第456期)</a></td><!-- <td align="center">7</td> --><td align="center">2023-05-22</td></tr><tr><td><div class="dian">.</div><a href="https://www.ikun.com.cn/index.php?m=content&c=index&a=show&catid=226&id=233279" target="_blank">ikun日记(第455期)</a></td><!-- <td align="center">6</td> --><td align="center">2023-05-15</td></tr><tr><td><div class="dian">.</div><a 
href="https://www.ikun.com.cn/index.php?m=content&c=index&a=show&catid=226&id=233087" target="_blank">ikun日记(第454期)</a></td><!-- <td align="center">8</td> --><td align="center">2023-04-29</td></tr></tbody></table><div id="pages"> <a class="a1">368条</a> <a href="index.php?m=content&c=index&a=lists&catid=226" class="a1">上一页</a> <span>1</span> <a href="index.php?m=content&c=index&a=lists&catid=226&page=2">2</a> <a href="index.php?m=content&c=index&a=lists&catid=226&page=3">3</a> <a href="index.php?m=content&c=index&a=lists&catid=226&page=4">4</a> <a href="index.php?m=content&c=index&a=lists&catid=226&page=5">5</a> <a href="index.php?m=content&c=index&a=lists&catid=226&page=6">6</a> <a href="index.php?m=content&c=index&a=lists&catid=226&page=7">7</a> <a href="index.php?m=content&c=index&a=lists&catid=226&page=8">8</a> <a href="index.php?m=content&c=index&a=lists&catid=226&page=9">9</a> <a href="index.php?m=content&c=index&a=lists&catid=226&page=10">10</a> ..<a href="index.php?m=content&c=index&a=lists&catid=226&page=25">25</a> <a href="index.php?m=content&c=index&a=lists&catid=226&page=2" class="a1">下一页</a> </div><script type="text/javascript">$().ready(function () {$('#mtzktab tbody tr:odd').css('background', '#eeeeee'); });</script>
</div>
</body>
</html>
三.测试代码
package org.jeecg.modules.mt.controller;import cn.hutool.http.HttpUtil;
import org.jsoup.Jsoup;
import org.springframework.util.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;public class Test {public static void main(String[] args) throws IOException, SAXException, ParserConfigurationException {String apiUrl = "https://www.ikun.com.cn/index.php?m=content&c=index&a=lists&catid=226&page=1";// 发送get请求String body = HttpUtil.createPost(apiUrl).execute().body();Document doc = Jsoup.parse(body);//获取<tbody>标签下的<div>标签,并把<div>标签的值改为空Elements headingsDiv = doc.select("tbody").select("div");for (Element element : headingsDiv) {element.text("");}//获取<tbody>标签下的<td>标签Elements headings = doc.select("tbody").select("td");//循环获取的<td>标签for (int i = 0; i < headings.size(); i++) {//获取<td>标签内的值String text = headings.get(i).text();//因为包含两个不同的<td>标签,所以需要取余if (i%2 == 0){//获取<td>标签下的<a>标签的 href 属性的值String href = headings.get(i).select("a").attr("href");System.out.println("文件下载地址:"+href);System.out.println("文件名称:"+text);}else {System.out.println("文件时间:"+text);}}}
}