使用 Java 爬取网站 http://www.shicimingju.com 的小说内容
代码详解
1.在本地创建存储位置
2.编写正则表达式
3.循环获取内容
4.把内容存入文件中
5.判断成功或失败
效果演示
代码展示
package text;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads chapters 1 through 120 from
 * {@code http://www.shicimingju.com/book/sanguoyanyi/<n>.html} and appends the
 * extracted title and paragraph text of each chapter to a local text file.
 *
 * <p>Extraction is regex-based and applied one input line at a time, so a
 * {@code <p>} or {@code <title>} element that spans several lines of the page
 * source is not captured.
 */
public class text {

    /** URL prefix of a chapter page; the chapter number and ".html" are appended. */
    private static final String CHAPTER_URL_PREFIX = "http://www.shicimingju.com/book/sanguoyanyi/";

    /** Number of chapters to fetch (pages 1..120). */
    private static final int CHAPTER_COUNT = 120;

    /** Matches a paragraph element on one line; group 1 is the text between the tags. */
    private static final Pattern CONTENT_PATTERN = Pattern.compile("<p.*?>(.*?)</p>");

    /** Matches the page title element on one line; group 1 is the title text. */
    private static final Pattern TITLE_PATTERN = Pattern.compile("<title>(.*?)</title>");

    /**
     * Fetches every chapter in order and appends it to {@code D:\Text\text.txt}.
     * A failure on one chapter is reported on the console and the loop moves on
     * to the next chapter.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Output file; chapters are appended to it one after another.
        File file = new File("D:\\Text\\text.txt");

        // Make sure the target directory exists. The result is deliberately ignored:
        // if creation fails, opening the file below fails and is reported per chapter.
        File parent = file.getParentFile();
        if (parent != null) {
            parent.mkdirs();
        }

        for (int i = 1; i <= CHAPTER_COUNT; i++) {
            System.out.println("第" + i + "章开始下载。。。");
            try {
                downloadChapter(i, file);
                System.out.println("第" + i + "章下载完成.........");
            } catch (IOException e) {
                System.out.println("很遗憾,本次下载失败!!!");
                e.printStackTrace();
            }
        }
    }

    /**
     * Downloads one chapter page and appends its title and paragraphs to {@code file}.
     *
     * @param chapter chapter number used to build the page URL and the heading line
     * @param file    destination file, opened in append mode and written as UTF-8
     * @throws IOException if the page cannot be read or the file cannot be written
     */
    private static void downloadChapter(int chapter, File file) throws IOException {
        URL url = new URL(CHAPTER_URL_PREFIX + chapter + ".html");

        // try-with-resources closes both streams even when reading or writing fails.
        // The reader is opened first so a network failure never touches the file.
        try (BufferedReader reader = new BufferedReader(
                     new InputStreamReader(url.openStream(), StandardCharsets.UTF_8));
             BufferedWriter writer = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(file, true), StandardCharsets.UTF_8))) {

            String line;
            while ((line = reader.readLine()) != null) {
                // At most one title is taken per line.
                Matcher titleMatcher = TITLE_PATTERN.matcher(line);
                if (titleMatcher.find()) {
                    String title = titleMatcher.group(1);
                    System.out.println(title);
                    writer.write("第" + chapter + "章:" + title + "\n");
                }

                // Every paragraph on the line is written on its own output line.
                // Group 1 is used so an opening tag with attributes is dropped too.
                Matcher contentMatcher = CONTENT_PATTERN.matcher(line);
                while (contentMatcher.find()) {
                    // NOTE(review): this also strips every literal '?' and space from the
                    // text; presumably meant to drop mis-decoded characters - confirm.
                    String content = contentMatcher.group(1).replace(" ", "").replace("?", "");
                    writer.write(content + "\n");
                }
            }

            // Blank lines separate consecutive chapters in the output file.
            writer.write("\n\n");
        }
    }
}
了解更多关注我哟!!!