我们使用 java.net.URL 类来抓取网页：先通过 URL() 构造函数创建 URL 对象，再调用 openStream() 方法读取网页内容。
代码如下：
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.net.URL;

/**
 * Fetches a web page over HTTP, echoes it to standard output line by line,
 * and saves a copy to {@code test.html} in the current working directory.
 */
public class Main {

    /**
     * Downloads {@code http://www.baidu.com}, printing every line to the console
     * and writing it to {@code test.html}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the URL cannot be opened or an I/O error occurs while
     *                   reading the page or writing the file
     */
    public static void main(String[] args) throws Exception {
        URL url = new URL("http://www.baidu.com");

        // try-with-resources closes both streams even when reading or writing
        // fails part-way; the original only closed them on the success path.
        //
        // The charset is given explicitly instead of relying on the platform
        // default: the fetched page declares charset UTF-8, so decoding it (and
        // re-encoding the saved copy) with any other charset would garble
        // non-ASCII text.
        try (BufferedReader reader = new BufferedReader(
                     new InputStreamReader(url.openStream(), StandardCharsets.UTF_8));
             BufferedWriter writer = Files.newBufferedWriter(
                     Paths.get("test.html"), StandardCharsets.UTF_8)) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
                writer.write(line);
                // readLine() strips the line terminator, so add one back.
                writer.newLine();
            }
        }
    }
}
输出结果（网页内容会同时打印到控制台并写入 test.html 文件）：
<!DOCTYPE html> <html> <head> <meta charset="UTF-8"/> <meta http-equiv="X-UA-Compatible" content="IE=11,IE=10,IE=9,IE=8"/>……