using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO; // streams
using System.Linq;
using System.Net; // web requests
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;

namespace WindowsFormsApplication8
{
    /// <summary>
    /// Form that downloads the page at the URL typed into <c>textBox1</c>, strips its
    /// markup, saves the stripped text to a file and shows an excerpt of the raw page
    /// delimited by two marker strings in <c>richTextBox1</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Matches one markup tag: "&lt;", at least one character (newlines included), "&gt;".</summary>
        private static readonly Regex TagPattern = new Regex("<(.|\n)+?>");

        /// <summary>Text that marks the beginning of the excerpt inside the downloaded page.</summary>
        private const string StartMarker = "红|袖|言|情|小|说";

        /// <summary>Text that marks the end of the excerpt; the marker itself is part of the excerpt.</summary>
        private const string EndMarker = "最后放弃";

        /// <summary>File that receives the tag-stripped page text.</summary>
        private const string OutputPath = "E:\\1.txt";

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Removes markup tags and any leftover '&gt;' characters from <paramref name="q"/>.
        /// </summary>
        /// <param name="q">Raw page source.</param>
        /// <returns>The text with tags removed; an empty string when the input is null or empty.</returns>
        private string thtxt(string q)
        {
            if (string.IsNullOrEmpty(q))
            {
                return string.Empty;
            }

            string th = TagPattern.Replace(q, "");

            // NOTE(review): the original also called th.Replace("<", "<"), which is a no-op
            // (possibly a mangled "&lt;" entity decode - TODO confirm). It has been dropped.
            th = th.Replace(">", "");
            return th;
        }

        /// <summary>
        /// Click handler: downloads the page, shows and saves the tag-stripped text, then
        /// replaces the display with the excerpt between the start and end markers.
        /// Shows a message box when the download or save fails, or when no excerpt is found.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            string q;
            try
            {
                q = DownloadPage(textBox1.Text.Trim());
            }
            catch (UriFormatException ex)
            {
                // The text box did not contain a well-formed URL.
                MessageBox.Show(ex.Message);
                return;
            }
            catch (NotSupportedException ex)
            {
                // The URL scheme has no registered request handler.
                MessageBox.Show(ex.Message);
                return;
            }
            catch (WebException ex)
            {
                // Network failure or error status from the server.
                MessageBox.Show(ex.Message);
                return;
            }
            catch (IOException ex)
            {
                // The connection broke while the body was being read.
                MessageBox.Show(ex.Message);
                return;
            }

            string th = thtxt(q);
            richTextBox1.Text = th;

            try
            {
                SaveText(OutputPath, th);
            }
            catch (IOException ex)
            {
                // Includes a missing drive or directory.
                MessageBox.Show(ex.Message);
                return;
            }
            catch (UnauthorizedAccessException ex)
            {
                MessageBox.Show(ex.Message);
                return;
            }

            // The excerpt is cut from the raw page source, not from the stripped text.
            string w = ExtractExcerpt(q);
            if (w != null)
            {
                richTextBox1.Text = w;
            }
            else
            {
                MessageBox.Show("您没有获得数据");
            }
        }

        /// <summary>Downloads <paramref name="url"/> and returns the response body decoded as UTF-8.</summary>
        private static string DownloadPage(string url)
        {
            WebRequest request = WebRequest.Create(url);

            // using statements release the response and its stream even when reading throws.
            using (WebResponse response = request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>Writes <paramref name="text"/> followed by a line terminator to <paramref name="path"/>, overwriting it.</summary>
        private static void SaveText(string path, string text)
        {
            using (StreamWriter writer = new StreamWriter(path))
            {
                writer.WriteLine(text);
            }
        }

        /// <summary>
        /// Returns the part of <paramref name="source"/> from the start marker through the end
        /// of the first end marker that follows it, or null when either marker is missing.
        /// </summary>
        private static string ExtractExcerpt(string source)
        {
            // Ordinal comparison: the markers are literal text, not linguistic content.
            int sub = source.IndexOf(StartMarker, StringComparison.Ordinal);
            if (sub < 0)
            {
                return null;
            }

            // Search from the start marker so an earlier occurrence of the end marker
            // cannot produce a negative length.
            int xub = source.IndexOf(EndMarker, sub, StringComparison.Ordinal);
            if (xub < 0)
            {
                return null;
            }

            // Adding the marker length keeps the end marker itself in the excerpt.
            return source.Substring(sub, xub - sub + EndMarker.Length);
        }
    }
}