C# 网页图片爬虫的几种技术基础-c# 爬虫

785次阅读
没有评论
C#


一、文件流方式获取网络图片资源

方法1



// Fetch a QR-code image from the web service and show it in the PictureBox.
string url = string.Format(@"http://webservice.36wu.com/DimensionalCodeService.asmx/GetCodeImgByString?size={0}&content={1}", 5, 123456);
System.Net.WebRequest webreq = System.Net.WebRequest.Create(url);

// Dispose the response as well as its stream so the connection is released promptly.
using (System.Net.WebResponse webres = webreq.GetResponse())
using (System.IO.Stream stream = webres.GetResponseStream())
using (Image decoded = Image.FromStream(stream))
{
    // An Image created by Image.FromStream needs its stream kept open for the
    // image's whole lifetime. Copy it into an independent Bitmap so the picture
    // stays valid after the stream is disposed at the end of this block.
    pictureBox1.Image = new Bitmap(decoded);
}


方法2

生成图片的URL假设是这样:http://localhost/administrator/qrcode.aspx?pid=78

qrcode.aspx.cs的生成图片的部分代码:



// Render a QR code into an in-memory bitmap and send it to the browser as a PNG download.
// The stacked using statements dispose the Graphics first and then the Image,
// the same order as the original finally block.
using (Image image = new Bitmap(200, 200))
using (Graphics g = Graphics.FromImage(image))
{
    string url = "http://localhost";

    DotNetBarcode bc = new DotNetBarcode();
    bc.Type = DotNetBarcode.Types.QRCode;
    bc.PrintCheckDigitChar = true;
    // NOTE(review): the draw rectangle (210x210) is larger than the 200x200 bitmap — confirm this is intended.
    bc.WriteBar(url, 0, 0, 210, 210, g);

    using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
    {
        image.Save(ms, System.Drawing.Imaging.ImageFormat.Png);

        Response.ClearContent();
        // Alternative: to show the image inline instead of downloading it, use
        //   Response.ContentType = "image/Png";
        //   Response.BinaryWrite(ms.ToArray());
        // The content-type assignment below had been merged into the comment line
        // above it, so it never executed; it is restored as a real statement.
        Response.ContentType = "application/octet-stream";
        Response.AddHeader("Content-Disposition", "attachment; filename=" + HttpUtility.UrlEncode("qrcode.png", System.Text.Encoding.UTF8));
        Response.BinaryWrite(ms.ToArray());
    }
}


 

或者这样



string fileName = "aaa.txt"; // File name the client will save the download as
string filePath = Server.MapPath("DownLoad/aaa.txt"); // Physical path of the file on the server

// Read the whole file as bytes. File.ReadAllBytes opens the file for reading only,
// keeps reading until every byte is in the array, and always closes the handle.
// The previous FileStream(FileMode.Open) + single Read() asked for read/write access,
// was not guaranteed to fill the buffer, and leaked the handle if Read threw.
byte[] bytes = System.IO.File.ReadAllBytes(filePath);

Response.ContentType = "application/octet-stream";
// Tell the browser to download the file rather than open it.
Response.AddHeader("Content-Disposition", "attachment; filename=" + HttpUtility.UrlEncode(fileName, System.Text.Encoding.UTF8));
Response.BinaryWrite(bytes);
Response.Flush();
Response.End();


 

 

 

 二、WebClient方式从服务器上下载文件

参考方法1:



/// <summary>
/// Downloads a file from a server into a local directory. The local file name is
/// the last path segment of <paramref name="url"/>; if a file with that name already
/// exists in <paramref name="dir"/>, nothing is downloaded.
/// </summary>
/// <param name="url">Absolute address of the file to download.</param>
/// <param name="dir">Directory to save the file in; it is created when missing.</param>
/// <remarks>
/// Best-effort: failures are written to the trace output and never thrown to the caller.
/// </remarks>
public void DownloadUrlFile(string url, string dir)
{
    if (string.IsNullOrEmpty(url) || string.IsNullOrEmpty(dir))
    {
        return;
    }

    try
    {
        // Drop any query string or fragment so it does not end up in the file name.
        int cut = url.IndexOfAny(new[] { '?', '#' });
        string remotePath = cut >= 0 ? url.Substring(0, cut) : url;

        string fileName = Path.GetFileName(remotePath);
        if (string.IsNullOrEmpty(fileName))
        {
            return; // The URL names a directory, not a file.
        }

        // Path.Combine inserts a separator when dir lacks a trailing one;
        // plain concatenation would produce "C:\dirfile.ext".
        string path = Path.Combine(dir, fileName);

        if (!System.IO.Directory.Exists(dir))
        {
            System.IO.Directory.CreateDirectory(dir);
        }

        if (!System.IO.File.Exists(path))
        {
            using (WebClient client = new WebClient())
            {
                client.DownloadFile(url, path);
            }
        }
    }
    catch (Exception ex)
    {
        // Still non-throwing for callers, but the failure is no longer silently lost.
        System.Diagnostics.Trace.TraceError("DownloadUrlFile failed for '{0}': {1}", url, ex);
    }
}


 

 

 

 

 

 

参考方法2 [2]



<%@ Page Language="C#" AutoEventWireup="true" CodeBehind="GetPictureByUrl.aspx.cs" Inherits="HoverTreeMobile.GetPictureByUrl" %>
<%-- Form with a URL text box, a download button, an image preview and a tips literal; the button's click is handled by btnImg_Click. --%>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <title>根据网址把图片下载到服务器 – 何问起</title>
</head>
<body>
    <form id="form1" runat="server">
        <div>
            图片网址:<br /><asp:TextBox runat="server" ID="textBoxImgUrl" Width="500" Text="http://hovertree.com/hvtimg/201508/cnvkv745.jpg" />
            <br /> <asp:Button runat="server" ID="btnImg" Text="下载" OnClick="btnImg_Click" />
            <br /><asp:Image runat="server" ID="hvtImg" />
            <br />
            <asp:Literal runat="server" ID="ltlTips" />
        </div>
    </form>
</body>
</html>


页面所对应的代码



using System;

namespace HoverTreeMobile
{
    /// <summary>
    /// Code-behind for GetPictureByUrl.aspx: downloads the image whose URL is typed
    /// into <c>textBoxImgUrl</c> into /hovertreeimages/ on the server and shows it
    /// in <c>hvtImg</c>.
    /// </summary>
    public partial class GetPictureByUrl : System.Web.UI.Page
    {
        protected void Page_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Click handler of the download button. Validates the URL, saves the image
        /// under a generated file name and points the image control at it. On failure
        /// the exception message is shown in <c>ltlTips</c>.
        /// </summary>
        protected void btnImg_Click(object sender, EventArgs e)
        {
            try
            {
                string imageUrl = textBoxImgUrl.Text.Trim();

                // Reject anything that is not an http(s) URL ending in a supported
                // image extension, e.g. http://hovertree.com/hvtart/bjae/t2lo8pf7.htm
                // is an htm file, not a picture.
                if (!IsSupportedImageUrl(imageUrl))
                {
                    ltlTips.Text = "输入的不是指定格式的图片的网址";

                    return;
                }

                // Generated file name: a string from GetHoverTreeString plus the last
                // 4 characters of the URL (presumably the extension — see GetLastStr).
                string m_picFileName = HoverTree.HoverTreeFrame.Utils.GetHoverTreeString() + HoverTree.HoverTreeFrame.HoverString.GetLastStr(imageUrl, 4);

                string m_keleyiPicture = Server.MapPath("/hovertreeimages/" + m_picFileName);

                // Download the file; the client is disposed as soon as the transfer ends.
                using (System.Net.WebClient m_hvtWebClient = new System.Net.WebClient())
                {
                    m_hvtWebClient.DownloadFile(imageUrl, m_keleyiPicture);
                }

                hvtImg.ImageUrl = "/hovertreeimages/" + m_picFileName;
                ltlTips.Text = string.Empty;
            }
            catch (Exception ex)
            {
                // Show only the message, HTML-encoded: the Literal emits raw markup, and
                // the full ex.ToString() would expose stack traces and server paths.
                ltlTips.Text = Server.HtmlEncode(ex.Message);
            }
        }

        /// <summary>
        /// Returns true when <paramref name="imageUrl"/> is an absolute http/https URL
        /// ending in .jpg, .gif or .png (case-insensitive).
        /// </summary>
        private static bool IsSupportedImageUrl(string imageUrl)
        {
            Uri parsed;
            if (!Uri.TryCreate(imageUrl, UriKind.Absolute, out parsed))
            {
                return false;
            }

            // WebClient also accepts file:// addresses; restricting the scheme stops
            // the page from being used to copy local server files.
            if (parsed.Scheme != Uri.UriSchemeHttp && parsed.Scheme != Uri.UriSchemeHttps)
            {
                return false;
            }

            return imageUrl.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase)
                || imageUrl.EndsWith(".gif", StringComparison.OrdinalIgnoreCase)
                || imageUrl.EndsWith(".png", StringComparison.OrdinalIgnoreCase);
        }
    }
}


// Build the random image file name: the generated string followed by the
// last 4 characters of the entered URL.
string m_picFileName = string.Concat(
    HoverTree.HoverTreeFrame.Utils.GetHoverTreeString(),
    HoverTree.HoverTreeFrame.HoverString.GetLastStr(textBoxImgUrl.Text, 4));

以上代码,请下载源代码查看详细实现方法。部分可到 LINK 查看。

HoverTree 开源项目:新增根据网址把图片下载到服务器功能

请看 HoverTreeMobile 项目,http://hovertree.com,何问起,源代码下载 LINK。

 

三、网页相关的方式

方法1:



/// <summary>
/// Page that fetches an image from a configured remote location and streams it back
/// to the browser as a file download. The image name is read from the
/// <c>InternalSysURL</c> query-string parameter.
/// </summary>
public partial class DownLoadFile : System.Web.UI.Page
{
    protected void Page_Load(object sender, EventArgs e)
    {
        // NOTE(review): picName is untrusted input appended to the configured URL
        // prefix. If the prefix is empty, the caller controls the whole URL —
        // consider validating the name against an allow-list.
        string picName = Request.QueryString["InternalSysURL"];
        if (!String.IsNullOrEmpty(picName))
        {
            byte[] content = this.GetImageContent(picName);
            this.WriteResponse(picName, content);
        }
    }

    #region
    /// <summary>
    /// Downloads the image named <paramref name="picName"/> from the configured URL
    /// prefix and returns its raw bytes.
    /// </summary>
    private byte[] GetImageContent(string picName)
    {
        string fileURL = GetImgUrlPrefix() + picName;

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(fileURL);
        request.AllowAutoRedirect = true;

        WebProxy proxy = new WebProxy();
        proxy.BypassProxyOnLocal = true;
        proxy.UseDefaultCredentials = true;

        request.Proxy = proxy;

        // The response is disposed too, not just its stream, so the connection is
        // released even when reading fails.
        using (WebResponse response = request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (MemoryStream ms = new MemoryStream())
        {
            stream.CopyTo(ms);
            return ms.ToArray();
        }
    }

    /// <summary>
    /// Sends <paramref name="content"/> to the client as an attachment named
    /// <paramref name="picName"/> and ends the response.
    /// </summary>
    private void WriteResponse(string picName, byte[] content)
    {
        Response.Clear();
        Response.ClearHeaders();
        Response.Buffer = false;
        Response.ContentType = "application/octet-stream";
        // UTF-8 rather than Encoding.Default: the system ANSI code page varies between
        // servers and garbles non-ASCII names; UTF-8 matches the other download snippets.
        Response.AppendHeader("Content-Disposition", "attachment;filename=" + HttpUtility.UrlEncode(picName, Encoding.UTF8));
        Response.AppendHeader("Content-Length", content.Length.ToString());
        Response.BinaryWrite(content);
        Response.Flush();
        Response.End();
    }

    /// <summary>
    /// Reads the image URL prefix from the first <c>ProductImageOriginal</c> element
    /// of ImageDownLoad.xml. Returns an empty string when the element is missing or
    /// has no value.
    /// </summary>
    private static string GetImgUrlPrefix()
    {
        XmlDocument xmlDoc = new XmlDocument();
        xmlDoc.Load(AppDomain.CurrentDomain.BaseDirectory + "//Pages//ItemMaintain//ImageDownLoad.xml");
        XmlNodeList nodes = xmlDoc.GetElementsByTagName("ProductImageOriginal");
        if (nodes.Count == 0)
        {
            return "";
        }

        // An empty element has no child node; indexing ChildNodes[0] there gives null
        // and the old code threw NullReferenceException on .Value.
        XmlNode firstChild = nodes[0].FirstChild;
        if (firstChild == null || firstChild.Value == null)
        {
            return "";
        }

        return firstChild.Value;
    }

    #endregion
}


方法2[3]

根据URL请求获取页面HTML代码



/// <summary>
/// Requests a page and returns its HTML as a string.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <param name="encoding">
/// Encoding used to decode the response: "UTF8" selects UTF-8; any other value
/// (including "Default") selects <see cref="Encoding.Default"/>.
/// </param>
/// <returns>The page HTML, or an empty string when <paramref name="url"/> is null or empty.</returns>
public static string GetHtmlStr(string url, string encoding)
{
    if (String.IsNullOrEmpty(url))
    {
        return "";
    }

    // Only "UTF8" is recognised; every other value falls back to the system default,
    // exactly as before (the explicit "Default" branch was redundant).
    Encoding ec = encoding == "UTF8" ? Encoding.UTF8 : Encoding.Default;

    WebRequest request = WebRequest.Create(url);

    // using blocks release the response, stream and reader even when the request or
    // the read throws; the old explicit Close() calls were skipped on any exception.
    using (WebResponse response = request.GetResponse())
    using (Stream datastream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(datastream, ec))
    {
        return reader.ReadToEnd();
    }
}


下载网站图片



/// <summary>
/// Downloads an image from a web site into the application's File directory under
/// a generated file name.
/// </summary>
/// <param name="picUrl">Address of the image to download.</param>
/// <returns>
/// The generated file name (always ending in ".jpeg"), or an empty string when
/// <paramref name="picUrl"/> is null/empty or the download fails.
/// </returns>
public string SaveAsWebImg(string picUrl)
{
    if (String.IsNullOrEmpty(picUrl))
    {
        return "";
    }

    try
    {
        string path = AppDomain.CurrentDomain.SetupInformation.ApplicationBase + @"/File/"; // Target directory

        // Create the target directory on first use; previously a missing directory
        // made every download fail silently.
        System.IO.Directory.CreateDirectory(path);

        // Zero-padded timestamp: the unpadded parts used before were ambiguous
        // (January 11 and November 1 both produced "111").
        string stamp = DateTime.Now.ToString("MMddHHmmss", System.Globalization.CultureInfo.InvariantCulture);

        // Seed from a Guid: a time-seeded Random created twice within the same clock
        // tick yields the same number and therefore the same file name.
        Random rd = new Random(Guid.NewGuid().GetHashCode());

        // NOTE(review): the ".jpeg" extension is applied regardless of the actual image type.
        string fileName = stamp + rd.Next(1000, 1000000) + ".jpeg";

        using (WebClient webClient = new WebClient())
        {
            webClient.DownloadFile(picUrl, path + fileName);
        }

        return fileName;
    }
    catch (Exception ex)
    {
        // Best-effort contract kept (empty string on failure), but the error is recorded.
        System.Diagnostics.Trace.TraceError("SaveAsWebImg failed for '{0}': {1}", picUrl, ex);
        return "";
    }
}


 

 

 

 

 


你们的评论、反馈,以及这些内容对你们有所帮助,是我整理材料和撰写博文的最大鼓励和唯一动力。欢迎讨论和关注!

没有整理与归纳的知识,一文不值!高度概括与梳理的知识,才是自己真正的知识与技能。

永远不要让自己的自由、好奇、充满创造力的想法被现实的框架所束缚,让创造力自由成长吧!

多花时间,关心他(她)人,正如别人所关心你的。理想的腾飞与实现,没有别人的支持与帮助,是万万不能的。


神龙|纯净稳定代理IP免费测试>>>>>>>>天启|企业级代理IP免费测试>>>>>>>>IPIPGO|全球住宅代理IP免费测试

相关文章:

版权声明:Python教程2022-11-01发表,共计7125字。
新手QQ群:570568346,欢迎进群讨论 Python51学习