// CrawlerHandler.cs (5.5 KB)
  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.Linq;
  5. using System.Net;
  6. using System.Web;
  /// <summary>
  /// Summary description for Crawler.
  /// </summary>
  ///
  11. namespace LYFZ.WanYuKeFuData.UEditControl
  12. {
  /// <summary>
  /// Request handler that fetches a list of remote resources posted in the
  /// "source[]" form field and reports the outcome of each fetch as JSON.
  /// </summary>
  /// <remarks>
  /// Request, Server and WriteJson are not declared in this class; they are
  /// presumably supplied by the Handler base class (confirm against Handler).
  /// </remarks>
  public class CrawlerHandler : Handler
  {
      // Raw source URLs taken from the posted form.
      private string[] Sources;

      // One crawler per source URL, each already fetched.
      private Crawler[] Crawlers;

      /// <summary>Creates the handler for the given HTTP context.</summary>
      /// <param name="context">Context forwarded unchanged to the base constructor.</param>
      public CrawlerHandler(HttpContext context)
          : base(context)
      {
      }

      /// <summary>
      /// Reads every "source[]" form value, fetches each one with a
      /// <see cref="Crawler"/>, and writes a JSON object containing an overall
      /// state plus a per-source list of { state, source, url }.
      /// When no source was posted, only an error state is written.
      /// </summary>
      public override void Process()
      {
          Sources = Request.Form.GetValues("source[]");

          bool hasSources = Sources != null && Sources.Length > 0;
          if (!hasSources)
          {
              var failure = new
              {
                  state = "参数错误:没有指定抓取源"
              };
              WriteJson(failure);
              return;
          }

          // Fetch sequentially, in posting order; publish the array only once
          // every fetch has completed.
          var fetched = new Crawler[Sources.Length];
          for (int index = 0; index < Sources.Length; index++)
          {
              var crawler = new Crawler(Sources[index], Server);
              fetched[index] = crawler.Fetch();
          }
          Crawlers = fetched;

          var items = from crawler in Crawlers
                      select new
                      {
                          state = crawler.State,
                          source = crawler.SourceUrl,
                          url = crawler.ServerUrl
                      };

          var payload = new
          {
              state = "SUCCESS",
              list = items
          };
          WriteJson(payload);
      }
  }
  /// <summary>
  /// Downloads one remote image over HTTP(S) and stores it under the path
  /// produced by PathFormatter from the "catcherPathFormat" configuration value.
  /// The outcome is reported through <see cref="State"/>; <see cref="Fetch"/>
  /// does not throw for bad URLs, DNS failures, HTTP errors or I/O errors.
  /// </summary>
  public class Crawler
  {
      /// <summary>The remote URL this crawler was asked to fetch.</summary>
      public string SourceUrl { get; set; }

      /// <summary>
      /// Server-side path computed for the saved file. It is assigned only after
      /// the response has been accepted as an image; otherwise it stays null.
      /// </summary>
      public string ServerUrl { get; set; }

      /// <summary>
      /// Result of the last <see cref="Fetch"/>: "SUCCESS", "INVALID_URL",
      /// or a human-readable error description.
      /// </summary>
      public string State { get; set; }

      private HttpServerUtility Server { get; set; }

      /// <summary>Creates a crawler for a single source URL.</summary>
      /// <param name="sourceUrl">Absolute URL of the remote resource.</param>
      /// <param name="server">Used to map the formatted server path to a physical path.</param>
      public Crawler(string sourceUrl, HttpServerUtility server)
      {
          this.SourceUrl = sourceUrl;
          this.Server = server;
      }

      /// <summary>
      /// Validates the source URL, downloads it, checks that the response is an
      /// image and writes it to disk. Every failure is captured in
      /// <see cref="State"/> so that one bad source cannot abort a whole batch.
      /// </summary>
      /// <returns>This instance, to allow call chaining.</returns>
      public Crawler Fetch()
      {
          try
          {
              if (!IsExternalIPAddress(this.SourceUrl))
              {
                  State = "INVALID_URL";
                  return this;
              }

              // IsExternalIPAddress has already verified the scheme is http/https,
              // so WebRequest.Create returns an HttpWebRequest here.
              // NOTE(review): automatic redirects are still followed, and the host is
              // resolved again when connecting, so a redirect or DNS change could
              // still reach an internal address - consider validating each hop.
              var request = (HttpWebRequest)WebRequest.Create(this.SourceUrl);
              using (var response = (HttpWebResponse)request.GetResponse())
              {
                  if (response.StatusCode != HttpStatusCode.OK)
                  {
                      State = "Url returns " + response.StatusCode + ", " + response.StatusDescription;
                      return this;
                  }

                  // MIME types are case-insensitive; compare ordinally, not by culture.
                  string contentType = response.ContentType ?? string.Empty;
                  if (contentType.IndexOf("image", StringComparison.OrdinalIgnoreCase) == -1)
                  {
                      State = "Url is not an image";
                      return this;
                  }

                  ServerUrl = PathFormatter.Format(Path.GetFileName(this.SourceUrl), Config.GetString("catcherPathFormat"));
                  string savePath = Server.MapPath(ServerUrl);

                  // CreateDirectory is a no-op when the directory already exists.
                  Directory.CreateDirectory(Path.GetDirectoryName(savePath));

                  // Buffer the whole body first so that a download that fails midway
                  // never leaves a truncated file on disk.
                  byte[] bytes;
                  using (var stream = response.GetResponseStream())
                  using (var ms = new MemoryStream())
                  {
                      byte[] buffer = new byte[4096];
                      int count;
                      while ((count = stream.Read(buffer, 0, buffer.Length)) > 0)
                      {
                          ms.Write(buffer, 0, count);
                      }
                      bytes = ms.ToArray();
                  }

                  File.WriteAllBytes(savePath, bytes);
                  State = "SUCCESS";
              }
          }
          catch (WebException e)
          {
              // GetResponse throws for 4xx/5xx answers; report the HTTP status when
              // the server did answer, otherwise the transport error.
              var errorResponse = e.Response as HttpWebResponse;
              if (errorResponse != null)
              {
                  State = "Url returns " + errorResponse.StatusCode + ", " + errorResponse.StatusDescription;
                  errorResponse.Close();
              }
              else
              {
                  State = "抓取错误:" + e.Message;
              }
          }
          catch (Exception e)
          {
              State = "抓取错误:" + e.Message;
          }
          return this;
      }

      /// <summary>
      /// Decides whether a URL may be fetched: it must be an absolute http/https
      /// URL whose host is either a public IPv4 literal or a DNS name that
      /// resolves to at least one public IPv4 address and to no private,
      /// loopback or link-local address of any family.
      /// </summary>
      /// <param name="url">Candidate URL; null or malformed values yield false.</param>
      /// <returns>True when the URL is considered safe to request.</returns>
      private bool IsExternalIPAddress(string url)
      {
          Uri uri;
          if (!Uri.TryCreate(url, UriKind.Absolute, out uri))
          {
              return false;
          }

          // Only HTTP(S) can be handled by HttpWebRequest; file:, ftp: etc. are rejected.
          if (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps)
          {
              return false;
          }

          switch (uri.HostNameType)
          {
              case UriHostNameType.Dns:
                  IPAddress[] addresses;
                  try
                  {
                      addresses = Dns.GetHostEntry(uri.DnsSafeHost).AddressList;
                  }
                  catch (System.Net.Sockets.SocketException)
                  {
                      // Host cannot be resolved, so there is nothing to fetch.
                      return false;
                  }

                  bool hasPublicIPv4 = false;
                  foreach (IPAddress ipAddress in addresses)
                  {
                      // A single internal address is enough to reject the host:
                      // the connection could be made to any of the resolved addresses.
                      if (IsPrivateIP(ipAddress))
                      {
                          return false;
                      }
                      if (ipAddress.AddressFamily == System.Net.Sockets.AddressFamily.InterNetwork)
                      {
                          hasPublicIPv4 = true;
                      }
                  }
                  return hasPublicIPv4;

              case UriHostNameType.IPv4:
                  return !IsPrivateIP(IPAddress.Parse(uri.DnsSafeHost));
          }

          // IPv6 literals and any other host type are not accepted.
          return false;
      }

      /// <summary>
      /// Tells whether an address belongs to a range that must not be reachable
      /// through the crawler: loopback, the RFC 1918 private IPv4 blocks,
      /// IPv4 link-local and "this network", and IPv6 link-local, site-local and
      /// unique-local addresses.
      /// </summary>
      /// <param name="myIPAddress">Address to classify.</param>
      /// <returns>True for internal addresses, false otherwise.</returns>
      private bool IsPrivateIP(IPAddress myIPAddress)
      {
          if (IPAddress.IsLoopback(myIPAddress))
          {
              return true;
          }

          byte[] ipBytes = myIPAddress.GetAddressBytes();

          if (myIPAddress.AddressFamily == System.Net.Sockets.AddressFamily.InterNetwork)
          {
              switch (ipBytes[0])
              {
                  case 0:
                      // 0.0.0.0/8 ("this network")
                      return true;
                  case 10:
                      // 10.0.0.0/8
                      return true;
                  case 172:
                      // 172.16.0.0/12 covers 172.16.x.x through 172.31.x.x
                      return ipBytes[1] >= 16 && ipBytes[1] <= 31;
                  case 192:
                      // 192.168.0.0/16
                      return ipBytes[1] == 168;
                  case 169:
                      // 169.254.0.0/16 (link-local)
                      return ipBytes[1] == 254;
                  default:
                      return false;
              }
          }

          if (myIPAddress.AddressFamily == System.Net.Sockets.AddressFamily.InterNetworkV6)
          {
              // fe80::/10 link-local, fec0::/10 site-local, fc00::/7 unique-local.
              return myIPAddress.IsIPv6LinkLocal
                  || myIPAddress.IsIPv6SiteLocal
                  || (ipBytes[0] & 0xFE) == 0xFC;
          }

          return false;
      }
  }
  158. }