html.php 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. <?php
  /**
   * Small HTTP GET client that fetches the body of a URL.
   *
   * Uses the cURL extension when it is available and falls back to a raw
   * socket opened with fsockopen(). Relative URLs are resolved against the
   * current request ($_SERVER) by format_url().
   */
  class html_lib
  {
      /** @var mixed Not referenced anywhere inside this class. */
      public $app;

      /** @var array Components of the last URL processed by format_url(). */
      public $purl;

      /** @var int Transfer/read timeout in seconds. */
      public $timeout = 10;

      /** @var mixed Not referenced anywhere inside this class. */
      public $socket;

      /** @var string Selected transport: "curl", "fsockopen" or "error". */
      public $use_func = "fsockopen";

      /** @var int Legacy flag; the constructor only ever sets it to 0. */
      public $use_curl = 0;

      /**
       * Picks the transport: cURL if present, otherwise fsockopen(),
       * otherwise "error" (get_content() then always returns false).
       */
      public function __construct()
      {
          // Only fsockopen() itself is needed for the socket transport; the
          // sockets extension (socket_accept) is unrelated to it.
          $fsock_exists = function_exists('fsockopen');
          $curl_exists = function_exists('curl_init');

          if ($curl_exists) {
              $this->use_func = "curl";
          } elseif ($fsock_exists) {
              $this->use_func = "fsockopen";
          } else {
              $this->use_func = "error";
              $this->use_curl = 0;
          }
      }

      /**
       * PHP 4 style constructor name, kept so existing callers that invoke
       * it explicitly keep working. Simply re-runs __construct().
       */
      public function html_lib()
      {
          $this->__construct();
      }

      /**
       * Fetches the response body for $url with the selected transport.
       *
       * @param string $url Absolute URL, or a URL relative to the current request.
       * @return string|false Response body, or false on empty URL / no transport / failure.
       */
      public function get_content($url)
      {
          if (!$url || $this->use_func == "error") {
              return false;
          }
          if ($this->use_func == "curl") {
              return $this->_curl($url);
          }
          return $this->_fsockopen($url);
      }

      /**
       * Fetches $url through the cURL extension.
       *
       * @param string $url
       * @return string|false Response body, or false when the transfer failed.
       */
      public function _curl($url)
      {
          $this->format_url($url);

          // Use the caller's URL untouched when it is already absolute;
          // otherwise send the URL that format_url() resolved.
          $raw = parse_url((string) $url);
          $is_absolute = is_array($raw) && isset($raw['scheme'], $raw['host']);
          $target = $is_absolute ? $url : $this->build_url();

          $curl = curl_init();
          if ($curl === false) {
              return false;
          }
          curl_setopt($curl, CURLOPT_URL, $target);
          curl_setopt($curl, CURLOPT_FORBID_REUSE, true);      // close the connection when done
          curl_setopt($curl, CURLOPT_HEADER, false);           // let cURL strip the response headers
          curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);    // return the result instead of printing it
          curl_setopt($curl, CURLOPT_HTTPGET, true);           // force a GET request
          curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 5);       // seconds to wait for the connection
          curl_setopt($curl, CURLOPT_ENCODING, 'gzip');        // accept and decode gzip
          curl_setopt($curl, CURLOPT_TIMEOUT, $this->timeout);

          if (!empty($this->purl['user'])) {
              $pass = isset($this->purl['pass']) ? $this->purl['pass'] : '';
              curl_setopt($curl, CURLOPT_USERPWD, $this->purl['user'] . ":" . $pass);
              curl_setopt($curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC);
          }

          // The Host header is derived from the URL by cURL itself (including
          // a non-default port), so only the Referer is set explicitly.
          $header = array(
              "Referer: " . $this->purl['scheme'] . "://" . $this->purl['host'],
          );
          curl_setopt($curl, CURLOPT_HTTPHEADER, $header);

          $content = curl_exec($curl);
          $failed = ($content === false || curl_errno($curl) != 0);

          // curl_close() is a no-op since PHP 8.0; only call it on older
          // versions, where it is what releases the handle.
          if (PHP_VERSION_ID < 80000) {
              curl_close($curl);
          }

          return $failed ? false : $content;
      }

      /**
       * Fetches $url over a raw socket, speaking HTTP/1.1 by hand.
       *
       * @param string $url
       * @return string|false Response body (de-chunked when needed), or false on failure.
       */
      public function _fsockopen($url)
      {
          // HTTP line endings are always CRLF, independent of the host OS.
          $crlf = "\r\n";

          // Normalise the URL into $this->purl.
          $this->format_url($url);
          $parts = $this->purl;

          $is_https = ($parts['scheme'] === 'https');
          $remote = ($is_https ? 'ssl://' : '') . $parts['host'];
          $handle = fsockopen($remote, $parts['port'], $errno, $errstr, $this->timeout);
          if (!$handle) {
              return false;
          }
          // Bound socket reads; this must not touch the script's own time limit.
          stream_set_timeout($handle, (int) $this->timeout);

          // Request target: path plus query. Fragments are never sent to servers.
          $urlext = $parts['path'];
          if (isset($parts['query']) && $parts['query'] !== '') {
              $urlext .= "?" . $parts['query'];
          }

          $default_port = $is_https ? 443 : 80;
          $host_header = $parts['host'];
          if ((int) $parts['port'] !== $default_port) {
              $host_header .= ":" . $parts['port'];
          }

          $out = "GET " . $urlext . " HTTP/1.1" . $crlf;
          $out .= "Host: " . $host_header . $crlf;
          $out .= "Referer: " . $parts['scheme'] . "://" . $parts['host'] . $crlf;
          if (!empty($parts['user'])) {
              // Same Basic credentials that _curl() sends.
              $pass = isset($parts['pass']) ? $parts['pass'] : '';
              $out .= "Authorization: Basic " . base64_encode($parts['user'] . ":" . $pass) . $crlf;
          }
          $out .= "Connection: Close" . $crlf . $crlf;

          if (!fwrite($handle, $out)) {
              fclose($handle);
              return false;
          }

          $content = stream_get_contents($handle);
          $meta = stream_get_meta_data($handle);
          fclose($handle);
          if ($content === false || !empty($meta['timed_out'])) {
              return false;
          }

          // Split the header block from the body at the first blank line.
          $separator = '/\r\n\r\n|\n\n|\r\r/';
          $pieces = preg_split($separator, $content, 2);
          $http_header = $pieces[0];
          $http_body = isset($pieces[1]) ? $pieces[1] : '';

          if (preg_match('/^transfer-encoding:[^\r\n]*\bchunked\b/mi', $http_header)) {
              $http_body = $this->unchunkHttp11($http_body);
          }
          return $http_body;
      }

      /**
       * Decodes an HTTP/1.1 "chunked" transfer-encoded body.
       *
       * Chunk extensions (";name=value" after the size) are ignored and
       * decoding stops at the terminating zero-size chunk, so trailer
       * headers are not mistaken for data. Truncated or malformed input
       * yields whatever was decoded up to that point.
       *
       * @param string $data Raw chunked body.
       * @return string Decoded payload.
       */
      public function unchunkHttp11($data)
      {
          $data = (string) $data;
          $total = strlen($data);
          $offset = 0;
          $outData = "";

          while ($offset < $total) {
              $line_end = strpos($data, "\r\n", $offset);
              if ($line_end === false) {
                  break; // truncated size line
              }
              $size_line = substr($data, $offset, $line_end - $offset);
              $semicolon = strpos($size_line, ';');
              if ($semicolon !== false) {
                  $size_line = substr($size_line, 0, $semicolon);
              }
              $size_line = trim($size_line);
              if (!preg_match('/^[0-9a-fA-F]+$/', $size_line)) {
                  break; // not a valid chunk size
              }
              $size = (int) hexdec($size_line);
              if ($size === 0) {
                  break; // last chunk; anything after it is trailers
              }
              $offset = $line_end + 2;
              $outData .= substr($data, $offset, $size);
              $offset += $size + 2; // skip the chunk data and its trailing CRLF
          }
          return $outData;
      }

      /**
       * Returns the line ending associated with the host OS family:
       * "\r\n" when PHP_OS starts with WIN, "\r" when it starts with MAC,
       * "\n" otherwise.
       *
       * @return string
       */
      public function get_crlf()
      {
          $os = strtoupper(substr(PHP_OS, 0, 3));
          if ($os === 'WIN') {
              return "\r\n";
          }
          if ($os === 'MAC') {
              return "\r";
          }
          return "\n";
      }

      /**
       * Parses $url into components and fills in whatever is missing.
       *
       * Components present in the URL are kept. A missing host, scheme or
       * port is taken from the current request ($_SERVER) or a default, and
       * a relative path is resolved against the directory of PHP_SELF.
       * The result is stored in $this->purl and returned.
       *
       * @param string $url
       * @return array Keys host, scheme, port (int) and path are always set;
       *               user, pass, query and fragment only when the URL had them.
       */
      public function format_url($url)
      {
          $parsed = parse_url((string) $url);
          if (!is_array($parsed)) {
              $parsed = array(); // seriously malformed URL
          }

          $host_in_url = isset($parsed['host']);
          if (!$host_in_url) {
              if (!empty($_SERVER["HTTP_HOST"])) {
                  // HTTP_HOST may carry ":port"; the port is tracked separately.
                  $parsed['host'] = preg_replace('/:\d+$/', '', $_SERVER["HTTP_HOST"]);
              } elseif (!empty($_SERVER["SERVER_NAME"])) {
                  $parsed['host'] = $_SERVER["SERVER_NAME"];
              } else {
                  $parsed['host'] = "localhost";
              }
          }

          if (isset($parsed['scheme'])) {
              $parsed['scheme'] = strtolower($parsed['scheme']);
          } else {
              $https = isset($_SERVER["HTTPS"]) ? strtolower((string) $_SERVER["HTTPS"]) : "";
              $parsed['scheme'] = ($https === "" || $https === "off") ? "http" : "https";
          }

          if (isset($parsed['port'])) {
              $parsed['port'] = (int) $parsed['port'];
          } elseif (!$host_in_url && !empty($_SERVER["SERVER_PORT"])) {
              // Only a URL that points back at this server inherits its port.
              $parsed['port'] = (int) $_SERVER["SERVER_PORT"];
          } else {
              $parsed['port'] = ($parsed['scheme'] === "https") ? 443 : 80;
          }

          $self = isset($_SERVER["PHP_SELF"]) ? (string) $_SERVER["PHP_SELF"] : "";
          if (!isset($parsed['path']) || $parsed['path'] === "") {
              $parsed['path'] = "/";
          } elseif ($parsed['path'][0] != '/') {
              if ($self !== "" && $self[0] == '/') {
                  // Resolve against the directory of the running script.
                  $parsed['path'] = substr($self, 0, strrpos($self, '/') + 1) . $parsed['path'];
              } else {
                  $parsed['path'] = "/" . $parsed['path'];
              }
          }

          $this->purl = $parsed;
          return $this->purl;
      }

      /**
       * Rebuilds an absolute URL (scheme, host, non-default port, path,
       * query) from $this->purl. Credentials and fragment are left out.
       *
       * @return string
       */
      private function build_url()
      {
          $parts = $this->purl;
          $default_port = ($parts['scheme'] === "https") ? 443 : 80;

          $built = $parts['scheme'] . "://" . $parts['host'];
          if ((int) $parts['port'] !== $default_port) {
              $built .= ":" . $parts['port'];
          }
          $built .= $parts['path'];
          if (isset($parts['query']) && $parts['query'] !== '') {
              $built .= "?" . $parts['query'];
          }
          return $built;
      }
  }
  181. ?>