加入收藏 | 设为首页 | 会员中心 | 我要投稿 淮安站长网 (https://www.0517zz.com.cn/)- 数据开发、人脸识别、智能机器人、图像处理、语音技术!
当前位置: 首页 > 站长学院 > PHP教程 > 正文

PHP采集程序中常用的函数

发布时间:2022-06-20 08:56:04 所属栏目:PHP教程 来源:互联网
导读:函数描述及例子 PHP采集程序中常用的函数 查询关键字 PHP采集程序中常用的函数 //获得当前的脚本网址 function get_php_url(){ if(!empty($_SERVER["REQUEST_URI"])){ $scriptName = $_SERVER["REQUEST_URI"]; $nowurl = $scriptName; }else{ $scriptName = ……
  函数描述及例子 PHP采集程序中常用的函数 查询关键字 PHP采集程序中常用的函数
  /**
   * Get the URL (path plus query string) of the current request.
   *
   * Uses $_SERVER["REQUEST_URI"] when it is set and non-empty; otherwise the
   * value is rebuilt from $_SERVER["PHP_SELF"], with "?" and
   * $_SERVER["QUERY_STRING"] appended when a query string is present.
   *
   * @return string The current script URL ('' if no usable $_SERVER key exists).
   */
  function get_php_url(){
      // empty() also covers the "key not set" case without raising a notice.
      if(!empty($_SERVER["REQUEST_URI"])){
          return $_SERVER["REQUEST_URI"];
      }
      $scriptName = isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : '';
      if(empty($_SERVER["QUERY_STRING"])){
          return $scriptName;
      }
      return $scriptName."?".$_SERVER["QUERY_STRING"];
  }
  /**
   * Convert full-width digits to ASCII digits and reduce the value to a
   * plain number string.
   *
   * After the digit conversion every character other than 0-9 and "." is
   * removed, as is a run of zeros at the very start of the string.
   *
   * @param string $fnum Text that may contain full-width digits.
   * @return string|int The cleaned digit string, or integer 0 when nothing is left.
   */
  function GetAlabNum($fnum){
      // NOTE(review): the replacement is byte-based, so the input is assumed to
      // use the same encoding as this source file -- confirm.
      $fullWidth = array("０","１","２","３","４","５","６","７","８","９");
      $halfWidth = array("0","1","2","3","4","5","6","7","8","9");
      $fnum = str_replace($fullWidth, $halfWidth, $fnum);
      // preg_replace() replaces ereg_replace(), which was removed in PHP 7.
      $fnum = preg_replace('/[^0-9.]|^0{1,}/', "", $fnum);
      if($fnum === "" || $fnum === null) $fnum = 0;
      return $fnum;
  }
  /**
   * Convert plain text into HTML-safe text.
   *
   * Each pair of spaces becomes a single space, "<" and ">" are escaped to
   * "&lt;" and "&gt;", and every run of CR/LF characters becomes one
   * "<br/>" followed by CRLF. "&" and quotes are left untouched.
   *
   * @param string $txt Plain text.
   * @return string Text with angle brackets escaped and line breaks as <br/>.
   */
  function Text2Html($txt){
      // NOTE(review): the replacement may originally have been a full-width
      // space or &nbsp; rather than an ASCII space -- confirm against upstream.
      $txt = str_replace("  ", " ", $txt);
      $txt = str_replace(array("<", ">"), array("&lt;", "&gt;"), $txt);
      // Greedy on purpose: an ungreedy (U) quantifier would emit one <br/> per
      // CR or LF character instead of one per line break.
      $txt = preg_replace("/[\r\n]+/", "<br/>\r\n", $txt);
      return $txt;
  }
  /**
   * Neutralise HTML markup by escaping angle brackets.
   *
   * Only "<" and ">" are converted (to "&lt;" and "&gt;"); "&" and quotes
   * are left as they are.
   *
   * @param string $str Text that may contain HTML tags.
   * @return string The text with "<" and ">" replaced by their entities.
   */
  function ClearHtml($str){
      return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
  }
  /**
   * Turn root-relative href/src attributes into absolute URLs.
   *
   * The scheme and host are taken from $feed_url; every href="/..." and
   * src="/..." in $content is then prefixed with them. Protocol-relative
   * values (href="//host/...") and document-relative values (href="a.html")
   * are left unchanged.
   *
   * @param string $content  HTML fragment to rewrite.
   * @param string $feed_url URL the fragment was fetched from.
   * @return string The rewritten HTML, or $content unchanged when $feed_url
   *                has no host or no http/https/ftp scheme.
   */
  function relative_to_absolute($content, $feed_url) {
      // '#' delimiters avoid having to escape the slashes in "://".
      if (!preg_match('#(http|https|ftp)://#', $feed_url, $protocol)) {
          return_unchanged:
          $protocol = array();
      }
      $server_url = preg_replace('#(http|https|ftp|news)://#', '', $feed_url);
      // Keep only the host part: drop everything from the first "/" onward.
      $server_url = preg_replace('#/.*#', '', $server_url);
      if ($server_url == '' || !isset($protocol[0])) {
          return $content;
      }
      // Escape "\" and "$" so the base URL is never read as a back-reference.
      $base = addcslashes($protocol[0].$server_url, '\\$');
      // (?!/) skips protocol-relative URLs such as src="//cdn.example.org/x.js".
      return preg_replace('#(href|src)="/(?!/)#', '$1="'.$base.'/', $content);
  }
  /**
   * Extract every link from an HTML fragment.
   *
   * Only <a> tags whose first attribute is href are matched. The value may
   * be double-quoted, single-quoted or unquoted, and the link text must not
   * contain ">" (so anchors wrapping other tags are skipped).
   *
   * @param string $code HTML source.
   * @return array array('name' => list of link texts, 'url' => list of href
   *               values); the two lists are index-aligned.
   */
  function get_all_url($code){
      // '#' delimiter so "</a>" needs no escaping; \' is the PHP-level escape
      // for a quote inside this single-quoted pattern.
      preg_match_all('#<a\s+href=["\']?([^>"\' ]+)["\']?\s*[^>]*>([^>]+)</a>#i',$code,$arr);
      return array('name'=>$arr[2],'url'=>$arr[1]);
  }
  /**
   * Return the text found between two marker strings.
   *
   * The result is the text after the first occurrence of $start, up to the
   * next occurrence of either $end or $start, or to the end of the string
   * when neither follows.
   *
   * @param string $str   Text to search.
   * @param string $start Opening marker.
   * @param string $end   Closing marker.
   * @return string|null The enclosed text; '' when $start does not occur;
   *                     null when either marker is empty.
   */
  function get_tag_data($str, $start, $end){
      if ( $start == '' || $end == '' ){
          return null;
      }
      $parts = explode($start, $str);
      // Without this guard a missing $start reads an undefined index and
      // passes null to explode().
      if (!isset($parts[1])) {
          return '';
      }
      $parts = explode($end, $parts[1]);
      return $parts[0];
  }
  /**
   * Convert each row of an HTML table into a CSV-style string.
   *
   * Every cell is wrapped in double quotes and cells are joined by commas,
   * e.g. '"a","b"'. Remaining tags, line breaks and ALL space characters
   * (including those inside cell text) are removed. Quotes inside cell
   * text are not escaped.
   *
   * @param string $table HTML of a table.
   * @return array List of row strings, one per </tr>.
   */
  function get_tr_array($table) {
      $table = preg_replace('#<td[^>]*?>#si', '"', $table);
      // Case-insensitive, to agree with the /i flag used for opening tags.
      $table = str_ireplace('</td>', '",', $table);
      $table = str_ireplace('</tr>', '{tr}', $table);
      // Strip every remaining HTML tag.
      $table = preg_replace('#<[/!]*?[^<>]*?>#si', '', $table);
      // Strip line breaks together with any whitespace that follows them.
      $table = preg_replace('#[\r\n]\s*#', '', $table);
      // NOTE(review): "&nbsp;" is assumed to be one of the two space-like
      // strings the original removed -- confirm against upstream.
      $table = str_replace(array('&nbsp;', ' '), '', $table);
      $rows = explode(',{tr}', $table);
      // The piece after the last row marker is not a row.
      array_pop($rows);
      return $rows;
  }
  /**
   * Convert an HTML table into a two-dimensional array of cell texts.
   *
   * Remaining tags, line breaks and ALL space characters (including those
   * inside cell text) are removed from the cell values.
   *
   * @param string $table HTML of a table.
   * @return array List of rows, each a list of cell strings; an empty array
   *               when the input contains no rows.
   */
  function get_td_array($table) {
      // Drop the opening tags; the closing tags become the delimiters.
      $table = preg_replace('#<table[^>]*?>#si', '', $table);
      $table = preg_replace('#<tr[^>]*?>#si', '', $table);
      $table = preg_replace('#<td[^>]*?>#si', '', $table);
      $table = str_ireplace('</tr>', '{tr}', $table);
      $table = str_ireplace('</td>', '{td}', $table);
      // Strip every remaining HTML tag.
      $table = preg_replace('#<[/!]*?[^<>]*?>#si', '', $table);
      // Strip line breaks together with any whitespace that follows them.
      $table = preg_replace('#[\r\n]\s*#', '', $table);
      // NOTE(review): "&nbsp;" is assumed to be one of the two space-like
      // strings the original removed -- confirm against upstream.
      $table = str_replace(array('&nbsp;', ' '), '', $table);

      $rows = explode('{tr}', $table);
      // The piece after the last row marker is not a row.
      array_pop($rows);
      // Initialised up front so a table without rows yields array(), not null.
      $td_array = array();
      foreach ($rows as $tr) {
          $td = explode('{td}', $tr);
          // The piece after the last cell marker is not a cell.
          array_pop($td);
          $td_array[] = $td;
      }
      return $td_array;
  }
  /**
   * Collect the English words (runs of ASCII letters) found in a string.
   *
   * @param string $str      Text to scan.
   * @param bool   $distinct When truthy, duplicate words are removed.
   * @return array The words, sorted with sort() and re-indexed from 0.
   */
  function split_en_str($str,$distinct=true) {
      $found = array();
      preg_match_all('/([a-zA-Z]+)/', $str, $found);
      $words = $distinct ? array_unique($found[1]) : $found[1];
      sort($words);
      return $words;
  }
     
  函数描述及例子  
     
  PHP采集程序中常用的函数  
   
  查询关键字  
     
  PHP采集程序中常用的函数  
  <?php
  /**
   * Get the URL (path plus query string) of the current request.
   *
   * Uses $_SERVER["REQUEST_URI"] when it is set and non-empty; otherwise the
   * value is rebuilt from $_SERVER["PHP_SELF"], with "?" and
   * $_SERVER["QUERY_STRING"] appended when a query string is present.
   *
   * @return string The current script URL ('' if no usable $_SERVER key exists).
   */
  function get_php_url(){
      // empty() also covers the "key not set" case without raising a notice.
      if(!empty($_SERVER["REQUEST_URI"])){
          return $_SERVER["REQUEST_URI"];
      }
      $scriptName = isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : '';
      if(empty($_SERVER["QUERY_STRING"])){
          return $scriptName;
      }
      return $scriptName."?".$_SERVER["QUERY_STRING"];
  }
  /**
   * Convert full-width digits to ASCII digits and reduce the value to a
   * plain number string.
   *
   * After the digit conversion every character other than 0-9 and "." is
   * removed, as is a run of zeros at the very start of the string.
   *
   * @param string $fnum Text that may contain full-width digits.
   * @return string|int The cleaned digit string, or integer 0 when nothing is left.
   */
  function GetAlabNum($fnum){
      // NOTE(review): the replacement is byte-based, so the input is assumed to
      // use the same encoding as this source file -- confirm.
      $fullWidth = array("０","１","２","３","４","５","６","７","８","９");
      $halfWidth = array("0","1","2","3","4","5","6","7","8","9");
      $fnum = str_replace($fullWidth, $halfWidth, $fnum);
      // preg_replace() replaces ereg_replace(), which was removed in PHP 7.
      $fnum = preg_replace('/[^0-9.]|^0{1,}/', "", $fnum);
      if($fnum === "" || $fnum === null) $fnum = 0;
      return $fnum;
  }
  /**
   * Convert plain text into HTML-safe text.
   *
   * Each pair of spaces becomes a single space, "<" and ">" are escaped to
   * "&lt;" and "&gt;", and every run of CR/LF characters becomes one
   * "<br/>" followed by CRLF. "&" and quotes are left untouched.
   *
   * @param string $txt Plain text.
   * @return string Text with angle brackets escaped and line breaks as <br/>.
   */
  function Text2Html($txt){
      // NOTE(review): the replacement may originally have been a full-width
      // space or &nbsp; rather than an ASCII space -- confirm against upstream.
      $txt = str_replace("  ", " ", $txt);
      $txt = str_replace(array("<", ">"), array("&lt;", "&gt;"), $txt);
      // Greedy on purpose: an ungreedy (U) quantifier would emit one <br/> per
      // CR or LF character instead of one per line break.
      $txt = preg_replace("/[\r\n]+/", "<br/>\r\n", $txt);
      return $txt;
  }
  /**
   * Neutralise HTML markup by escaping angle brackets.
   *
   * Only "<" and ">" are converted (to "&lt;" and "&gt;"); "&" and quotes
   * are left as they are.
   *
   * @param string $str Text that may contain HTML tags.
   * @return string The text with "<" and ">" replaced by their entities.
   */
  function ClearHtml($str){
      return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
  }
  /**
   * Turn root-relative href/src attributes into absolute URLs.
   *
   * The scheme and host are taken from $feed_url; every href="/..." and
   * src="/..." in $content is then prefixed with them. Protocol-relative
   * values (href="//host/...") and document-relative values (href="a.html")
   * are left unchanged.
   *
   * @param string $content  HTML fragment to rewrite.
   * @param string $feed_url URL the fragment was fetched from.
   * @return string The rewritten HTML, or $content unchanged when $feed_url
   *                has no host or no http/https/ftp scheme.
   */
  function relative_to_absolute($content, $feed_url) {
      // '#' delimiters avoid having to escape the slashes in "://".
      if (!preg_match('#(http|https|ftp)://#', $feed_url, $protocol)) {
          $protocol = array();
      }
      $server_url = preg_replace('#(http|https|ftp|news)://#', '', $feed_url);
      // Keep only the host part: drop everything from the first "/" onward.
      $server_url = preg_replace('#/.*#', '', $server_url);
      if ($server_url == '' || !isset($protocol[0])) {
          return $content;
      }
      // Escape "\" and "$" so the base URL is never read as a back-reference.
      $base = addcslashes($protocol[0].$server_url, '\\$');
      // (?!/) skips protocol-relative URLs such as src="//cdn.example.org/x.js".
      return preg_replace('#(href|src)="/(?!/)#', '$1="'.$base.'/', $content);
  }
  /**
   * Extract every link from an HTML fragment.
   *
   * Only <a> tags whose first attribute is href are matched. The value may
   * be double-quoted, single-quoted or unquoted, and the link text must not
   * contain ">" (so anchors wrapping other tags are skipped).
   *
   * @param string $code HTML source.
   * @return array array('name' => list of link texts, 'url' => list of href
   *               values); the two lists are index-aligned.
   */
  function get_all_url($code){
      // '#' delimiter so "</a>" needs no escaping; \' is the PHP-level escape
      // for a quote inside this single-quoted pattern.
      preg_match_all('#<a\s+href=["\']?([^>"\' ]+)["\']?\s*[^>]*>([^>]+)</a>#i',$code,$arr);
      return array('name'=>$arr[2],'url'=>$arr[1]);
  }
  /**
   * Return the text found between two marker strings.
   *
   * The result is the text after the first occurrence of $start, up to the
   * next occurrence of either $end or $start, or to the end of the string
   * when neither follows.
   *
   * @param string $str   Text to search.
   * @param string $start Opening marker.
   * @param string $end   Closing marker.
   * @return string|null The enclosed text; '' when $start does not occur;
   *                     null when either marker is empty.
   */
  function get_tag_data($str, $start, $end){
      if ( $start == '' || $end == '' ){
          return null;
      }
      $parts = explode($start, $str);
      // Without this guard a missing $start reads an undefined index and
      // passes null to explode().
      if (!isset($parts[1])) {
          return '';
      }
      $parts = explode($end, $parts[1]);
      return $parts[0];
  }
  /**
   * Convert each row of an HTML table into a CSV-style string.
   *
   * Every cell is wrapped in double quotes and cells are joined by commas,
   * e.g. '"a","b"'. Remaining tags, line breaks and ALL space characters
   * (including those inside cell text) are removed. Quotes inside cell
   * text are not escaped.
   *
   * @param string $table HTML of a table.
   * @return array List of row strings, one per </tr>.
   */
  function get_tr_array($table) {
      $table = preg_replace('#<td[^>]*?>#si', '"', $table);
      // Case-insensitive, to agree with the /i flag used for opening tags.
      $table = str_ireplace('</td>', '",', $table);
      $table = str_ireplace('</tr>', '{tr}', $table);
      // Strip every remaining HTML tag.
      $table = preg_replace('#<[/!]*?[^<>]*?>#si', '', $table);
      // Strip line breaks together with any whitespace that follows them.
      $table = preg_replace('#[\r\n]\s*#', '', $table);
      // NOTE(review): "&nbsp;" is assumed to be one of the two space-like
      // strings the original removed -- confirm against upstream.
      $table = str_replace(array('&nbsp;', ' '), '', $table);
      $rows = explode(',{tr}', $table);
      // The piece after the last row marker is not a row.
      array_pop($rows);
      return $rows;
  }
  /**
   * Convert an HTML table into a two-dimensional array of cell texts.
   *
   * Remaining tags, line breaks and ALL space characters (including those
   * inside cell text) are removed from the cell values.
   *
   * @param string $table HTML of a table.
   * @return array List of rows, each a list of cell strings; an empty array
   *               when the input contains no rows.
   */
  function get_td_array($table) {
      // Drop the opening tags; the closing tags become the delimiters.
      $table = preg_replace('#<table[^>]*?>#si', '', $table);
      $table = preg_replace('#<tr[^>]*?>#si', '', $table);
      $table = preg_replace('#<td[^>]*?>#si', '', $table);
      $table = str_ireplace('</tr>', '{tr}', $table);
      $table = str_ireplace('</td>', '{td}', $table);
      // Strip every remaining HTML tag.
      $table = preg_replace('#<[/!]*?[^<>]*?>#si', '', $table);
      // Strip line breaks together with any whitespace that follows them.
      $table = preg_replace('#[\r\n]\s*#', '', $table);
      // NOTE(review): "&nbsp;" is assumed to be one of the two space-like
      // strings the original removed -- confirm against upstream.
      $table = str_replace(array('&nbsp;', ' '), '', $table);

      $rows = explode('{tr}', $table);
      // The piece after the last row marker is not a row.
      array_pop($rows);
      // Initialised up front so a table without rows yields array(), not null.
      $td_array = array();
      foreach ($rows as $tr) {
          $td = explode('{td}', $tr);
          // The piece after the last cell marker is not a cell.
          array_pop($td);
          $td_array[] = $td;
      }
      return $td_array;
  }
  /**
   * Collect the English words (runs of ASCII letters) found in a string.
   *
   * @param string $str      Text to scan.
   * @param bool   $distinct When truthy, duplicate words are removed.
   * @return array The words, sorted with sort() and re-indexed from 0.
   */
  function split_en_str($str,$distinct=true) {
      $found = array();
      preg_match_all('/([a-zA-Z]+)/', $str, $found);
      $words = $distinct ? array_unique($found[1]) : $found[1];
      sort($words);
      return $words;
  }
     
  ?>

(编辑:淮安站长网)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!

    热点阅读