PHP XML 파일 Parsing

PHP 2008. 10. 1. 11:41

// 파일명 getXML.php


<?php
// The session must be started before any output is sent: once the HTML head
// below has been emitted, PHP can no longer send the session cookie header.
session_start();
error_reporting(E_ALL ^ E_NOTICE); // report every error level except notices
ini_set("display_errors", "1");    // echo errors to the page ("0" would hide them)
?>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=euc-kr">
<meta name="robots" content="noindex, nofollow">
</head>
<?php

// Project-wide configuration, DB connection and helper functions.
include_once "../../common/config/common.php";
include_once "../../common/inc/db_con.inc.php";
include_once "../../common/inc/function.inc.php";

// Request parameters, filtered through RemoveXSS()
// (presumably defined in one of the includes above - confirm).
$sdate = RemoveXSS($_REQUEST["sdate"]);
$umode = RemoveXSS($_REQUEST["umode"]);

/**
 * Event-driven RSS collector for PHP's xml_* (SAX-style) parser.
 *
 * Register startElement/endElement/characterData as handlers; after parsing,
 * $feed_info holds the channel-level fields and $feed_articles holds one
 * associative array per <item>, keyed by upper-cased element name.
 */
class RSSParser {

    var $feed_info = array();      // channel-level fields (TITLE, LINK, ...)
    var $feed_articles = array();  // list of completed items
    var $inchannel = FALSE;        // currently inside <channel>
    var $initem = FALSE;           // currently inside <item>
    var $inimage = FALSE;          // currently inside a channel-level <image>
    var $current_item = array();   // fields of the item being collected
    var $current_el = FALSE;       // upper-cased name of the open leaf element

    /**
     * Handle an opening tag.
     *
     * @param mixed  $parser the XML parser handle (unused)
     * @param string $name   element name
     * @param array  $attrs  element attributes (unused)
     */
    function startElement($parser, $name, $attrs)
    {
        $el = strtoupper($name);
        if ($el == 'RSS') {
            return;
        } else if ($el == 'CHANNEL') {
            $this->inchannel = TRUE;
        } else if ($el == 'ITEM') {
            $this->initem = TRUE;
        } else if ($el == 'IMAGE' && !$this->initem) {
            // Channel-level <image> has its own TITLE/LINK/URL children;
            // flag it so they are not appended to the channel's fields.
            $this->inimage = TRUE;
        } else {
            $this->current_el = $el;
        }
    }

    /**
     * Handle a closing tag.
     *
     * Adjust the 'CHANNEL' / 'ITEM' names below to match the element names
     * used by the XML of the site providing the feed.
     *
     * @param mixed  $parser the XML parser handle (unused)
     * @param string $name   element name
     */
    function endElement($parser, $name)
    {
        $el = strtoupper($name);

        if ($el == 'RSS') {
            return;
        } else if ($el == 'CHANNEL') {
            $this->inchannel = FALSE;
        } else if ($el == 'ITEM') {
            // Item finished: store it and start a fresh one.
            $this->feed_articles[] = $this->current_item;
            $this->current_item = array();
            $this->initem = FALSE;
        } else if ($el == 'IMAGE' && $this->inimage) {
            $this->inimage = FALSE;
        } else {
            $this->current_el = FALSE;
        }
    }

    /**
     * Handle character data between tags.
     *
     * The parser may deliver one text node in several chunks, so the data is
     * appended to whatever has been collected for the current element.
     *
     * @param mixed  $parser the XML parser handle (unused)
     * @param string $data   text chunk
     */
    function characterData($parser, $data)
    {
        if (!$this->current_el) {
            return; // text outside of a leaf element (e.g. indentation)
        }
        $key = $this->current_el;

        if ($this->initem) {
            if (!isset($this->current_item[$key])) {
                $this->current_item[$key] = '';
            }
            $this->current_item[$key] .= $data;
        } else if ($this->inimage) {
            // Channel image details are intentionally ignored.
        } else if ($this->inchannel) {
            if (!isset($this->feed_info[$key])) {
                $this->feed_info[$key] = '';
            }
            $this->feed_info[$key] .= $data;
        }
    }
}

/**
 * Parse an RSS document and build one TBWB_BOARD_P06 INSERT per <item>.
 *
 * The generated INSERT statements are only echoed for inspection; they are
 * not executed. XML parse errors and DB errors are printed to the page.
 *
 * @param string $document raw XML text of the feed
 * @return void
 */
function parse_save_rss($document)
{
    global $cu_conn;

    // Feeds that declare an encoding other than UTF-8 are converted to EUC-KR
    // (the charset this page declares) and their XML declaration is rewritten
    // to match. UTF-8 feeds are passed through unchanged.
    if (preg_match('/<\?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $document, $m)) {
        $in_enc = strtoupper($m[1]);
        if ($in_enc != 'UTF-8') {
            $converted = iconv($in_enc, 'EUC-KR', $document);
            // Only rewrite the declaration when the conversion succeeded, so
            // the declared and the actual encoding never disagree.
            if ($converted !== false) {
                $document = preg_replace('/(<\?xml.*encoding=[\'"])(.*?)([\'"].*?>)/m', '$1EUC-KR$3', $converted);
            }
        }
    }

    // Create the XML parser and the RSS collector.
    // NOTE(review): 'ISO-8859-1' looks like a byte pass-through trick for
    // EUC-KR data; how this argument is interpreted differs between PHP
    // versions - confirm multi-byte text survives on the deployed version.
    $xml_parser = xml_parser_create('ISO-8859-1');
    $rss_parser = new RSSParser();

    xml_set_object($xml_parser, $rss_parser);
    xml_set_element_handler($xml_parser, "startElement", "endElement");
    xml_set_character_data_handler($xml_parser, "characterData");

    if (!xml_parse($xml_parser, $document, true)) {
        printf("XML error: %s at line %d \n", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser));
    } else {
        // Values that are the same for every article. Session values are
        // escaped as well because they end up inside string literals.
        $admin_id = cubrid_real_escape_string(
            isset($_SESSION['SS_ADMIN_ID']) ? (string) $_SESSION['SS_ADMIN_ID'] : '',
            $cu_conn
        );
        $admin_nickname = cubrid_real_escape_string(
            isset($_SESSION['SS_ADMIN_NICKNAME']) ? (string) $_SESSION['SS_ADMIN_NICKNAME'] : '',
            $cu_conn
        );
        $new_reply_depth = "A";

        // Columns the feed does not provide; stored as empty strings.
        $upload_multimedia_type = '';
        $summary = '';
        $flv_url = '';
        $cate_code = '';
        $sub_title = '';

        foreach ($rss_parser->feed_articles as $article) {

            // Escape every feed-supplied value with the driver's own routine;
            // items may lack any of these elements.
            $title = cubrid_real_escape_string(isset($article["TITLE"]) ? $article["TITLE"] : '', $cu_conn);
            $description = cubrid_real_escape_string(isset($article["DESCRIPTION"]) ? $article["DESCRIPTION"] : '', $cu_conn);
            $link = cubrid_real_escape_string(isset($article["LINK"]) ? $article["LINK"] : '', $cu_conn); // currently not stored
            // assumes PUBDATE is formatted as 'YYYY.MM.DD HH24:MI' - TODO confirm against the feed
            $pub_date = cubrid_real_escape_string(isset($article["PUBDATE"]) ? $article["PUBDATE"] : '', $cu_conn);

            // Next free article number / group number.
            $query = "SELECT NVL(MAX(UNO), 0) + 1 AS NEW_UNO, NVL(MAX(GNO), 0) + 1 AS NEW_GNO FROM TBWB_BOARD_P06 " ;
            $req = cubrid_execute($cu_conn, $query);
            if (!$req) {
                printf("DB error: %s \n", cubrid_error_msg());
                break;
            }
            $rows = cubrid_fetch($req);
            cubrid_close_request($req);
            if (!$rows) {
                printf("DB error: %s \n", cubrid_error_msg());
                break;
            }

            $new_uno = (int) $rows["NEW_UNO"];
            $new_gno = (int) $rows["NEW_GNO"];

            // Build the INSERT. TO_TIMESTAMP(...) must be a function call,
            // not a quoted string literal.
            $query  = " INSERT INTO TBWB_BOARD_P06 ";
            $query .= " (UNO, GNO, REPLY_DEPTH, MEMB_ID, NICKNAME, SUBJECT, CONTENT, HTMLTAG_YN, ACCESS_IP, PUB_DATE, INS_DATE, UPD_DATE, UPLOAD_MULTIMEDIA_TYPE, RECOMMEND_YN, SUMMARY, FLV_URL, CATE_CODE, SUB_TITLE)";
            $query .= " VALUES";
            $query .= " ($new_uno, $new_gno, '$new_reply_depth', '$admin_id', '$admin_nickname', '$title', '$description', 'Y', '127.0.0.1', TO_TIMESTAMP('$pub_date','YYYY.MM.DD HH24:MI'), sys_timestamp, sys_timestamp, '$upload_multimedia_type', 'N', '$summary', '$flv_url', '$cate_code', '$sub_title') ";
            echo $query."<br /><br /><br />";

            // The statement is intentionally not executed here. To store the
            // article, run it with cubrid_execute($cu_conn, $query) and close
            // the returned request with cubrid_close_request().
        }
    }

    xml_parser_free($xml_parser);
}

// Feeds to read.
// Here a local XML file is parsed directly; a plain http URL (port 80) may be
// listed instead to parse an XML file served over the web.
$feed_urls = array('./dataXML.xml');

foreach ($feed_urls as $url) {

    // Read the whole feed in one call. The previous feof()/fgets() loop could
    // spin forever on a stream that fails mid-read, and it kept the handle
    // open while the feed was being parsed.
    $document = file_get_contents($url);
    if ($document === false) {
        printf("Could not read feed: %s \n", $url);
        continue;
    }

    // Parse the feed and build the DB statements for its articles.
    parse_save_rss($document);
}


/* When parsing an XML file located on an FTP server
// define some variables
$server_file = '';
$local_file = '';

// set up basic connection
$conn_id = ftp_connect("");

// login with username and password
// (never commit real credentials; load them from configuration instead)
$login_result = ftp_login($conn_id, "FTP_USERNAME", "FTP_PASSWORD");

// try to download $server_file and save to $local_file
if (ftp_get($conn_id, $local_file, $server_file, FTP_BINARY)) {
    echo "Successfully written to $local_file\n";
} else {
    echo "There was a problem\n";
}

// close the connection
ftp_close($conn_id);
*/

?>

Posted by 철냄비짱
,