PDA

View Full Version : سوال: UTF-8 و مشکلات در تبدیل rtf به html



imanvax
دوشنبه 29 تیر 1394, 10:39 صبح
سلام من از این کلاس برای تبدیل rtf به html استفاده کردم و در آخر کد هم نمونه شی رو برای تست ایجاد کردم . اما برای فایل های rtf که فارسی هستند همه چیز به هم میریزه که قاعدتا باید مربوط به یونیکد باشه . حالا اگر فرمی داشته باشیم که کاربری ده ها فایل رو برای آپلود بگذاره ( یعنی ما کنترلی رو تنظیمات فایل ورودی نداریم) وما ازین کلاس برای تبدیل اون فایل ها استفاده کنیم چگونه این مشکل فارسی رو حل کنیم؟


<?php class RtfElement { protected function Indent($level) { for($i = 0; $i < $level * 2; $i++) echo "&nbsp;"; } } class RtfGroup extends RtfElement { public $parent; public $children; public function __construct() { $this->parent = null; $this->children = array(); } public function GetType() { if(sizeof($this->children) == 0) return null; $child = $this->children[0]; if(get_class($child) != "RtfControlWord") return null; return $child->word; } public function IsDestination() { if(sizeof($this->children) == 0) return null; $child = $this->children[0]; if(get_class($child) != "RtfControlSymbol") return null; return $child->symbol == '*'; } public function dump($level = 0) { echo "<div>"; $this->Indent($level); echo "{"; echo "</div>"; foreach($this->children as $child) { if(get_class($child) == "RtfGroup") { if ($child->GetType() == "fonttbl") continue; if ($child->GetType() == "colortbl") continue; if ($child->GetType() == "stylesheet") continue; if ($child->GetType() == "info") continue; if (substr($child->GetType(), 0, 4) == "pict") continue; if ($child->IsDestination()) continue; } $child->dump($level + 2); } echo "<div>"; $this->Indent($level); echo "}"; echo "</div>"; } } class RtfControlWord extends RtfElement { public $word; public $parameter; public function dump($level) { echo "<div style='color:green'>"; $this->Indent($level); echo "WORD {$this->word} ({$this->parameter})"; echo "</div>"; } } class RtfControlSymbol extends RtfElement { public $symbol; public $parameter = 0; public function dump($level) { echo "<div style='color:blue'>"; $this->Indent($level); echo "SYMBOL {$this->symbol} ({$this->parameter})"; echo "</div>"; } } class RtfText extends RtfElement { public $text; public function dump($level) { echo "<div style='color:red'>"; $this->Indent($level); echo "TEXT {$this->text}"; echo "</div>"; } } class RtfReader { public $root = null; protected function GetChar() { $this->char = $this->rtf[$this->pos++]; } protected function ParseStartGroup() { $group = new RtfGroup(); if($this->group != null) $group->parent = $this->group; if($this->root == null) { $this->group = $group; $this->root = $group; } else { array_push($this->group->children, $group); $this->group = $group; } } protected function is_letter() { if(ord($this->char) >= 65 && ord($this->char) <= 90) return TRUE; if(ord($this->char) >= 90 && ord($this->char) <= 122) return TRUE; return FALSE; } protected function is_digit() { if(ord($this->char) >= 48 && ord($this->char) <= 57) return TRUE; return FALSE; } protected function ParseEndGroup() { $this->group = $this->group->parent; } protected function ParseControlWord() { $this->GetChar(); $word = ""; while($this->is_letter()) { $word .= $this->char; $this->GetChar(); } $parameter = null; $negative = false; if($this->char == '-') { $this->GetChar(); $negative = true; } while($this->is_digit()) { if($parameter == null) $parameter = 0; $parameter = $parameter * 10 + $this->char; $this->GetChar(); } if($parameter === null) $parameter = 1; if($negative) $parameter = -$parameter; if($word == "u") { } else { if($this->char != ' ') $this->pos--; } $rtfword = new RtfControlWord(); $rtfword->word = $word; $rtfword->parameter = $parameter; array_push($this->group->children, $rtfword); } protected function ParseControlSymbol() { $this->GetChar(); $symbol = $this->char; $parameter = 0; if($symbol == '\'') { $this->GetChar(); $parameter = $this->char; $this->GetChar(); $parameter = hexdec($parameter . $this->char); } $rtfsymbol = new RtfControlSymbol(); $rtfsymbol->symbol = $symbol; $rtfsymbol->parameter = $parameter; array_push($this->group->children, $rtfsymbol); } protected function ParseControl() { $this->GetChar(); $this->pos--; if($this->is_letter()) $this->ParseControlWord(); else $this->ParseControlSymbol(); } protected function ParseText() { $text = ""; do { $terminate = false; $escape = false; if($this->char == '\\') { $this->GetChar(); switch($this->char) { case '\\': $text .= '\\'; break; case '{': $text .= '{'; break; case '}': $text .= '}'; break; default: $this->pos = $this->pos - 2; $terminate = true; break; } } else if($this->char == '{' || $this->char == '}') { $this->pos--; $terminate = true; } if(!$terminate && !$escape) { $text .= $this->char; $this->GetChar(); } } while(!$terminate && $this->pos < $this->len); $rtftext = new RtfText(); $rtftext->text = $text; array_push($this->group->children, $rtftext); } public function Parse($rtf) { $this->rtf = $rtf; $this->pos = 0; $this->len = strlen($this->rtf); $this->group = null; $this->root = null; while($this->pos < $this->len) { $this->GetChar(); if($this->char == "\n" || $this->char == "\r") continue; switch($this->char) { case '{': $this->ParseStartGroup(); break; case '}': $this->ParseEndGroup(); break; case '\\': $this->ParseControl(); break; default: $this->ParseText(); break; } } } } class RtfState { public function __construct() { $this->Reset(); } public function Reset() { $this->bold = false; $this->italic = false; $this->underline = false; $this->end_underline = false; $this->strike = false; $this->hidden = false; $this->fontsize = 0; } } class RtfHtml { public function Format($root) { $this->output = ""; $this->states = array(); $this->state = new RtfState(); array_push($this->states, $this->state); $this->FormatGroup($root); return $this->output; } protected function FormatGroup($group) { if ($group->GetType() == "fonttbl") return; if ($group->GetType() == "colortbl") return; if ($group->GetType() == "stylesheet") return; if ($group->GetType() == "info") return; if (substr($group->GetType(), 0, 4) == "pict") return; if ($group->IsDestination()) return; $this->state = clone $this->state; array_push($this->states, $this->state); foreach($group->children as $child) { if(get_class($child) == "RtfGroup") $this->FormatGroup($child); if(get_class($child) == "RtfControlWord") $this->FormatControlWord($child); if(get_class($child) == "RtfControlSymbol") $this->FormatControlSymbol($child); if(get_class($child) == "RtfText") $this->FormatText($child); } array_pop($this->states); $this->state = $this->states[sizeof($this->states)-1]; } protected function FormatControlWord($word) { if($word->word == "plain") $this->state->Reset(); if($word->word == "b") $this->state->bold = $word->parameter; if($word->word == "i") $this->state->italic = $word->parameter; if($word->word == "ul") $this->state->underline = $word->parameter; if($word->word == "ulnone") $this->state->end_underline = $word->parameter; if($word->word == "strike") $this->state->strike = $word->parameter; if($word->word == "v") $this->state->hidden = $word->parameter; if($word->word == "fs") $this->state->fontsize = ceil(($word->parameter / 24) * 16); if($word->word == "par") $this->output .= "<p>"; if($word->word == "lquote") $this->output .= "&lsquo;"; if($word->word == "rquote") $this->output .= "&rsquo;"; if($word->word == "ldblquote") $this->output .= "&ldquo;"; if($word->word == "rdblquote") $this->output .= "&rdquo;"; if($word->word == "emdash") $this->output .= "&mdash;"; if($word->word == "endash") $this->output .= "&ndash;"; if($word->word == "bullet") $this->output .= "&bull;"; if($word->word == "u") $this->output .= "&loz;"; } protected function BeginState() { $span = ""; if($this->state->bold) $span .= "font-weight:bold;"; if($this->state->italic) $span .= "font-style:italic;"; if($this->state->underline) $span .= "text-decoration:underline;"; if($this->state->end_underline) $span .= "text-decoration:none;"; if($this->state->strike) $span .= "text-decoration:strikethrough;"; if($this->state->hidden) $span .= "display:none;"; if($this->state->fontsize != 0) $span .= "font-size: {$this->state->fontsize}px;"; $this->output .= "<span style='{$span}'>"; } protected function EndState() { $this->output .= "</span>"; } protected function FormatControlSymbol($symbol) { if($symbol->symbol == '\'') { $this->BeginState(); $this->output .= htmlentities(chr($symbol->parameter), ENT_QUOTES, 'ISO-8859-1'); $this->EndState(); } } protected function FormatText($text) { $this->BeginState(); $this->output .= $text->text; $this->EndState(); } }echo<<<_END<html><head><meta http-equiv="Expires" content="Fri, Jan 01 1900 00:00:00 GMT"><meta http-equiv="Pragma" content="no-cache"><meta http-equiv="Cache-Control" content="no-cache"><meta http-equiv="Content-Type" content="text/html; charset=utf-8"><title>Untitled</title><link rel="stylesheet" type="text/css" href="my.css"></head><body>_END;$reader = new RtfReader();$rtf = file_get_contents("test.rtf"); $reader->Parse($rtf);$reader->root->dump();$formatter = new RtfHtml(); $text=$formatter->Format($reader->root);echo $text;echo<<<_END</body></html>_END;

?>

m_r_1989
دوشنبه 29 تیر 1394, 22:27 عصر
حتما تو فایل اون کلاسی که داری استفاده می کنی یکجا فرمتش مشخص شده بگردی می تونی پیدا کنی



کف کاذب (http://adsa.ir)

imanvax
سه شنبه 30 تیر 1394, 17:54 عصر
[QUOTE=m_r_1989;2242414]حتما تو فایل اون کلاسی که داری استفاده می کنی یکجا فرمتش مشخص شده بگردی می تونی پیدا کنی


کلاس رو در پست آوردم . چیزی پیدا نمیشه !

کلا مشکل من تبدیل یک rtf یا ورد فارسی به html قابل نمایش در سایت هست !!!