forked from: HTML→XHTML変換(改良版)
フィードを解析するときに内容が(整形式でない)HTMLだとAS3のXMLクラスで扱えないので
HTMLを正規表現でXHTMLに変換するテスト
TLFでも表示するようにしてみた
/**
* Copyright riafeed ( http://wonderfl.net/user/riafeed )
* MIT License ( http://www.opensource.org/licenses/mit-license.php )
* Downloaded from: http://wonderfl.net/c/cWrd
*/
// forked from riafeed's HTML→XHTML変換(改良版)
// forked from riafeed's HTML→XHTML変換
/*
フィードを解析するときに内容が(整形式でない)HTMLだとAS3のXMLクラスで扱えないので
HTMLを正規表現でXHTMLに変換するテスト
ネストに対応してみた
*/
package {
import flash.display.AVM1Movie;
import flash.display.Sprite;
import flash.text.TextField;
import flash.text.TextFieldType;
import flash.display.Sprite;
import flash.events.Event;
import com.bit101.components.PushButton;
import flashx.textLayout.container.*;
import flashx.textLayout.elements.*;
import flashx.textLayout.formats.*;
import flashx.textLayout.conversion.TextConverter;
import spark.utils.TextFlowUtil;
public class FlashTest extends Sprite {
// Editable input field into which the raw HTML is typed/pasted (configured in init()).
private var _text:TextField;
// Output field that shows the converted markup as plain text or as rendered HTML.
private var _text2:TextField;
// Holds the most recently created PushButton; init() reassigns it for each of
// the three buttons, so only the last one stays referenced here.
private var _btn:PushButton;
// TextFlow currently displayed in _spr, or null before the first "TextFlow View".
private var _flow:TextFlow = null;
// Container sprite handed to the ContainerController for the TextFlow view.
private var _spr:Sprite;
/**
 * Builds the UI immediately when the stage is already available; otherwise
 * defers the set-up until this sprite has been added to the stage.
 */
public function FlashTest() {
    if (stage == null) {
        // Not on the display list yet: wait for ADDED_TO_STAGE.
        addEventListener(Event.ADDED_TO_STAGE, init);
        return;
    }
    init();
}
/**
 * Creates the input field, the three action buttons, the output field and
 * the sprite used as the TextFlow container.
 *
 * @param e The ADDED_TO_STAGE event when invoked as a listener, or null when
 *          called directly from the constructor.
 */
public function init(e:Event = null):void {
    // Make sure the set-up runs only once: without this, removing the sprite
    // from the stage and adding it again would rebuild (and duplicate) the UI.
    removeEventListener(Event.ADDED_TO_STAGE, init);

    // Source HTML input area.
    _text = new TextField();
    _text.type = TextFieldType.INPUT;
    _text.border = true;
    _text.multiline = true;
    _text.width = 465;
    _text.height = 120;
    _text.wordWrap = true;
    _text.y = 0;
    addChild(_text);

    // Action buttons (parented to the stage). _btn only keeps the last one.
    _btn = new PushButton(stage, 0, 120, "Convert", PushEventHandler);
    _btn = new PushButton(stage, 100, 120, "HTML View", PushHTMLEventHandler);
    _btn = new PushButton(stage, 200, 120, "TextFlow View", PushTextFlowHandler);

    // Output area for the converted text / rendered HTML.
    _text2 = new TextField();
    _text2.width = _text2.height = 465;
    _text2.wordWrap = true;
    _text2.multiline = true;
    _text2.y = 150;
    //_text2.border = true;
    addChild(_text2);

    // White-filled container for the TextFlow view, placed over the output area.
    _spr = new Sprite();
    addChild(_spr);
    _spr.graphics.beginFill(0xFFFFFF);
    _spr.graphics.drawRect(0, 0, 465, 465);
    _spr.graphics.endFill();
    _spr.width = _spr.height = 465;
    _spr.x = 0;
    _spr.y = 150;
}
/**
 * "Convert" button handler: shows the converted markup as plain text and
 * checks whether the result parses as XML. The text is black when it does;
 * otherwise it turns red and the parser error is appended.
 *
 * @param e Click event from the button (unused).
 */
private function PushEventHandler(e:Event):void {
    // Switch to the plain text output pane.
    _text2.visible = true;
    _spr.visible = false;

    var converted:String = convHTML(_text.text);
    _text2.text = converted;

    // Parsing is done only to detect whether the output is well-formed.
    var failure:Error = null;
    try {
        var probe:XML = new XML(converted);
    } catch (parseError:Error) {
        failure = parseError;
    }

    if (failure == null) {
        _text2.textColor = 0x000000;
        return;
    }
    _text2.textColor = 0xff0000;
    _text2.appendText("\nこのXMLは整形式になっていません:" + failure);
}
/**
 * "HTML View" button handler: renders the converted markup through
 * TextField.htmlText and checks whether it parses as XML. On failure the
 * text turns red and the parser error is appended.
 *
 * @param e Click event from the button (unused).
 */
private function PushHTMLEventHandler(e:Event):void {
    _spr.visible = false;
    _text2.visible = true;
    var ret:String = convHTML(_text.text);
    // Reset the colour left over from a previous failed conversion; otherwise
    // a later successful render would stay red. Done before assigning
    // htmlText so colours specified by the markup itself are not overridden.
    _text2.textColor = 0x000000;
    _text2.htmlText = ret;
    try {
        // Parsing is done only to detect whether the output is well-formed.
        var xmltest:XML = new XML(ret);
    } catch (parseError:Error) {
        _text2.textColor = 0xff0000;
        _text2.appendText("\nこのXMLは整形式になっていません:" + parseError);
    }
}
/**
 * "TextFlow View" button handler: converts the input, imports it as a
 * TextFlow (TextField-HTML format) and displays it inside _spr. When the
 * converted markup is not well-formed XML, a red error message is shown
 * instead.
 *
 * @param e Click event from the button (unused).
 */
private function PushTextFlowHandler(e:Event):void {
    _spr.visible = true;
    _text2.visible = false;
    var ret:String = convHTML(_text.text);
    //var ret:String = _text.text;

    // Detach the previously displayed flow from the container.
    if (_flow != null) _flow.flowComposer.removeAllControllers();

    var imported:TextFlow = null;
    try {
        // Parsing is done only to detect whether the output is well-formed.
        var xmltest:XML = new XML(ret);
        imported = TextConverter.importToFlow(ret, TextConverter.TEXT_FIELD_HTML_FORMAT);
    } catch (parseError:Error) {
        // The parser message can itself contain markup (e.g. an end-tag name),
        // so escape it before embedding it into the HTML error message.
        var detail:String = String(parseError)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;");
        imported = TextConverter.importToFlow("<font color=\"#FF0000\">このXMLは整形式になっていません:" + detail + "</font>", TextConverter.TEXT_FIELD_HTML_FORMAT);
    }

    // importToFlow may return null when the importer hits errors; in that case
    // there is nothing to display, and dereferencing the result would throw.
    _flow = imported;
    if (_flow == null) return;

    _flow.flowComposer.addController(new ContainerController(_spr, 465, 465));
    _flow.flowComposer.updateAllControllers();
}
/**
 * Converts (possibly non-well-formed) HTML into XHTML-like markup so that it
 * can be parsed with the AS3 XML class.
 *
 * What the conversion does:
 *  - supplies end tags for elements whose end tag HTML allows to be omitted
 *    (p, li, option, dt, dd, tr, td, th, thead, tfoot, tbody, head, body, html);
 *  - rewrites empty elements in XML form (&lt;br&gt; -&gt; &lt;br /&gt;);
 *  - wraps the content in html/body when the first tag is neither;
 *  - inserts a missing ul around a stray li and a missing tbody around tr;
 *  - quotes unquoted attribute values and expands minimized attributes;
 *  - lower-cases tag names;
 *  - escapes stray angle brackets that are not part of a tag;
 *  - drops end tags that have no matching open element;
 *  - copies XML declarations / processing instructions, comments and CDATA
 *    sections through untouched.
 *
 * Approach for the omitted end tags: elements with an omissible end tag cannot
 * directly nest inside an element of the same kind, so the nesting depth of
 * each such element is counted together with the depth of its "base" parent
 * (e.g. table for tr). An end tag is supplied only when both depths are equal,
 * which avoids closing an element at the wrong place.
 *
 * @param src HTML source text. null is treated as an empty string.
 * @return The converted markup.
 */
public function convHTML(src:String):String {
    var temp:String = (src == null) ? "" : src;
    var buf:String = "";     // output being built
    var tag:String;          // tag currently being processed, e.g. "<p class=x>"
    var tagname:String;      // lower-cased name of that tag ("/p" for an end tag)
    var idx:int = 0;         // position in temp up to which input was consumed
    var pin:int = 0;         // position of the "<" currently being examined

    // Nesting counters of the relevant elements plus the helper closures that
    // operate on them. The closures share buf/tag/temp/idx/pin with this function.
    var tagnest:Object =
    {
        html:0,
        head:0,
        body:0,
        p:0,
        uol:0, // ul and ol are counted together
        li:0,
        table:0,
        tr:0,
        tdh:0, // td and th are counted together
        tbody:0,
        thead:0,
        tfoot:0,
        colgroup:0,
        option:0,
        dl:0,
        dd:0,
        // td and th share one counter, so remember per depth which of the two
        // end tags has to be supplied.
        tdhnest:[],
        // dt may or may not appear at a given dl depth, so it is tracked per
        // depth in its own array.
        dtnest:[],
        // Names of the currently open elements, innermost last.
        stack:[],

        // Registers an opening tag: supplies end tags / parents that are
        // implied by it, updates the counters and pushes it on the stack.
        pushtag:function(tagname:String):void {
            var nopush:Boolean = false;
            switch (tagname) {
                // Empty elements are rewritten in XML form
                // (<img src=""> -> <img src="" />) and never pushed.
                case "hr":
                case "br":
                case "img":
                case "input":
                case "param":
                case "col":
                case "area":
                case "base":
                case "link":
                case "isindex":
                case "meta":
                case "basefont": {
                    tag = tag.replace(/<(.*?)>/, "<$1 />");
                    nopush = true;
                    break;
                }
                case "head":
                case "body":
                    // head/body appearing inside head closes the open head.
                    if (tagnest["head"] == 1) {
                        tagnest.poptag("head");
                    }
                    // head/body appearing inside body closes the open body
                    // (invalid HTML, handled just in case).
                    if (tagnest["body"] == 1) {
                        tagnest.poptag("body");
                    }
                    // intentional fall-through
                case "html":
                    tagnest[tagname] = 1;
                    break;
                // Paragraph
                case "p":
                    if (tagnest["p"] == 1) {
                        tagnest.poptag("p");
                    }
                    tagnest["p"] = 1;
                    break;
                // Lists
                case "ul":
                case "ol":
                    tagnest["uol"]++;
                    break;
                case "li":
                    if (tagnest["uol"] == 0) {
                        // li outside of any ul/ol: supply a ul.
                        buf += "<ul>";
                        tagnest.stack.push("ul");
                        tagnest["uol"] = 1;
                    } else if (tagnest["li"] == tagnest["uol"]) {
                        tagnest.poptag("li");
                    }
                    tagnest["li"]++;
                    break;
                // Form selection
                case "option":
                    if (tagnest["option"] == 1) {
                        tagnest.poptag("option");
                    }
                    tagnest["option"] = 1;
                    break;
                case "optgroup":
                    if (tagnest["option"] == 1) {
                        tagnest.poptag("option");
                    }
                    break;
                // Definition lists
                case "dl":
                    tagnest["dl"]++;
                    break;
                case "dt":
                case "dd":
                    if (tagnest["dd"] == tagnest["dl"]) {
                        tagnest.poptag("dd");
                    } else if (tagnest.dtnest[tagnest["dl"]] == 1) {
                        tagnest.poptag("dt");
                    }
                    if (tagname == "dt") {
                        tagnest.dtnest[tagnest["dl"]] = 1;
                    } else {
                        tagnest["dd"]++;
                    }
                    break;
                // Tables
                case "table":
                    tagnest["table"]++;
                    break;
                case "tr":
                    if (tagnest["colgroup"] == tagnest["table"]) {
                        tagnest.poptag("colgroup");
                    }
                    if (tagnest["tdh"] == tagnest["table"]) {
                        tagnest.poptag(tagnest.tdhnest[tagnest["tdh"]], true);
                    }
                    if (tagnest["tr"] == tagnest["table"]) {
                        tagnest.poptag("tr");
                    }
                    // No row group open at this table depth: supply a tbody.
                    if (tagnest["tbody"] < tagnest["table"] && tagnest["thead"] < tagnest["table"] && tagnest["tfoot"] < tagnest["table"]) {
                        buf += "<tbody>";
                        tagnest.stack.push("tbody");
                        tagnest["tbody"]++;
                    }
                    tagnest["tr"]++;
                    break;
                case "td":
                case "th":
                    if (tagnest["tdh"] == tagnest["table"]) {
                        tagnest.poptag(tagnest.tdhnest[tagnest["tdh"]], true);
                    }
                    tagnest["tdh"]++;
                    tagnest.tdhnest[tagnest["tdh"]] = tagname;
                    break;
                case "thead":
                case "tfoot":
                case "tbody":
                    if (tagnest["colgroup"] == tagnest["table"]) {
                        tagnest.poptag("colgroup");
                    }
                    if (tagnest["tbody"] == tagnest["table"]) {
                        tagnest.poptag("tbody");
                    }
                    if (tagnest["thead"] == tagnest["table"]) {
                        tagnest.poptag("thead");
                    }
                    if (tagnest["tfoot"] == tagnest["table"]) {
                        tagnest.poptag("tfoot");
                    }
                    tagnest[tagname]++;
                    break;
            }
            if (!nopush) tagnest.stack.push(tagname);
        },

        // Pops elements off the stack until `tagname` has been popped, writing
        // an end tag for every element popped on the way. With an empty
        // tagname exactly one element is popped. The end tag of the last
        // popped element is written only when closetag is true.
        // Returns the name of the last popped element ("" when `tagname` is
        // given but not open).
        poptag:function(tagname:String = "", closetag:Boolean = true):String {
            if (tagname != "" && tagnest.stack.indexOf(tagname) == -1) return "";
            var noloop:Boolean = false;
            if (tagname == "") noloop = true;
            var ret:String = "";
            while (tagnest.stack.length > 0) {
                ret = tagnest.stack.pop();
                // Undo the bookkeeping done by pushtag for this element.
                switch (ret) {
                    case "html":
                    case "body":
                    case "head":
                    case "p":
                    case "option":
                        tagnest[ret] = 0;
                        break;
                    case "li":
                    case "dl":
                    case "dd":
                    case "table":
                    case "thead":
                    case "tfoot":
                    case "tbody":
                    case "tr":
                        tagnest[ret]--;
                        break;
                    case "ul":
                    case "ol":
                        tagnest["uol"]--;
                        break;
                    case "dt":
                        tagnest.dtnest[tagnest["dl"]] = 0;
                        break;
                    case "td":
                    case "th":
                        tagnest["tdh"]--;
                        break;
                }
                if (ret == tagname || noloop) break;
                buf += "</" + ret + ">";
            }
            if (closetag) buf += "</" + ret + ">";
            return ret;
        },

        // Returns the index in temp of the first ">" at or after `start` that
        // is not inside a quoted attribute value, or -1 when there is none.
        searchclose:function(start:int):int {
            var pos:int = start;
            var isquot:Boolean = false;
            var isdquot:Boolean = false;
            var isapos:Boolean = false;
            var char:String;
            while (pos < temp.length) {
                char = temp.charAt(pos);
                switch (char) {
                    case ">":
                        if (!isquot) return pos;
                        break;
                    case "'":
                        if (!isdquot) isquot = !isquot;
                        isapos = !isapos;
                        break;
                    case "\"":
                        if (!isapos) isquot = !isquot;
                        isdquot = !isdquot;
                        break;
                }
                pos++;
            }
            return -1;
        },

        // Returns true when the current `tag` contains another unquoted "<"
        // after its first character, i.e. the leading "<" was not a tag start.
        searchopen:function():Boolean {
            var pos:int = 1;
            var isquot:Boolean = false;
            var isdquot:Boolean = false;
            var isapos:Boolean = false;
            var char:String;
            while (pos < tag.length) {
                char = tag.charAt(pos);
                switch (char) {
                    case "<":
                        if (!isquot) return true;
                        break;
                    case "'":
                        if (!isdquot) isquot = !isquot;
                        isapos = !isapos;
                        break;
                    case "\"":
                        if (!isapos) isquot = !isquot;
                        isdquot = !isdquot;
                        break;
                }
                pos++;
            }
            return false;
        },

        // When the input at `pin` starts with `str`, copies everything up to
        // and including the terminator `term` verbatim and advances idx.
        // A missing terminator is appended at the end of the input.
        // Returns true when something was skipped.
        skip:function(str:String, term:String):Boolean {
            if (temp.substr(pin, str.length) == str) {
                var end:int = temp.indexOf(term, pin + 1);
                if (end == -1) {
                    buf += temp.substring(pin);
                    buf += term;
                    idx = temp.length;
                    return true;
                }
                buf += temp.substring(pin, end + term.length);
                idx = end + term.length;
                return true;
            }
            return false;
        },

        // Emits the remaining input (from idx) as escaped text and closes
        // every element that is still open.
        terminate:function():void {
            var rest:String = temp.substring(idx);
            rest = rest.replace(/>/g, "&gt;");
            rest = rest.replace(/</g, "&lt;");
            buf += rest;
            while (tagnest.stack.length > 0) {
                tagnest.poptag();
            }
        }
    };

    while (true) {
        // Look for the start of the next tag.
        pin = temp.indexOf("<", idx);
        // None left: flush the rest and finish.
        if (pin == -1) {
            tagnest.terminate();
            break;
        }
        // Emit the text in front of the tag (it cannot contain "<").
        buf += temp.substring(idx, pin).replace(/>/g, "&gt;");
        // Copy XML declarations / processing instructions through.
        if (tagnest.skip("<?", "?>")) continue;
        // Copy comments through.
        if (tagnest.skip("<!--", "-->")) continue;
        // Copy CDATA sections through.
        if (tagnest.skip("<![CDATA[", "]]>")) continue;

        // Extract the tag.
        var closepos:int = tagnest.searchclose(pin);
        if (closepos == -1) {
            // Unterminated "<...": the text before it has already been
            // emitted, so continue from pin to avoid emitting it twice.
            idx = pin;
            tagnest.terminate();
            break;
        }
        tag = temp.substring(pin, closepos + 1);
        var taglength:int = tag.length;
        if (tagnest.searchopen()) {
            // Another "<" follows before the ">": this "<" is plain text.
            // Escape it and resume scanning right after it.
            buf += "&lt;";
            idx = pin + 1;
            continue;
        }
        // Line breaks inside a tag are whitespace; turn them into a space so
        // that "<img\nsrc=..." keeps its tag name and attribute separated.
        tag = tag.replace(/[\r\n]+/g, " ");
        // Extract the tag name.
        var end:int = tag.indexOf(" ");
        if (end == -1) end = tag.length - 1;
        tagname = tag.substring(1, end).toLowerCase();
        // Escape angle brackets that occur inside quoted attribute values.
        var tagbuf:String = tag.substring(1, tag.length - 1);
        tagbuf = tagbuf.replace(/</g, "&lt;");
        tagbuf = tagbuf.replace(/>/g, "&gt;");
        if (!tagname.match(/[a-z]+/)) {
            // Not a tag at all (e.g. "< 2 >"): emit it as escaped text.
            buf += "&lt;" + tagbuf + "&gt;";
            idx = pin + taglength;
            continue;
        }
        tag = "<" + tagbuf + ">";

        // Supply the html (and body) element when the document lacks it.
        if (tagnest.stack.length == 0 && tagname != "html") {
            if (tagname != "body" && tagname != "head") {
                buf = "<html><body>" + buf;
                tagnest.pushtag("html");
                tagnest.pushtag("body");
                tagnest["body"] = 1;
            } else {
                buf = "<html>" + buf;
                tagnest.pushtag("html");
            }
            tagnest["html"] = 1;
        }

        // Quote unquoted attribute values (width=200 -> width="200").
        while (tag.match(/(<[^>]*? [a-z]+=)([^"'][^ >]*?)([ >])/i)) {
            tag = tag.replace(/(<[^>]*? [a-z]+=)([^"'][^ >]*?)([ >])/i, "$1\"$2\"$3");
        }
        // Expand minimized attributes (checked -> checked="checked").
        while (tag.match(/(<[^>]*? )([a-z]+?)([ >])/i)) {
            tag = tag.replace(/(<[^>]*? )([a-z]+?)([ >])/i, "$1$2=\"$2\"$3");
        }

        if (tagname.charAt(0) == '/') {
            if (tagnest.stack.indexOf(tagname.substring(1)) == -1) {
                // End tag without a matching open element: drop it.
                idx = pin + taglength;
                continue;
            }
            // Close everything still open inside the element; its own end
            // tag is written below.
            tagnest.poptag(tagname.substring(1), false);
        } else {
            // Tags already written as <x ... /> are not tracked.
            if (!tag.match(/\/>$/)) tagnest.pushtag(tagname);
        }
        // Emit the tag with its name forced to lower case.
        buf += "<" + tagname + tag.substring(end);
        idx = pin + taglength;
    }
    // Note: an earlier, purely regular-expression based implementation was
    // removed from here because it could not handle nested elements.
    return buf;
}
}
}