PDA

View Full Version : سوال: تبديل حروف چسبان فارسي به حروف بزرگ و منفصل



massoudghb
یک شنبه 07 اسفند 1390, 10:12 صبح
با سلام . من از راه يه XML دارم يه سري ديتا رو ميگيرم . مشكلش اينه كه مثلا خروجي يك ركوردم ميشه :"ﻧ" كه اين كاراكتر با "ن" يكي نيست و if جواب false ميده در مورد مقايسه شون .. چطور ميشه اين مشكل رو حل كرد ؟

AMIBCT
یک شنبه 07 اسفند 1390, 10:31 صبح
این برنامه همین کار رو انجام می‌دهد
البته زبان آن JavaScript است و خودتان باید زحمت تبدیلش را بکشید:

فارسی‌نویس یونیکد
http://amib.ir/weblog/?page_id=295

linux
یک شنبه 07 اسفند 1390, 12:30 عصر
اگر جستجو کنید، قبلاً جواب داده شده است؛ فکر کنم دو بارش را خودم جواب دادم. از کتابخانهٔ NBidi استفاده کنید؛ در اینترنت بگردید، پیدا می‌کنید.

massoudghb
سه شنبه 09 اسفند 1390, 09:37 صبح
با سلام . بازنويسي شد به صورت كلاس زير ، البته هنوز يه سري مورد داره ولي در مورد يك كاراكتر خوب جواب ميده :


class UnicodeHandler {

/// <summary>
/// Converts text between logical Arabic/Persian letters and their contextual
/// presentation forms, producing the result in reversed (visual) order.
/// Base letters are replaced by the presentation form that matches their
/// joining context; presentation-form characters found in the input are mapped
/// back to their base letter; brackets are mirrored; runs of any other text
/// (Latin letters, digits, symbols) are kept in their original order.
/// </summary>
/// <param name="input">Text to convert. Leading/trailing white space is trimmed.</param>
/// <returns>The converted text. The same value is left in <c>frm_out</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
public string ProcessInput(String input) //the processing function
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    //default Values:
    //opening/closing pairs: even index = opener, the following odd index = its closer
    String brackets = "(){}[]";
    //digit-conversion switches; with both left at 1 no digit is altered.
    //NOTE(review): the exact meaning of 0/1 is inherited from the JavaScript original - confirm before toggling.
    int e_numbers = 1; //enable/disable arabic numbers: [0, 1]
    int f_numbers = 1; //enable/disable persian numbers: [0, 1]
    int e_harakat = 1; //enable/disable arabic harakat: [0, 1]
    //offset of the first laam-alef ligature group inside the unicode table (41 letters * 4 forms)
    int laIndex = 164;
    //letters that join to the letter FOLLOWING them.
    //A space must not appear here: a word boundary never joins.
    String left = "ـئظشسيیبلپتنمكکگطضصثقفغعهخچحج";
    //letters that join to the letter PRECEDING them (same remark about spaces)
    String right = "ـئؤرلالآىیآةوزژظشسيپبللأاأتنمكکگطضصثقفغعهخحچجدذلإإ";
    String arnumbs = "٠١٢٣٤٥٦٧٨٩";
    String fanumbs = "۰۱۲۳۴۵۶۷۸۹";
    String ennumbs = "0123456789";
    //defining the harakat
    String harakat = "ًٌٍَُِّْ";
    //defining other symbols
    String symbols = "ـ.،؟ @#$%^&*-+|=~,:";
    //presentation forms, four per letter, in the same letter order as the arabic table.
    //Slot used: 0 = joins nothing, 1 = joins the next letter only,
    //2 = joins both neighbours, 3 = joins the previous letter only.
    //The last four groups (from laIndex on) are the laam-alef ligatures.
    String unicode =
        "ﺁﺁﺂﺂ" + "ﺃﺃﺄﺄ" + "ﺇﺇﺈﺈ" + "ﺍﺍﺎﺎ" + "ﺏﺑﺒﺐ" + "ﺕﺗﺘﺖ" + "ﺙﺛﺜﺚ" + "ﺝﺟﺠﺞ" + "ﺡﺣﺤﺢ" + "ﺥﺧﺨﺦ" +
        "ﺩﺩﺪﺪ" + "ﺫﺫﺬﺬ" + "ﺭﺭﺮﺮ" + "ﺯﺯﺰﺰ" + "ﺱﺳﺴﺲ" + "ﺵﺷﺸﺶ" + "ﺹﺻﺼﺺ" + "ﺽﺿﻀﺾ" + "ﻁﻃﻄﻂ" + "ﻅﻇﻈﻆ" +
        "ﻉﻋﻌﻊ" + "ﻍﻏﻐﻎ" + "ﻑﻓﻔﻒ" + "ﻕﻗﻘﻖ" + "ﻙﻛﻜﻚ" + "ﻝﻟﻠﻞ" + "ﻡﻣﻤﻢ" + "ﻥﻧﻨﻦ" + "ﻩﻫﻬﻪ" + "ﻭﻭﻮﻮ" +
        "ﻱﻳﻴﻲ" + "ﺓﺓﺔﺔ" + "ﺅﺅﺆﺆ" + "ﺉﺋﺌﺊ" + "ﻯﻯﻰﻰ" + "گﮔﮕﮓ" + "چﭼﭽﭻ" + "پﭘﭙﭗ" + "ژﮊﮋﮋ" + "ﯼﯾﯿﯽ" +
        "کﮐﮑﮏ" + "ﻵﻵﻶﻶ" + "ﻷﻷﻸﻸ" + "ﻹﻹﻺﻺ" + "ﻻﻻﻼﻼ";
    //base letters; index i here corresponds to group i (4 chars) of the unicode table.
    //The first four entries are the alef variants that can follow laam in a ligature.
    String arabic =
        "آ" + "أ" + "إ" + "ا" + "ب" + "ت" + "ث" + "ج" + "ح" + "خ" +
        "د" + "ذ" + "ر" + "ز" + "س" + "ش" + "ص" + "ض" + "ط" + "ظ" +
        "ع" + "غ" + "ف" + "ق" + "ك" + "ل" + "م" + "ن" + "ه" + "و" +
        "ي" + "ة" + "ؤ" + "ئ" + "ى" + "گ" + "چ" + "پ" + "ژ" + "ی" +
        "ک";
    //everything that ends a run of "other" text: letters + harakat + arabic symbols
    String notEng = arabic + harakat + "ء،؟";

    String x = input.Trim();
    int len = x.Length;

    //start from a clean buffer so the result does not depend on earlier calls
    frm_out = String.Empty;

    //process each char, classify it and prepend its converted form to the output
    for (int g = 0; g < len; g++)
    {
        char cur = x[g];

        //find the nearest neighbours on both sides, skipping harakat;
        //an index outside the string means "no neighbour"
        int prev = g - 1;
        while (prev >= 0 && harakat.IndexOf(x[prev]) >= 0)
        {
            prev--;
        }
        int next = g + 1;
        while (next < len && harakat.IndexOf(x[next]) >= 0)
        {
            next++;
        }
        bool joinsPrev = prev >= 0 && left.IndexOf(x[prev]) >= 0;
        bool joinsNext = next < len && right.IndexOf(x[next]) >= 0;

        //slot inside the four-form group (see the unicode table comment)
        int pos;
        if (joinsPrev)
        {
            pos = joinsNext ? 2 : 3;
        }
        else
        {
            pos = joinsNext ? 1 : 0;
        }

        int bracketIndex = brackets.IndexOf(cur);
        int letterIndex = arabic.IndexOf(cur);

        if (bracketIndex >= 0)
        {
            //the output is reversed, so swap each bracket with its partner
            int partner = (bracketIndex % 2 == 0) ? bracketIndex + 1 : bracketIndex - 1;
            addChar(brackets.Substring(partner, 1));
        }
        else if (letterIndex >= 0)
        {
            //laam directly followed by an alef variant becomes one ligature char
            int alefIndex = -1;
            if (cur == 'ل' && g + 1 < len)
            {
                alefIndex = arabic.IndexOf(x[g + 1]);
            }

            if (alefIndex >= 0 && alefIndex < 4)
            {
                addChar(unicode.Substring((alefIndex * 4) + pos + laIndex, 1));
                g = g + 1; //the alef has been consumed by the ligature
            }
            else
            {
                addChar(unicode.Substring((letterIndex * 4) + pos, 1));
            }
        }
        else if (symbols.IndexOf(cur) >= 0)
        {
            //symbols are copied unchanged
            addChar(x.Substring(g, 1));
        }
        else if (harakat.IndexOf(cur) >= 0)
        {
            //harakat are copied only when enabled
            if (e_harakat == 1)
            {
                addChar(x.Substring(g, 1));
            }
        }
        else if (unicode.IndexOf(cur) >= 0)
        {
            //presentation form in the input: map it back to its base letter(s)
            int formIndex = unicode.IndexOf(cur);
            if (formIndex >= laIndex)
            {
                //ligature: emit the alef variant first, then laam; since addChar
                //prepends, laam ends up in front of the alef in frm_out
                addChar(arabic.Substring((formIndex - laIndex) / 4, 1));
                addChar("ل");
            }
            else
            {
                addChar(arabic.Substring(formIndex / 4, 1));
            }
        }
        else
        {
            //anything else: collect the whole run (latin text, digits, spaces,
            //symbols) and add it in one piece so it is not reversed
            String tstr = String.Empty;
            int h = g;
            while (h < len
                && notEng.IndexOf(x[h]) < 0
                && unicode.IndexOf(x[h]) < 0
                && brackets.IndexOf(x[h]) < 0)
            {
                char ch = x[h];
                int digit;
                if ((digit = ennumbs.IndexOf(ch)) >= 0)
                {
                    if (e_numbers == 0)
                    {
                        ch = arnumbs[digit];
                    }
                    else if (f_numbers == 0) // AMIB
                    {
                        ch = fanumbs[digit];
                    }
                }
                else if ((digit = arnumbs.IndexOf(ch)) >= 0)
                {
                    if (e_numbers == 0)
                    {
                        ch = ennumbs[digit];
                    }
                }
                else if ((digit = fanumbs.IndexOf(ch)) >= 0) // AMIB
                {
                    if (f_numbers == 0)
                    {
                        ch = ennumbs[digit];
                    }
                }
                tstr = tstr + ch;
                h = h + 1;
            }

            if (tstr.Length == 0)
            {
                //the char ends a run but is handled by no branch above (e.g. a
                //lone hamza): copy it unchanged so the loop always advances
                addChar(x.Substring(g, 1));
                continue;
            }

            //make sure spaces between arabic and english text display properly:
            //the run is placed in front of the reversed text, so spaces that
            //trail the run have to lead it instead
            int trailing = 0;
            while (trailing < tstr.Length && tstr[tstr.Length - 1 - trailing] == ' ')
            {
                trailing++;
            }
            if (trailing > 0)
            {
                tstr = new String(' ', trailing) + tstr.Substring(0, tstr.Length - trailing);
            }

            frm_out = tstr + frm_out; //put together the arabic text + the new english text
            g = h - 1; //the for-increment moves to the first char after the run
        }
    }
    return frm_out;
}
// Puts chr in front of whatever frm_out already holds. Because each piece is
// added at the front, pieces appear in frm_out in the reverse of call order.
private void addChar(String chr)
{
    frm_out = String.Concat(chr, frm_out);
}
// Output buffer: addChar prepends each converted piece to this field and
// ProcessInput returns its value when it finishes.
// NOTE(review): the earlier "remove char from keyboard" comment did not describe
// this field - presumably a leftover from the JavaScript original; confirm.
public string frm_out = String.Empty;