PDA

View Full Version : مبتدی: پردازش متن(خلاصه سازی)



asefy2008
پنج شنبه 10 مرداد 1392, 11:23 صبح
سلام دوستان
یکی می تونه به من بگه مراحل ساخت یک خلاصه ساز متن چی هست و چطور می تونم اون رو بنویسم؟
متنم به زبان انگلیسی هست.
اگر توابع کتاب خونه ای و یا نمونه برنامه ای برای این مورد دوستان دارن، لطف کنن و قرار بدن بسیار ممنون میشم
اگر مطلب یا مقاله ای هم در اختیارم قرار بدن تا بتونم مراحل کار رو درک کنم و یا حداقل یه دید کوچیک نسبت به این پروژه پیدا کنم ممنون میشم
دوستان از کمکهاشون هرچند کم دریغ نکنند

بسیار ممنون

مصطفی ساتکی
جمعه 18 مرداد 1392, 13:52 عصر
سلام.
شما بایستی NLP (natural language processing) رو شروع کنید. در ضمن از Open Text Summarizer (http://sourceforge.net/projects/libots/) که open source هم هست می تونید استفاده کنید.

asefy2008
دوشنبه 04 آذر 1392, 18:45 عصر
سلام.
شما بایستی NLP (natural language processing) رو شروع کنید. در ضمن از Open Text Summarizer (http://sourceforge.net/projects/libots/) که open source هم هست می تونید استفاده کنید.

ممنون از راهنماییتون
این فایل های xml که برای زبانهای مختلف ساخته شده و در برنامه قرار داره چی هست؟ برای مشخص کردن گرامر زبان هست؟
<?xml version="1.0"?>
<dictionary lang="English" maintainer="nadav256@hotmail.com">
<stemmer>


<step1_pre>
<rule>"|</rule>
<rule>(|</rule>
<rule>'|</rule>
</step1_pre>


<step1_post>
<rule>."|</rule>
<rule>,"|</rule>
<rule>,'|</rule>
<rule>.|</rule>
<rule>..|</rule>
<rule>...|</rule>
<rule>,|</rule>
<rule>"|</rule>
<rule>")|</rule>
<rule>)|</rule>
<rule>?|</rule>
<rule>:|</rule>
<rule>;|</rule>
<rule>!|</rule>
<rule>-|</rule>
<rule>--|</rule>
<rule>'s|</rule>
<rule>'d|</rule>
<rule>n't|</rule>
<rule>'t|</rule>
<rule>'ve|</rule>
<rule>'re|</rule>
<rule>'m|</rule>
</step1_post>


<manual>
<rule>wrote|write</rule>
<rule>came|come</rule>
<rule>went|go</rule>
<rule>choosing|choice</rule>
<rule>was|be</rule>
<rule>were|be</rule>
<rule>ate|eat</rule>
<rule>eaten|eat</rule>
<rule>beaten|beat</rule>
<rule>became|become</rule>
<rule>began|begin</rule>
<rule>beheld|behold</rule>
<rule>bent|bend</rule>
<rule>bound|bind</rule>
<rule>bleed|bled</rule>
<rule>blown|blow</rule>
<rule>blew|blow</rule>
<rule>broken|break</rule>
<rule>broke|break</rule>
<rule>brought|bring</rule>
<rule>built|build</rule>
<rule>bought|buy</rule>
<rule>caught|catch</rule>
<rule>dealt|deal</rule>
<rule>dug|dig</rule>
<rule>dove|dive</rule>
<rule>done|do</rule>
<rule>did|do</rule>
<rule>died|dead</rule>
<rule>drawn|draw</rule>
<rule>dreamt|dream</rule>
<rule>dreamed|dream</rule>
<rule>drunk|drink</rule>
<rule>drank|drink</rule>
<rule>dwelt|dwell</rule>
<rule>fell|fall</rule>
<rule>fallen|fall</rule>
<rule>fed|feed</rule>
<rule>felt|feel</rule>
<rule>flown|fly</rule>
<rule>flew|fly</rule>
<rule>forbidden|forbid</rule>
<rule>forgot|forget</rule>
<rule>forgotten|forget</rule>
<rule>forsaken|forsake</rule>
<rule>froze|freez</rule>
<rule>get|got</rule>
<rule>gave|give</rule>
<rule>gone|go</rule>
<rule>grew|grow</rule>
<rule>grown|grow</rule>
<rule>hidden|hide</rule>
<rule>hung|hang</rule>
<rule>held|hold</rule>
<rule>kept|keep</rule>
<rule>knew|know</rule>
<rule>known|know</rule>
<rule>laid|lay</rule>
<rule>lead|led</rule>
<rule>leave|left</rule>
<rule>lie|lay</rule>
<rule>lit|light</rule>
<rule>lose|lost</rule>
<rule>meet|met</rule>
<rule>made|make</rule>
<rule>misled|mislead</rule>
<rule>mistook|mistake</rule>
<rule>mistaken|mistake</rule>
<rule>overdid|overdo</rule>
<rule>overdone|overdo</rule>
<rule>paid|pay</rule>
<rule>rode|ride</rule>
<rule>rang|ring</rule>
<rule>rung|ring</rule>
<rule>rose|rise</rule>
<rule>ran|run</rule>
<rule>said|say</rule>
<rule>shot|shoot</rule>
<rule>sang|sing</rule>
<rule>sung|sing</rule>
<rule>sleep|slept</rule>
<rule>speak|spoke</rule>
<rule>spend|spent</rule>
<rule>stood|stand</rule>
<rule>stuck|stick</rule>
<rule>satrove|strive</rule>
<rule>strung|string</rule>
<rule>swept|sweep</rule>
<rule>swam|swim</rule>
<rule>took|take</rule>
<rule>taken|take</rule>
<rule>teach|taught</rule>
<rule>torn|tear</rule>
<rule>told|tell</rule>
<rule>thought|think</rule>
<rule>threw|throw</rule>
<rule>woke|wake</rule>
<rule>wept|weep</rule>
<rule>won|win</rule>
<rule>withdrawn|withdraw</rule>
<rule>withdrew|withdraw</rule>
<rule>wrote|write</rule>
<rule>written|write</rule>
</manual>


<pre>
<rule>1before1|2after2</rule>
</pre>

<post>
<rule>tions|t</rule>
<rule>sions|s</rule>
<rule>icians|</rule>
<rule>ician|</rule>
<rule>ics|</rule>
<rule>ical|</rule>
<rule>sses|ss</rule>
<rule>ss|ss</rule>
<rule>---een|</rule>
<rule>ily|y</rule>
<rule>sure|s</rule>
<rule>ans|</rule>
<rule>ian|</rule>
<rule>ials|</rule>
<rule>ial|</rule>
<rule>able|</rule>
<rule>ibility|</rule>
<rule>ity|</rule>
<rule>ble|</rule>
<rule>ist|</rule>
<rule>ence|</rule>
<rule>ement|</rule>
<rule>ment|</rule>
<rule>ize|y</rule>
<rule>ies|y</rule>
<rule>eed|</rule>
<rule>iful|</rule>
<rule>nning|n</rule>
<rule>nnable|n</rule>
<rule>nner|n</rule>
<rule>nned|n</rule>
<rule>nnen|n</rule>
<rule>gger|g</rule>
<rule>gged|g</rule>
<rule>ggen|g</rule>
<rule>gging|g</rule>
<rule>ggable|g</rule>
<rule>pper|p</rule>
<rule>pped|p</rule>
<rule>ppen|p</rule>
<rule>pping|p</rule>
<rule>ppable|p</rule>
<rule>tting|t</rule>
<rule>ting|t</rule>
<rule>tten|t</rule>
<rule>ttable|t</rule>
<rule>tter|t</rule>
<rule>tted|t</rule>
<rule>ller|ll</rule>
<rule>lled|ll</rule>
<rule>llen|ll</rule>
<rule>lling|ll</rule>
<rule>llable|ll</rule>
<rule>sser|ss</rule>
<rule>ssed|ss</rule>
<rule>ssen|ss</rule>
<rule>ssing|ss</rule>
<rule>ssable|s</rule>
<rule>dding|dd</rule>
<rule>eing|e</rule>
<rule>ing|</rule>
<rule>mies|my</rule>
<rule>ly|</rule>
<rule>en|</rule>
<rule>bl|</rule>
<rule>ic|y</rule>
<rule>izer|y</rule>
<rule>eli|</rule>
<rule>ousli|ous</rule>
<rule>ization|y</rule>
<rule>ation|</rule>
<rule>ator|</rule>
<rule>ers|</rule>
<rule>ies|</rule>
<rule>es|</rule>
<rule>ied|</rule>
<rule>ed|</rule>
<rule>cy|t</rule>
<rule>es|</rule>
<rule>is|is</rule>
<rule>s|</rule>
<rule>ee|ee</rule>
<rule>e|</rule>
</post>


<synonyms>
<rule>colour|color</rule>
<rule>honour|honor</rule>
<rule>murder|kill</rule>
<rule>assist|help</rule>
<rule>simple|basic</rule>
<rule>winsome|charming</rule>
<rule>incisive|perceptive</rule>
<rule>bay|bark</rule>
<rule>verbose|wordy</rule>
<rule>angry|mad</rule>
<rule>unhappy|sad</rule>
<rule>depressed|sad</rule>
<rule>dismal|sad</rule>
<rule>mournful|sad</rule>
<rule>dreadful|sad</rule>
<rule>dreary|sad</rule>
<rule>discouraged|sad</rule>
<rule>fled|run</rule>
<rule>fearful|afraid</rule>
<rule>terrified|afraid</rule>
<rule>hysterical|afraid</rule>
<rule>worried|afraid</rule>
<rule>scared|afraid</rule>
<rule>petrified|afraid</rule>
<rule>worse|bad</rule>
<rule>terrible|bad</rule>
<rule>horrible|bad</rule>
<rule>wicked|evil</rule>
<rule>huge|big</rule>
<rule>massive|bug</rule>
<rule>giant|big</rule>
<rule>gigantic|big</rule>
<rule>monstrous|big</rule>
<rule>tremendous|big</rule>
<rule>bulky|big</rule>
<rule>anxious|eager</rule>
<rule>intent|eager</rule>
<rule>ardent|eager</rule>
<rule>avid|eager</rule>
<rule>brave|bold</rule>
<rule>excellent|good</rule>
<rule>worthy|good</rule>
<rule>proper|good</rule>
<rule>favored|good</rule>
<rule>fine|good</rule>
<rule>brisk|happy</rule>
<rule>glad|happy</rule>
<rule>cheerful|happy</rule>
<rule>jolly|happy</rule>
<rule>pleased|happy</rule>
<rule>satisfied|happy</rule>
<rule>vivacious|happy</rule>
<rule>cheery|happy</rule>
<rule>merry|happy</rule>
<rule>injured|hurt</rule>
<rule>offended|hurt</rule>
<rule>distressed|hurt</rule>
<rule>suffering|hurt</rule>
<rule>afflicted|hurt</rule>
<rule>little|small</rule>
<rule>tiny|small</rule>
<rule>microscopic|small</rule>
<rule>miniscule|small</rule>
<rule>slender|small</rule>
<rule>insignificant|small</rule>
<rule>gaze|look</rule>
<rule>stare|look</rule>
<rule>view|look</rule>
<rule>inspect|look</rule>
<rule>glance|look</rule>
<rule>announce|say</rule>
</synonyms>


</stemmer>

<parser>

<linebreak>
<rule>."</rule>
<rule>?"</rule>
<rule>!"</rule>
<rule>.</rule>
<rule>?</rule>
<rule>;</rule>
<rule>|</rule>
<rule>!</rule>
</linebreak>

<linedontbreak>
<rule>Dr.</rule>
<rule>Mr.</rule>
<rule>Mrs.</rule>
<rule>U.S.</rule>
<rule>Rep.</rule>
<rule>Sen.</rule>
<rule>St.</rule>
<rule>Jan.</rule>
<rule>Feb.</rule>
<rule>Mar.</rule>
<rule>Apr.</rule>
<rule>May.</rule>
<rule>Jun.</rule>
<rule>Jul.</rule>
<rule>Aug.</rule>
<rule>Sep.</rule>
<rule>Oct.</rule>
<rule>Nov.</rule>
<rule>Dec.</rule>
<rule>Lt.</rule>
<rule>Gov.</rule>
<rule>a.m.</rule>
<rule>p.m.</rule>
</linedontbreak>
</parser>

<grader-syn>
<depreciate>
<rule>for example</rule>
<rule>such as</rule>
</depreciate>
</grader-syn>


<grader-tf>
<word idf="0.002">dog</word>
<word idf="0.0004">house</word>
</grader-tf>


<grader-tc>
<word>--</word>
<word>-</word>
<word>a</word>
<word>about</word>
<word>again</word>
<word>all</word>
<word>along</word>
<word>almost</word>
<word>also</word>
<word>always</word>
<word>am</word>
<word>among</word>
<word>an</word>
<word>and</word>
<word>another</word>
<word>any</word>
<word>anybody</word>
<word>anything</word>
<word>anywhere</word>
<word>apart</word>
<word>are</word>
<word>around</word>
<word>as</word>
<word>at</word>
<word>be</word>
<word>because</word>
<word>been</word>
<word>before</word>
<word>being</word>
<word>between</word>
<word>both</word>
<word>but</word>
<word>by</word>
<word>can</word>
<word>cannot</word>
<word>comes</word>
<word>could</word>
<word>couldn</word>
<word>did</word>
<word>didn</word>
<word>different</word>
<word>do</word>
<word>does</word>
<word>doesn</word>
<word>done</word>
<word>don</word>
<word>down</word>
<word>during</word>
<word>each</word>
<word>either</word>
<word>enough</word>
<word>etc</word>
<word>even</word>
<word>every</word>
<word>everybody</word>
<word>everything</word>
<word>everywhere</word>
<word>except</word>
<word>few</word>
<word>final</word>
<word>first</word>
<word>for</word>
<word>from</word>
<word>get</word>
<word>go</word>
<word>goes</word>
<word>gone</word>
<word>good</word>
<word>got</word>
<word>had</word>
<word>has</word>
<word>have</word>
<word>having</word>
<word>he</word>
<word>hence</word>
<word>her</word>
<word>him</word>
<word>his</word>
<word>how</word>
<word>however</word>
<word>I</word>
<word>i.e</word>
<word>if</word>
<word>in</word>
<word>initial</word>
<word>into</word>
<word>is</word>
<word>isn</word>
<word>it</word>
<word>its</word>
<word>it</word>
<word>itself</word>
<word>just</word>
<word>last</word>
<word>least</word>
<word>less</word>
<word>let</word>
<word>lets</word>
<word>let's</word>
<word>like</word>
<word>lot</word>
<word>made</word>
<word>make</word>
<word>many</word>
<word>may</word>
<word>maybe</word>
<word>me</word>
<word>might</word>
<word>mine</word>
<word>more</word>
<word>most</word>
<word>Mr</word>
<word>much</word>
<word>must</word>
<word>my</word>
<word>near</word>
<word>need</word>
<word>next</word>
<word>niether</word>
<word>no</word>
<word>nobody</word>
<word>nor</word>
<word>not</word>
<word>nothing</word>
<word>now</word>
<word>nowhere</word>
<word>of</word>
<word>off</word>
<word>often</word>
<word>oh</word>
<word>ok</word>
<word>okay</word>
<word>on</word>
<word>once</word>
<word>one</word>
<word>only</word>
<word>onto</word>
<word>or</word>
<word>other</word>
<word>our</word>
<word>ours</word>
<word>out</word>
<word>over</word>
<word>own</word>
<word>perhaps</word>
<word>previous</word>
<word>quite</word>
<word>rather</word>
<word>re</word>
<word>really</word>
<word>s</word>
<word>said</word>
<word>same</word>
<word>say</word>
<word>see</word>
<word>seems</word>
<word>several</word>
<word>shall</word>
<word>she</word>
<word>should</word>
<word>shouldn't</word>
<word>since</word>
<word>so</word>
<word>some</word>
<word>somebody</word>
<word>something</word>
<word>somewhere</word>
<word>still</word>
<word>stuff</word>
<word>such</word>
<word>than</word>
<word>t</word>
<word>that</word>
<word>the</word>
<word>their</word>
<word>theirs</word>
<word>them</word>
<word>then</word>
<word>there</word>
<word>these</word>
<word>they</word>
<word>thing</word>
<word>things</word>
<word>this</word>
<word>those</word>
<word>through</word>
<word>thus</word>
<word>to</word>
<word>too</word>
<word>top</word>
<word>two</word>
<word>under</word>
<word>unless</word>
<word>until</word>
<word>up</word>
<word>upon</word>
<word>us</word>
<word>use</word>
<word>v</word>
<word>ve</word>
<word>very</word>
<word>want</word>
<word>was</word>
<word>we</word>
<word>well</word>
<word>went</word>
<word>were</word>
<word>what</word>
<word>when</word>
<word>where</word>
<word>which</word>
<word>while</word>
<word>who</word>
<word>whom</word>
<word>why</word>
<word>will</word>
<word>with</word>
<word>without</word>
<word>won</word>
<word>would</word>
<word>x</word>
<word>yes</word>
<word>yet</word>
<word>you</word>
<word>you</word>
<word>your</word>
<word>yours</word>
</grader-tc>
</dictionary>

asefy2008
دوشنبه 04 آذر 1392, 23:42 عصر
دوستان این قطعه کد با فایل xml بالا داره چه کار می کنه
/// <summary>
/// Loads the dictionary XML file "dics\{DictionaryLanguage}.xml" that sits next to the
/// executing assembly and copies each of its sections into a new <see cref="Dictionary"/>.
/// </summary>
/// <param name="DictionaryLanguage">
/// Base name of the dictionary file, without the ".xml" extension
/// (presumably a language name such as "English" - confirm against callers).
/// </param>
/// <returns>
/// A <see cref="Dictionary"/> whose rule collections are filled from the "stemmer",
/// "parser", "grader-syn", "grader-tf" and "grader-tc" sections of the file.
/// </returns>
/// <exception cref="FileNotFoundException">The dictionary file does not exist.</exception>
public static Dictionary LoadFromFile(string DictionaryLanguage)
{
    // CodeBase is a file URI ("file:///C:/..."). Convert it through Uri.LocalPath rather
    // than cutting a fixed six-character "file:\" prefix, which only works for local
    // Windows drive paths.
    string assemblyPath = new System.Uri(Assembly.GetExecutingAssembly().GetName().CodeBase).LocalPath;
    string dictionaryFile = Path.Combine(
        Path.Combine(Path.GetDirectoryName(assemblyPath), "dics"),
        DictionaryLanguage + ".xml");

    if (!File.Exists(dictionaryFile))
    {
        throw new FileNotFoundException("Could Not Load Dictionary: " + dictionaryFile);
    }

    Dictionary dict = new Dictionary();
    XElement doc = XElement.Load(dictionaryFile);

    // <stemmer>: "key|value" rules, one dictionary per child container.
    dict.Step1PrefixRules = LoadKeyValueRule(doc, "stemmer", "step1_pre");
    dict.Step1SuffixRules = LoadKeyValueRule(doc, "stemmer", "step1_post");
    dict.ManualReplacementRules = LoadKeyValueRule(doc, "stemmer", "manual");
    dict.PrefixRules = LoadKeyValueRule(doc, "stemmer", "pre");
    dict.SuffixRules = LoadKeyValueRule(doc, "stemmer", "post");
    dict.SynonymRules = LoadKeyValueRule(doc, "stemmer", "synonyms");

    // <parser> and <grader-syn>: plain value lists.
    dict.LinebreakRules = LoadValueOnlyRule(doc, "parser", "linebreak");
    dict.NotALinebreakRules = LoadValueOnlyRule(doc, "parser", "linedontbreak");
    dict.DepreciateValueRule = LoadValueOnlyRule(doc, "grader-syn", "depreciate");

    // <grader-tf>: only the element text is read here; the idf attribute is not.
    dict.TermFreqMultiplierRule = LoadValueOnlySection(doc, "grader-tf");

    // <grader-tc>: each listed word is wrapped in a Word and stored as unimportant.
    dict.UnimportantWords = new List<Word>();
    foreach (string unimpword in LoadValueOnlySection(doc, "grader-tc"))
    {
        dict.UnimportantWords.Add(new Word(unimpword));
    }

    return dict;
}

/// <summary>
/// Collects the text value of every child element found directly under each
/// <paramref name="section"/> element of <paramref name="doc"/>.
/// </summary>
/// <param name="doc">Root element of the dictionary document.</param>
/// <param name="section">Name of the direct child element(s) of the root to read.</param>
/// <returns>The child element values in document order; empty when the section is absent.</returns>
private static List<string> LoadValueOnlySection(XElement doc, string section)
{
    List<string> values = new List<string>();

    foreach (XElement sectionElement in doc.Elements(section))
    {
        foreach (XElement entry in sectionElement.Elements())
        {
            values.Add(entry.Value);
        }
    }

    return values;
}
/// <summary>
/// Reads the "key|value" rules stored under <paramref name="section"/>/<paramref name="container"/>
/// and returns them as a lookup table.
/// </summary>
/// <param name="doc">Root element of the dictionary document.</param>
/// <param name="section">Name of the direct child of the root, e.g. "stemmer".</param>
/// <param name="container">Name of the element inside the section that holds the rules.</param>
/// <returns>
/// A dictionary mapping the text before the first '|' to the text between the first and
/// second '|'. When a key occurs more than once, the first occurrence wins. A rule with
/// no '|' maps its whole text to the empty string.
/// </returns>
private static Dictionary<string, string> LoadKeyValueRule(XElement doc, string section, string container)
{
    Dictionary<string, string> rules = new Dictionary<string, string>();
    IEnumerable<XElement> containers = doc.Elements(section).Elements(container);

    foreach (XElement ruleElement in containers.Elements())
    {
        string[] parts = ruleElement.Value.Split('|');
        string key = parts[0];

        // A rule without a separator used to throw IndexOutOfRangeException;
        // treat it as "replace with nothing" instead of aborting the whole load.
        string value = parts.Length > 1 ? parts[1] : string.Empty;

        if (!rules.ContainsKey(key))
        {
            rules.Add(key, value);
        }
    }

    return rules;
}

اصلا برای چی داره این فایل xml رو می خونه و هی قسمت های مختلفش رو از هم جدا می کنه.(متوجه شدم داره چی کار می کنه اما علتش رو نمی فهمم)