abasfar
یک شنبه 07 فروردین 1390, 13:16 عصر
سلام، من با روش زیر به تگ‌های Meta دسترسی دارم:
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;
namespace Tim.Examples.Classes
{
/// <summary>
/// Downloads a web page over HTTP and extracts its title, meta keywords
/// and meta description with regular expressions.
/// </summary>
public class WebMetaData
{
    // The three patterns are compiled once and shared by every instance.
    // The content capture stops at the closing quote ([^"]*) so that extra
    // attributes or an XHTML-style " />" ending do not leak into the value.
    private static readonly Regex TitlePattern = new Regex(
        "<title>([^<]*)</title>",
        RegexOptions.IgnoreCase);

    private static readonly Regex KeywordsPattern = new Regex(
        "<meta\\s+name=\"keywords\"\\s+content=\"([^\"]*)\"[^>]*>",
        RegexOptions.IgnoreCase);

    private static readonly Regex DescriptionPattern = new Regex(
        "<meta\\s+name=\"description\"\\s+content=\"([^\"]*)\"[^>]*>",
        RegexOptions.IgnoreCase);

    /// <summary>Text of the page's title element; empty when the last parsed page had none.</summary>
    public string metaTitle;

    /// <summary>Content of the "description" meta tag; empty when the last parsed page had none.</summary>
    public string metaDescription;

    /// <summary>Content of the "keywords" meta tag; empty when the last parsed page had none.</summary>
    public string metaKeywords;

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and fills
    /// <see cref="metaTitle"/>, <see cref="metaKeywords"/> and
    /// <see cref="metaDescription"/> from its HTML.
    /// </summary>
    /// <param name="url">Absolute http/https address of the page.</param>
    /// <returns>
    /// <c>true</c> when the page was downloaded and parsed; <c>false</c> when
    /// the download failed or returned no content. On <c>false</c> the three
    /// fields keep whatever values they had before the call.
    /// </returns>
    public bool GetMetaTags(string url)
    {
        try
        {
            string html = AcquireHTML(url);

            // AcquireHTML signals every failure with an empty string; report
            // that to the caller instead of "successfully" parsing nothing.
            if (string.IsNullOrEmpty(html))
            {
                return false;
            }

            return GetMeta(html);
        }
        catch (Exception)
        {
            // Best-effort API: callers only see success or failure.
            return false;
        }
    }

    /// <summary>
    /// Fetches the document at <paramref name="address"/> and returns its body as text.
    /// </summary>
    /// <param name="address">Address to request.</param>
    /// <returns>The response body, or an empty string on any failure.</returns>
    private string AcquireHTML(string address)
    {
        try
        {
            HttpWebRequest request = WebRequest.Create(address) as HttpWebRequest;
            if (request == null)
            {
                // WebRequest.Create returns other request types for non-HTTP
                // schemes (for example file://); those are not supported here.
                return "";
            }

            request.UserAgent = "your-search-bot";
            request.KeepAlive = false;
            request.Timeout = 10 * 1000; // milliseconds

            // The using blocks release the connection and the reader on every
            // path, including when reading the body throws. A null resource
            // is tolerated by the using statement.
            using (HttpWebResponse response = request.GetResponse() as HttpWebResponse)
            {
                if (response == null || !request.HaveResponse)
                {
                    return "";
                }

                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body))
                {
                    return reader.ReadToEnd();
                }
            }
        }
        catch (Exception)
        {
            // Invalid address, DNS/connection error, timeout, HTTP error status...
            // all of them mean "no HTML available".
            return "";
        }
    }

    /// <summary>
    /// Extracts the title, keywords and description from an HTML string and
    /// stores them in the public fields. A tag that is not found yields an
    /// empty string for its field.
    /// </summary>
    /// <param name="strIn">HTML text to scan.</param>
    /// <returns><c>false</c> when <paramref name="strIn"/> is null; otherwise <c>true</c>.</returns>
    private bool GetMeta(string strIn)
    {
        if (strIn == null)
        {
            return false;
        }

        // Groups[1].Value is "" when the match failed, so no Success check is needed.
        metaTitle = TitlePattern.Match(strIn).Groups[1].Value;
        metaKeywords = KeywordsPattern.Match(strIn).Groups[1].Value;
        metaDescription = DescriptionPattern.Match(strIn).Groups[1].Value;
        return true;
    }
}
}
کسی روش بهتری دارد؟ مثلاً یک رشته داریم که با فرمت HTML است و می‌خواهم Metaهای آن را استخراج کنم.
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;
namespace Tim.Examples.Classes
{
/// <summary>
/// Downloads a web page over HTTP and extracts its title, meta keywords
/// and meta description with regular expressions.
/// </summary>
public class WebMetaData
{
    // The three patterns are compiled once and shared by every instance.
    // The content capture stops at the closing quote ([^"]*) so that extra
    // attributes or an XHTML-style " />" ending do not leak into the value.
    private static readonly Regex TitlePattern = new Regex(
        "<title>([^<]*)</title>",
        RegexOptions.IgnoreCase);

    private static readonly Regex KeywordsPattern = new Regex(
        "<meta\\s+name=\"keywords\"\\s+content=\"([^\"]*)\"[^>]*>",
        RegexOptions.IgnoreCase);

    private static readonly Regex DescriptionPattern = new Regex(
        "<meta\\s+name=\"description\"\\s+content=\"([^\"]*)\"[^>]*>",
        RegexOptions.IgnoreCase);

    /// <summary>Text of the page's title element; empty when the last parsed page had none.</summary>
    public string metaTitle;

    /// <summary>Content of the "description" meta tag; empty when the last parsed page had none.</summary>
    public string metaDescription;

    /// <summary>Content of the "keywords" meta tag; empty when the last parsed page had none.</summary>
    public string metaKeywords;

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and fills
    /// <see cref="metaTitle"/>, <see cref="metaKeywords"/> and
    /// <see cref="metaDescription"/> from its HTML.
    /// </summary>
    /// <param name="url">Absolute http/https address of the page.</param>
    /// <returns>
    /// <c>true</c> when the page was downloaded and parsed; <c>false</c> when
    /// the download failed or returned no content. On <c>false</c> the three
    /// fields keep whatever values they had before the call.
    /// </returns>
    public bool GetMetaTags(string url)
    {
        try
        {
            string html = AcquireHTML(url);

            // AcquireHTML signals every failure with an empty string; report
            // that to the caller instead of "successfully" parsing nothing.
            if (string.IsNullOrEmpty(html))
            {
                return false;
            }

            return GetMeta(html);
        }
        catch (Exception)
        {
            // Best-effort API: callers only see success or failure.
            return false;
        }
    }

    /// <summary>
    /// Fetches the document at <paramref name="address"/> and returns its body as text.
    /// </summary>
    /// <param name="address">Address to request.</param>
    /// <returns>The response body, or an empty string on any failure.</returns>
    private string AcquireHTML(string address)
    {
        try
        {
            HttpWebRequest request = WebRequest.Create(address) as HttpWebRequest;
            if (request == null)
            {
                // WebRequest.Create returns other request types for non-HTTP
                // schemes (for example file://); those are not supported here.
                return "";
            }

            request.UserAgent = "your-search-bot";
            request.KeepAlive = false;
            request.Timeout = 10 * 1000; // milliseconds

            // The using blocks release the connection and the reader on every
            // path, including when reading the body throws. A null resource
            // is tolerated by the using statement.
            using (HttpWebResponse response = request.GetResponse() as HttpWebResponse)
            {
                if (response == null || !request.HaveResponse)
                {
                    return "";
                }

                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body))
                {
                    return reader.ReadToEnd();
                }
            }
        }
        catch (Exception)
        {
            // Invalid address, DNS/connection error, timeout, HTTP error status...
            // all of them mean "no HTML available".
            return "";
        }
    }

    /// <summary>
    /// Extracts the title, keywords and description from an HTML string and
    /// stores them in the public fields. A tag that is not found yields an
    /// empty string for its field.
    /// </summary>
    /// <param name="strIn">HTML text to scan.</param>
    /// <returns><c>false</c> when <paramref name="strIn"/> is null; otherwise <c>true</c>.</returns>
    private bool GetMeta(string strIn)
    {
        if (strIn == null)
        {
            return false;
        }

        // Groups[1].Value is "" when the match failed, so no Success check is needed.
        metaTitle = TitlePattern.Match(strIn).Groups[1].Value;
        metaKeywords = KeywordsPattern.Match(strIn).Groups[1].Value;
        metaDescription = DescriptionPattern.Match(strIn).Groups[1].Value;
        return true;
    }
}
}
کسی روش بهتری دارد؟ مثلاً یک رشته داریم که با فرمت HTML است و می‌خواهم Metaهای آن را استخراج کنم.