A quick program to find keywords out of a document - great for a knowledge base or .txt file repository
I wrote this a while back and it became relevant again today. All it does is take in a text file, go through it building a Dictionary that counts how often each word occurs (ignoring the words in the exclusion list), and then sort the words by count and output them. The logic is pretty straightforward and looks like this:
[code:c#]
public partial class keywords : System.Web.UI.Page
{
// Word -> number of occurrences, filled in by submit_Click while reading the uploaded file.
Dictionary<string, int> _keywords = new Dictionary<string, int>();
// Characters each line is split on to obtain words.
char[] delimiter = {' '}; // whatever you want to delimit the text with; only spaces by default
// Words that are always excluded, in addition to any the user enters.
string[] DEFAULT_EXCLUDE = { "a", "the", "is", "and" };// excluded even if the user does not specify them (the list may be left empty)
/// <summary>
/// Displays the built-in exclusion list in lblExcluded each time the page loads.
/// </summary>
/// <param name="sender">Source of the event (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // Entries are separated by ", " and the final entry is followed by a single
    // space; an empty exclusion list leaves the label blank.
    string listing = string.Empty;
    if (DEFAULT_EXCLUDE.Length > 0)
    {
        listing = string.Join(", ", DEFAULT_EXCLUDE) + " ";
    }
    lblExcluded.Text = listing;
}
/// <summary>
/// Handles the submit button: counts the words in the uploaded file, skipping
/// excluded words, and lists every word that occurs at least the threshold number
/// of times, most frequent first. Problems are reported in phError.
/// </summary>
/// <param name="sender">Source of the event (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void submit_Click(object sender, EventArgs e)
{
    // The threshold is user input: validate it rather than letting Int32.Parse
    // throw an unhandled FormatException.
    int keyword_threshhold;
    if (!Int32.TryParse(txtThreshhold.Text, out keyword_threshhold))
    {
        ShowError("The threshold must be a whole number.");
        return;
    }

    if (!fu.HasFile)
    {
        ShowError("Can't process what you don't give me...");
        return;
    }

    try
    {
        HashSet<string> excluded = BuildExclusions(txtExclude.Text);

        //Save the file here
        // This reads straight from the upload; it could instead read from wherever the file was saved.
        using (StreamReader sr = new StreamReader(fu.FileContent))
        {
            CountWords(sr, excluded);
        }

        RenderKeywords(keyword_threshhold);
    }
    catch (Exception ex)
    {
        ShowError(ex.Message);
    }
}

/// <summary>Builds the set of excluded words from the defaults plus a comma-separated user list.</summary>
/// <param name="userList">Comma-separated words typed by the user; may be empty.</param>
/// <returns>The distinct excluded words, trimmed of surrounding whitespace.</returns>
private HashSet<string> BuildExclusions(string userList)
{
    HashSet<string> excluded = new HashSet<string>(DEFAULT_EXCLUDE);
    foreach (string entry in userList.Split(','))
    {
        string word = entry.Trim();
        if (word.Length > 0)
        {
            excluded.Add(word);
        }
    }
    return excluded;
}

/// <summary>Adds every non-excluded word read from <paramref name="reader"/> to the _keywords tally.</summary>
/// <param name="reader">Text to read line by line until the end.</param>
/// <param name="excluded">Words that must not be counted.</param>
private void CountWords(TextReader reader, ICollection<string> excluded)
{
    string line;
    while ((line = reader.ReadLine()) != null)
    {
        // RemoveEmptyEntries keeps consecutive delimiters from producing empty "words".
        foreach (string word in line.Split(delimiter, StringSplitOptions.RemoveEmptyEntries))
        {
            if (excluded.Contains(word))
            {
                continue;
            }

            int count;
            _keywords.TryGetValue(word, out count); // count stays 0 for a word not seen before
            _keywords[word] = count + 1;
        }
    }
}

/// <summary>Writes the keywords that meet the threshold into phKeywords, highest count first.</summary>
/// <param name="threshold">Minimum number of occurrences for a word to be listed.</param>
private void RenderKeywords(int threshold)
{
    phKeywords.Controls.Add(new LiteralControl("<div id=\"keys\" class=\"main_area\"><h2>Keywords</h2>"));

    bool useBlue = false; // rows alternate between the "white" and "blue" CSS classes
    foreach (KeyValuePair<string, int> entry in _keywords.OrderByDescending(p => p.Value))
    {
        if (entry.Value < threshold)
        {
            continue;
        }

        string color = useBlue ? "blue" : "white";
        // The word comes from the uploaded file, so encode it before putting it into HTML.
        phKeywords.Controls.Add(new LiteralControl(
            "<div class=\"" + color + "\">" + entry.Value + " - " + Server.HtmlEncode(entry.Key) + "</div>"));
        useBlue = !useBlue;
    }

    phKeywords.Controls.Add(new LiteralControl("</div>"));
}

/// <summary>Shows an HTML-encoded error message in phError.</summary>
/// <param name="message">Plain-text message to display.</param>
private void ShowError(string message)
{
    phError.Controls.Add(new LiteralControl(
        "<div id=\"error\" class=\"main_area\"><b>Error:</b> " + Server.HtmlEncode(message) + "</div>"));
}
[/code]
The inputs are fu, a FileUpload control that holds the file you are parsing; txtExclude, a TextBox containing a comma-separated list of words to exclude (added to the default exclusions); and txtThreshhold, a TextBox where you enter the minimum number of times a word must occur before it is counted as a keyword.