Thursday, February 18, 2010

How do you find duplicate words in a file using C#?

/// <summary>
/// Reads a text file and returns every word occurrence that repeats a word
/// already seen earlier in the file.
/// </summary>
/// <param name="filePath">Path of the text file to scan.</param>
/// <returns>
/// The repeated occurrences in file order. A word that appears three times
/// contributes two entries (its second and third occurrences). The list is
/// empty when the file contains no repeated word.
/// </returns>
/// <remarks>
/// Words are separated by spaces, tabs, carriage returns and line feeds, and
/// are compared case-sensitively (ordinal). Exceptions raised while opening
/// or reading the file propagate to the caller.
/// </remarks>
static List<string> countDuplicateWordsInFile(string filePath)
{
    string contents = File.ReadAllText(filePath);

    // Split on each whitespace character individually so that files with
    // "\n"-only line endings or tab-separated words are tokenized correctly,
    // not only those using spaces and "\r\n".
    char[] separators = { ' ', '\t', '\r', '\n' };
    string[] words = contents.Split(separators, StringSplitOptions.RemoveEmptyEntries);

    List<string> duplicateWords = new List<string>();
    HashSet<string> seenWords = new HashSet<string>(StringComparer.Ordinal);

    foreach (string word in words)
    {
        // HashSet.Add returns false when the word is already present, which
        // gives the membership test and the insert in a single lookup.
        if (!seenWords.Add(word))
        {
            duplicateWords.Add(word);
        }
    }

    return duplicateWords;
}

No comments:

Post a Comment