由一个小问题说开去
时间:2010-09-20 来源:山伟
问题:一个日志文件,记录形如访问时间、url这样的记录,每行一条记录。请按每个url访问的次数进行逆排序
一、很简单的一个问题,首先用很普通的思维来解决:
1)先模拟来生成一个类似的文件:
/// <summary>
/// Generates a sample access log in "url.data": 40 records, one per line, each of
/// the form "&lt;timestamp&gt;\t&lt;url&gt;". Every record is also echoed to the console.
/// </summary>
private static void GenerateUrlLog()
{
    string[] urlList =
    {
        "http://www.sohu.com",
        "http://www.baidu.com",
        "http://www.google.com",
        "http://www.sina.com.cn",
        "http://www.cnblogs.com",
        "http://www.yahoo.com",
        "http://www.csdn.net"
    };

    // Fixed seed: the same sequence of URL picks is produced on every run.
    Random random = new Random(20);

    // The using block flushes and closes the file even if a write throws,
    // which the previous manual Flush()/Dispose() pair did not guarantee.
    using (StreamWriter writer = new StreamWriter("url.data"))
    {
        for (int i = 0; i < 40; i++)
        {
            int index = random.Next(urlList.Length);

            // Format once so the console and the file receive the identical record.
            string record = string.Format("{0}\t{1}", DateTime.Now, urlList[index]);
            Console.WriteLine(record);
            writer.WriteLine(record);

            // Pause between records; presumably so consecutive entries carry
            // different timestamps.
            Thread.Sleep(2000);
        }
    }

    Console.WriteLine("-------------------------------------");
}
2)顺序遍历文件,用Dictionary<string, int>来记录每个url的访问次数:
/// <summary>
/// Reads "url.data" (one "&lt;timestamp&gt;\t&lt;url&gt;" record per line), counts how many
/// times each URL occurs, and prints the URLs from most to least visited, followed
/// by the number of distinct URLs.
/// </summary>
/// <remarks>
/// Ranking is done by repeatedly extracting the entry with the highest count, so the
/// counting dictionary is empty once printing has finished.
/// </remarks>
private static void ParseUrlLog()
{
    Dictionary<string, int> dicLogInfo = new Dictionary<string, int>();

    // The using block closes the file even if parsing throws.
    using (StreamReader reader = new StreamReader("url.data"))
    {
        string urlLog;
        while ((urlLog = reader.ReadLine()) != null)
        {
            string[] logProperty = urlLog.Split('\t');
            if (logProperty.Length < 2)
            {
                // Skip lines that do not contain a tab-separated URL field
                // instead of failing with IndexOutOfRangeException.
                continue;
            }

            string url = logProperty[1];

            // TryGetValue leaves count at 0 for a URL seen for the first time,
            // so one lookup covers both the "new" and the "existing" case.
            int count;
            dicLogInfo.TryGetValue(url, out count);
            dicLogInfo[url] = count + 1;
        }
    }

    Console.WriteLine("-------------------------------------");

    // Remember the number of distinct URLs now: the loop below empties the dictionary.
    int urlLogCount = dicLogInfo.Count;
    for (int i = 0; i < urlLogCount; i++)
    {
        int max = 0;
        string maxUrl = "";
        foreach (string key in dicLogInfo.Keys)
        {
            if (max < dicLogInfo[key])
            {
                max = dicLogInfo[key];
                maxUrl = key;
            }
        }

        // Remove the current maximum so the next pass finds the next-largest entry.
        dicLogInfo.Remove(maxUrl);
        Console.WriteLine(string.Format("{0}\t{1}", maxUrl, max));
    }

    // The original line referenced an undeclared 'result' variable and did not
    // compile; print the saved distinct-URL count instead.
    Console.WriteLine(urlLogCount);
    Console.WriteLine("-------------------------------------");
}
二、既然都.net 4.0了,就用新的方式来处理一下:
1)还是生成日志文件,改用.net 4的并行方式:
/// <summary>
/// Generates a sample access log in "url.data" using Parallel.For: 40 records, one
/// per line, each of the form "&lt;timestamp&gt;\t&lt;url&gt;". Every record is also echoed
/// to the console.
/// </summary>
/// <remarks>
/// The iterations run concurrently, so access to the shared Random and StreamWriter
/// is serialized with a lock. The per-record pause happens outside the lock, so the
/// iterations still overlap.
/// </remarks>
private static void GenerateUrlLog()
{
    string[] urlList =
    {
        "http://www.sohu.com",
        "http://www.baidu.com",
        "http://www.google.com",
        "http://www.sina.com.cn",
        "http://www.cnblogs.com",
        "http://www.yahoo.com",
        "http://www.csdn.net"
    };

    Random random = new Random(20);

    // Guards the shared Random and StreamWriter: instance members of both types
    // are not thread-safe, and unsynchronized use can corrupt the random state
    // or interleave partially written lines.
    object gate = new object();

    // The using block flushes and closes the file even if an iteration throws.
    using (StreamWriter writer = new StreamWriter("url.data"))
    {
        Parallel.For(0, 40, j =>
        {
            lock (gate)
            {
                int index = random.Next(urlList.Length);

                // Format once so the console and the file receive the identical record.
                string record = string.Format("{0}\t{1}", DateTime.Now, urlList[index]);
                Console.WriteLine(record);
                writer.WriteLine(record);
            }

            // Sleep outside the lock so other iterations can proceed meanwhile.
            Thread.Sleep(2000);
        });
    }

    Console.WriteLine("-------------------------------------");
}
生成文件速度比之前的方式快,毕竟是并行的;之前的方式也可以改用多线程来实现,做法类似。需要注意:Random 和 StreamWriter 的实例成员都不是线程安全的,多个线程共用同一个实例时必须加锁同步。
2)用LINQ来查询数据:
/// <summary>
/// Reads "url.data" (one "&lt;timestamp&gt;\t&lt;url&gt;" record per line), counts how many
/// times each URL occurs, and uses a LINQ query to print the URLs from most to least
/// visited, followed by the number of distinct URLs.
/// </summary>
/// <remarks>
/// Unlike a destructive max-extraction loop, the counting dictionary and the sorted
/// result both remain usable after printing.
/// </remarks>
private static void ParseUrlLog()
{
    Dictionary<string, int> dicLogInfo = new Dictionary<string, int>();

    // The using block closes the file even if parsing throws.
    using (StreamReader reader = new StreamReader("url.data"))
    {
        string urlLog;
        while ((urlLog = reader.ReadLine()) != null)
        {
            string[] logProperty = urlLog.Split('\t');
            if (logProperty.Length < 2)
            {
                // Skip lines that do not contain a tab-separated URL field.
                continue;
            }

            // Records have exactly two fields: [0] = timestamp, [1] = URL.
            // The previous index 2 was out of range for every well-formed line.
            string url = logProperty[1];

            // NOTE(review): PrintParseLog is defined outside this snippet; it is
            // passed the timestamp and the URL of the record - confirm its contract.
            PrintParseLog(logProperty[0], url);

            // TryGetValue leaves count at 0 for a URL seen for the first time,
            // so one lookup covers both the "new" and the "existing" case.
            int count;
            dicLogInfo.TryGetValue(url, out count);
            dicLogInfo[url] = count + 1;
        }
    }

    Console.WriteLine("-------------------------------------");

    // Materialize once: a deferred query would re-sort on every enumeration,
    // including the count printed below.
    var result = (from url in dicLogInfo orderby url.Value descending select url).ToList();
    foreach (var item in result)
    {
        Console.WriteLine("{0}\t{1}", item.Key, item.Value);
    }

    Console.WriteLine(result.Count);
    Console.WriteLine("-------------------------------------");
}
查数据部分少了很多代码,最重要的是打印出url访问次数后,Dictionary<string,int>还可以使用,不像上面的方式打印完后Dictionary也就清空了,而且result也可以多次使用。
3)既然LINQ是查数据的,为什么还要构建一个Dictionary,直接查不可以吗?可以:
/// <summary>
/// Reads every line of "url.data" (one "&lt;timestamp&gt;\t&lt;url&gt;" record per line) and
/// uses LINQ grouping, without an intermediate dictionary, to print each URL with its
/// visit count from most to least visited, followed by the number of distinct URLs.
/// </summary>
private static void ParseUrlLog2()
{
    List<string> urlLog = new List<string>();

    // The using block closes the file even if reading throws.
    using (StreamReader reader = new StreamReader("url.data"))
    {
        string tempUrl;
        while ((tempUrl = reader.ReadLine()) != null)
        {
            urlLog.Add(tempUrl);
        }
    }

    Console.WriteLine("-------------------------------------");

    // Split each line once, drop lines without a URL field, group by URL and
    // project each group to (Url, Count) so the count is computed a single time.
    // ToList() materializes the result; the deferred query would otherwise be
    // re-executed by the foreach and again by the final count.
    var result = urlLog
        .Select(line => line.Split('\t'))
        .Where(fields => fields.Length >= 2)
        .GroupBy(fields => fields[1])
        .Select(group => new { Url = group.Key, Count = group.Count() })
        .OrderByDescending(entry => entry.Count)
        .ToList();

    foreach (var item in result)
    {
        Console.WriteLine("{0}\t{1}", item.Url, item.Count);
    }

    Console.WriteLine(result.Count);
    Console.WriteLine("-------------------------------------");
}