文章详情

  • 游戏榜单
  • 软件榜单
关闭导航
热搜榜
热门下载
热门标签
php爱好者> php文档>QQ消息记录统计程序

QQ消息记录统计程序

时间:2007-07-27  来源:wibrst

    目前功能:分析每个发言者的发言次数并排序列出
    根据开关 sInputType('history' 或 'export')来决定按拷贝的聊天记录还是导出的记录的形式进行分析。
    主文件:organizeQQMsg.py
    排序模块:sortDict.py

---------------------------------------------------------------------------
# organizeQQMsg.py

import sys
import os
import re
import sortDict

def file2List(_uf):
    """Read a text file into a list of lines and detect a UTF-8 BOM.

    Args:
        _uf: path of the file to read.

    Returns:
        A ``(lines, bUTF8)`` tuple. ``lines`` is what ``readlines()``
        returned (line endings kept), with a leading UTF-8 byte order mark
        removed from the first line. ``bUTF8`` is 1 when such a mark was
        found, otherwise 0. An empty file yields ``([], 0)``.
    """
    bUTF8 = 0
    f = open(_uf, 'r')
    try:
        l = f.readlines()
    finally:
        # Release the handle even when reading fails.
        f.close()
    # An empty file has no first line to inspect, so guard the index.
    # NOTE(review): the comparison is against the three raw BOM bytes, so it
    # presumably relies on lines being byte strings (Python 2 ``str``).
    if l and l[0][:3] == '\xef\xbb\xbf':
        bUTF8 = 1
        l[0] = l[0][3:]
    return l, bUTF8

def getFilePath():
    """Prompt on the console until the user enters the path of an existing file.

    Returns:
        The first entered string for which ``os.path.isfile`` is true.
    """
    while True:
        sCandidate = raw_input("input source file path:")
        if os.path.isfile(sCandidate):
            return sCandidate

def getMsgsDictExport(aRecordRaw):
    """Count messages per author in an exported chat log.

    A header line is one that starts with ``YYYY-MM-DD HH:MM:SS `` ; the
    rest of that line (up to, but not including, the newline) is taken as
    the author name. All other lines are ignored.

    Args:
        aRecordRaw: sequence of text lines.

    Returns:
        A dict mapping each author name to the number of header lines
        found for that author.
    """
    cpTitle = re.compile(r'\d{4}-\d{2}-\d{2}\ \d{2}\:\d{2}\:\d{2}\ (.*)')
    dAuthorWords = {}

    for l in aRecordRaw:
        m = cpTitle.match(l)
        if m:
            sAuthor = m.group(1)
            # Start from 0 so that the first message counts exactly once.
            dAuthorWords[sAuthor] = dAuthorWords.get(sAuthor, 0) + 1
    return dAuthorWords

def getMsgsDictHistory(aRecordRaw):
    """Group the messages of a copied chat history by author.

    A header line is one in which some text is followed by `` HH:MM:SS``;
    the text before that time stamp is the author name. The line directly
    after a header is always treated as message content and is never
    tested for being a header itself.

    Args:
        aRecordRaw: sequence of text lines.

    Returns:
        A dict mapping each author name to a list of ``[iIdx, iStart, iEnd]``
        records, one per message, in order of appearance:

        * ``iIdx``   - 1-based running number of the message in the log;
        * ``iStart`` - index of the line following the header;
        * ``iEnd``   - index of the line preceding the next header, or the
          index of the last line of the input for the final message.

        Input without any header line (including empty input) yields ``{}``.
    """
    cpTitle = re.compile(r'(.*)\ \d{2}\:\d{2}\:\d{2}')
    dAuthorWords = {}

    aWords = None       # record of the message being read; None before the first header
    bSkipNext = 0       # set right after a header: the next line is content
    iIdx = 0
    for i, l in enumerate(aRecordRaw):
        if bSkipNext:
            bSkipNext = 0
            continue

        m = cpTitle.match(l)
        if not m:
            continue        # ordinary content line

        bSkipNext = 1
        # Close the previous message before opening the new one.
        if aWords is not None:
            aWords.append(i - 1)

        iIdx += 1
        aWords = [iIdx, i + 1]
        dAuthorWords.setdefault(m.group(1), []).append(aWords)

    # Close the final message; nothing to close when no header was seen.
    if aWords is not None:
        aWords.append(len(aRecordRaw) - 1)
    return dAuthorWords

def appendElement(e):
    """Append one output line to the module-level ``aResult`` list.

    When the module-level flag ``bUTF8`` is set, the text is first
    transcoded from UTF-8 to GBK. In both cases surrounding whitespace is
    stripped before the line is stored.

    Args:
        e: the text of the output line.
    """
    if bUTF8:
        sLine = e.decode('utf8').encode('gbk')
    else:
        sLine = e
    aResult.append(sLine.strip())

def writeResult(aResult, uInput):
    """Write the result lines to ``<input name>_stat<input extension>``.

    The output file is placed next to the input file (its absolute path is
    used) and overwritten if it already exists.

    Args:
        aResult: list of strings; they are joined with ``'\\n'``.
        uInput: path of the analysed input file, used only to derive the
            output file name.
    """
    b, e = os.path.splitext(os.path.abspath(uInput))
    uOutput = b + '_stat' + e
    f = open(uOutput, 'w')
    try:
        f.write('\n'.join(aResult))
    finally:
        # Close the handle even if the write fails.
        f.close()

if __name__ == "__main__":

    # Run configuration: edit these values in place.
    bModeOnlyTitle = 1       # 1: only the per-author totals, 0: also dump every message
    sInputType = 'export'    # 'export' or 'history'
    uInput = 'sample.txt'

    # To ask for the path interactively instead: uInput = getFilePath()
    aRecordRaw, bUTF8 = file2List(uInput)
    aResult = []             # filled by appendElement()

    if sInputType == 'export':
        # One line per author: name and message count.
        dAuthorCounts = getMsgsDictExport(aRecordRaw)
        aSorted = sortDict.getListSortDict(dAuthorCounts, 1)
        for item in aSorted:
            appendElement('%-20s [%d]' % (item[0], item[1]))

    elif sInputType == 'history':
        dAuthorWords = getMsgsDictHistory(aRecordRaw)
        aSorted = sortDict.getListSortDict(dAuthorWords, 2)
        for item in aSorted:
            sSuffix = '' if bModeOnlyTitle else ':'
            appendElement('%-20s [%d]' % (item[0], item[2]) + sSuffix)
            if not bModeOnlyTitle:
                # Dump every message of this author, numbered by its
                # position in the log, continuation lines tab-indented.
                aWords = item[1]
                for r in aWords:
                    appendElement('%02d:\t%s' % (r[0], aRecordRaw[r[1]]))
                    for j in range(r[1] + 1, r[2]):
                        appendElement('\t' + aRecordRaw[j])
                appendElement('-------------------' + os.linesep)

    writeResult(aResult, uInput)

---------------------------------------------------------------------------

# sortDict.py

import random

def getRdmDict():
    """Build a sample dict for trying out the sort helpers.

    Returns:
        A dict whose keys are the 13 letters ``'a'`` .. ``'m'`` and whose
        values are random integers drawn with ``random.randint(1, 100)``.
    """
    iFirst = ord('a')
    return dict(
        (chr(iFirst + iOffset), random.randint(1, 100))
        for iOffset in range(13)
    )

def getListSortDict(d, iMethod=1):
    """Turn a dict into a list of entries sorted in ascending order.

    Args:
        d: the dict to sort.
        iMethod: selects the value layout (defaults to 1):

            * 1 - values are counts; entries are ``[key, count]`` sorted
              by ``count``;
            * 2 - values are sequences; entries are
              ``[key, sequence, len(sequence)]`` sorted by the length.

    Returns:
        The sorted list of entries. Entries that compare equal keep the
        iteration order of ``d``. Any other ``iMethod`` yields ``[]``.
    """
    if iMethod == 1:
        aSort = [[sKey, d[sKey]] for sKey in d]
        iKeyPos = 1
    elif iMethod == 2:
        aSort = [[sKey, d[sKey], len(d[sKey])] for sKey in d]
        iKeyPos = 2
    else:
        return []

    # list.sort is stable, so ties stay in dict iteration order.
    aSort.sort(key=lambda aEntry: aEntry[iKeyPos])
    return aSort

def insertElement1(aSort, sAuthor, iTimes):
    """Insert ``[sAuthor, iTimes]`` into ``aSort``, keeping it ordered by count.

    The new entry goes in front of the first existing entry whose count
    (index 1) is greater than ``iTimes``; if there is none it is appended.
    ``aSort`` is modified in place.
    """
    iPos = len(aSort)       # default: behind everything
    for iCur, aCur in enumerate(aSort):
        if iTimes < aCur[1]:
            iPos = iCur
            break
    aSort.insert(iPos, [sAuthor, iTimes])

def insertElement2(aSort, sAuthor, aWords):
    """Insert ``[sAuthor, aWords, len(aWords)]`` into ``aSort``, ordered by length.

    The new entry goes in front of the first existing entry whose stored
    length (index 2) is greater than ``len(aWords)``; if there is none it
    is appended. ``aSort`` is modified in place.
    """
    aEntry = [sAuthor, aWords, len(aWords)]
    iPos = 0
    # Walk past every entry that is not strictly larger than the new one.
    while iPos < len(aSort) and not (aEntry[2] < aSort[iPos][2]):
        iPos += 1
    aSort.insert(iPos, aEntry)

if __name__ == '__main__':
    # Demo: sort a random {letter: number} dict and show input and result.
    d = getRdmDict()
    # The values are plain numbers, so use method 1 ([key, count] entries).
    a = getListSortDict(d, 1)
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(d)
    print(a)
相关阅读 更多 +
排行榜 更多 +
找茬脑洞的世界安卓版

找茬脑洞的世界安卓版

休闲益智 下载
滑板英雄跑酷2手游

滑板英雄跑酷2手游

休闲益智 下载
披萨对对看下载

披萨对对看下载

休闲益智 下载