文章详情

  • 游戏榜单
  • 软件榜单
关闭导航
热搜榜
热门下载
热门标签
php爱好者> php文档>我的第一个C++程序,还像个C++c程序的样子吧

我的第一个C++程序,还像个C++c程序的样子吧

时间:2010-08-25  来源:finallyliuyu

目的:从数据库中抽取文章关键词,并统计这些关键词在哪些文章中出现、各出现多少次(算是词袋子模型吧),然后对每篇文章形成VSM模型,写成weka的数据格式,最后调用weka对文章聚类。

目前“形成词袋子模型”这一块已经完毕。

其中词袋子的数据结构如下:

map<string,vector<pair<int,int>>> mymap,即“关键词 → (文章编号, 出现次数) 列表”的映射。

目前已经完成此部分的serialize(save/load)以及print功能。

#include "stdafx.h"
#include<iostream>
#include<map>
#include<vector>
#include<string>
#include<iomanip>
#include<fstream>
//#include<boost/tokenizer.hpp>
using namespace std;

 

形成词袋子模型 nt ConstructMap(map<string,vector<pair<int,int>>>&mymap)
{
    
    vector<string> mySplit(string s);
    CoInitialize(NULL);
    _ConnectionPtr pConn(__uuidof(Connection));
    _RecordsetPtr pRst(__uuidof(Recordset));
    pConn->ConnectionString="Provider=SQLOLEDB.1;Password=xxx;Persist Security Info=True; User ID=sa;Initial Catalog=ArticleCollection";
    pConn->Open("","","",adConnectUnspecified);
    pRst=pConn->Execute("select CKeyWord,ArticleId from Article order by ArticleId",NULL,adCmdText);
    while(!pRst->rsEOF)
    {    vector<string>wordcollection;
        string keywordstr=(_bstr_t)pRst->GetCollect("CKeyWord");
        if(keywordstr!="")
        {
                wordcollection=mySplit(keywordstr);
                string tempid=(_bstr_t)pRst->GetCollect("ArticleId");
                int articleid=atoi(tempid.c_str());
                for(vector<string>::iterator strit=wordcollection.begin();strit!=wordcollection.end();strit++)
                {
                    vector<pair<int,int>>::iterator it;
                    if(mymap[*strit].empty())
                    {
                        pair<int,int>mytemppair=make_pair(articleid,1);
                        mymap[*strit].push_back(mytemppair);

                    }
                    else
                    {
                        for(it=mymap[*strit].begin();it!=mymap[*strit].end();it++)
                        {  
                            if(it->first==articleid)
                            {
                                it->second=++(it->second);
                                break;
                            }
                    
                        }
                        if(it==mymap[*strit].end())
                        {
                            pair<int,int>mytemppair=make_pair(articleid,1);
                            mymap[*strit].push_back(mytemppair);
                        }

                    }

            }
            

        }
        
        
        pRst->MoveNext();
        wordcollection.clear();
    }
    pRst->Close();
    pConn->Close();
    pRst.Release();
    pConn.Release();
    CoUninitialize();
    return 0;

}

 

 

// Loads the bag-of-words model from c:\mydict.dat.
//
// Expected text layout (whitespace separated), matching what save() writes:
//   <number of entries>
//   <key> <number of pairs> <articleId> <count> ; <articleId> <count> ; ...
//   ...
//
// mymap: in/out. Each key read from the file is inserted, or replaces an
//        existing entry with the same key. The map is not cleared first.
//
// If the file cannot be opened or its header cannot be read, mymap is left
// untouched. A record that is cut short (fewer pairs than announced) is
// dropped and loading stops there.
void load(std::map<std::string,std::vector<std::pair<int,int> > >&mymap)
{
    std::ifstream infile("c:\\mydict.dat",std::ios::binary);

    // Leading entry count. It is only consumed, not used as a loop bound:
    // records are read until the stream runs out, as before.
    int lenMyMap=0;
    if(!(infile>>lenMyMap))
    {
        return;
    }

    std::string key;   // keyword of the current record
    int lenVector=0;   // number of (articleId, count) pairs that follow

    // Loop on the extraction itself rather than on eof(): eof() only turns
    // true after a read has already failed, which used to cause one extra
    // pass that overwrote the last entry with repeated stale values.
    while(infile>>key>>lenVector)
    {
        std::vector<std::pair<int,int> > temp;
        int articleId=0;
        int count=0;
        std::string semicolon;  // consumes the ";" separator after each pair

        for(int i=0;i<lenVector;++i)
        {
            if(!(infile>>articleId>>count>>semicolon))
            {
                break;
            }
            temp.push_back(std::make_pair(articleId,count));
        }

        if(!infile)
        {
            // Truncated record: do not store a partial posting list.
            break;
        }

        mymap[key].swap(temp);
    }
}

 

// Saves the bag-of-words model to c:\mydict.dat as text.
//
// Layout written:
//   <number of entries>\n
//   then, for every entry in key order:
//   <key>\n
//   <number of pairs>\n
//   <articleId> <count> ; <articleId> <count> ; ...\n
//
// mymap: keyword -> list of (articleId, count) pairs; it is only read.
//
// The map is written field by field as text; an earlier attempt to write the
// map object's raw bytes was commented out in the original and is dropped.
void save(std::map<std::string,std::vector<std::pair<int,int> > >&mymap)
{
    std::ofstream outfile("c:\\mydict.dat",std::ios::binary);
    outfile<<mymap.size()<<'\n';

    typedef std::map<std::string,std::vector<std::pair<int,int> > >::const_iterator EntryIt;
    typedef std::vector<std::pair<int,int> >::const_iterator PairIt;

    for(EntryIt entry=mymap.begin();entry!=mymap.end();++entry)
    {
        outfile<<entry->first<<'\n';
        outfile<<entry->second.size()<<'\n';
        for(PairIt p=entry->second.begin();p!=entry->second.end();++p)
        {
            outfile<<p->first<<" "<<p->second<<" "<<";"<<" ";
        }
        outfile<<'\n';
    }
    // outfile is flushed and closed by its destructor.
}

// Prints the bag-of-words model to standard output.
//
// Layout printed:
//   <number of entries>\n
//   then, for every entry in key order:
//   <key>\n
//   <number of pairs>\n
//   <articleId>,<count>;<articleId>,<count>;...\n
//
// mymap: keyword -> list of (articleId, count) pairs; it is only read.
void print(std::map<std::string,std::vector<std::pair<int,int> > >&mymap)
{
    std::cout<<mymap.size()<<'\n';

    typedef std::map<std::string,std::vector<std::pair<int,int> > >::const_iterator EntryIt;
    typedef std::vector<std::pair<int,int> >::const_iterator PairIt;

    for(EntryIt entry=mymap.begin();entry!=mymap.end();++entry)
    {
        std::cout<<entry->first<<'\n';
        std::cout<<entry->second.size()<<'\n';
        for(PairIt p=entry->second.begin();p!=entry->second.end();++p)
        {
            std::cout<<p->first<<','<<p->second<<";";
        }
        std::cout<<'\n';
    }
}

 

 

 

相关阅读 更多 +
排行榜 更多 +
幸存者的命运

幸存者的命运

飞行射击 下载
精英战区3d

精英战区3d

飞行射击 下载
货运猎人

货运猎人

飞行射击 下载