PostgreSQL源码分析– 常用数据类型/SQL语句的解..
时间:2009-10-17 来源:fghler
主要分析文件: // basic nodes definition src/include/nodes/nodes.h // SQL parsed struct src/include/nodes/parsenodes.h // List定义 src/include/nodes/pg_list.h // List实现 src/backend/nodes/list.c // postgres运行入口文件 src/backend/tcop/postgres.c // utility Stmt运行文件 src/backend/tcop/utility.c // SQL analyze and rewrite src/backend/parser/analyze.c PostgreSQL用一种非常简单的形式实现了类似C++的继承,请看nodes.h : typedef struct Node { NodeTag type; } Node; 然后请看:parsenodes.h(SQL语句经过parser解析后都先对应该该文件的一个struct中) 假设有一个create table的sql: create table test (name varchar(100, pass varchar(100)); 将会被解析到如下structure: typedef struct CreateStmt { NodeTag type; RangeVar *relation; /* relation to create */ List *tableElts; /* column definitions (list of ColumnDef) */ List *inhRelations; /* relations to inherit from (list of * inhRelation) */ List *constraints; /* constraints (list of Constraint nodes) */ List *options; /* options from WITH clause */ OnCommitAction oncommit; /* what do we do at COMMIT? */ char *tablespacename; /* table space to use, or NULL */ } CreateStmt; 首先,看看该struct的第一个元素:type是一个NodeTag类型。 PG的很多struct的第一个元素都是NodeTag,这样在函数中传递指针变量时,他可以很 简单的把参数设置成:Node* 说简单点,其实有点像是所有的struct都继承了Node这个struct. 就是因为这个原因,看PG的代码很累,很多函数的参数和返回值都是一个简单的Node. 在nodes.h中有每个Node的值的定义,比如:上面说的CreateStmt的type的值就是:T_CreateStmt 然后,PG中大量的使用了链表类型:List 在pg_list.h中有定义: typedef struct List { NodeTag type; /* T_List, T_IntList, or T_OidList */ int length; ListCell *head; ListCell *tail; } List; 可以看到,List的定义是基于基类Node来进行的。 常用的List操作函数有: //取List第一个元素 ListCell *y = list_head(List *l); //得到List的元素个数 list_length(List *l); // 遍历List foreach(cell, l) { … } 其他的很多函数具体可以参考pg_list.h和list.c 下面接着说SQL的解释和执行。 所有的SQL都会先解析成一个与之相对应的struct. Select会解析到: typedef struct SelectStmt { NodeTag type; /* * These fields are used only in “leaf” SelectStmts. * * into, intoColNames, intoOptions, intoOnCommit, and intoTableSpaceName * are a kluge; they belong somewhere else… */ List *distinctClause; /* NULL, list of DISTINCT ON exprs, or * lcons(NIL,NIL) for all (SELECT DISTINCT) */ RangeVar *into; /* target table (for select into table) */ List *intoColNames; /* column names for into table */ List *intoOptions; /* options from WITH clause */ OnCommitAction intoOnCommit; /* what do we do at COMMIT? */ char *intoTableSpaceName; /* table space to use, or NULL */ List *targetList; /* the target list (of ResTarget) */ List *fromClause; /* the FROM clause */ Node *whereClause; /* WHERE qualification */ List *groupClause; /* GROUP BY clauses */ Node *havingClause; /* HAVING conditional-expression */ /* * In a “leaf” node representing a VALUES list, the above fields are all * null, and instead this field is set. Note that the elements of the * sublists are just expressions, without ResTarget decoration. Also note * that a list element can be DEFAULT (represented as a SetToDefault * node), regardless of the context of the VALUES list. It’s up to parse * analysis to reject that where not valid. */ List *valuesLists; /* untransformed list of expression lists */ /* * These fields are used in both “leaf” SelectStmts and upper-level * SelectStmts. */ List *sortClause; /* sort clause (a list of SortBy’s) */ Node *limitOffset; /* # of result tuples to skip */ Node *limitCount; /* # of result tuples to return */ List *lockingClause; /* FOR UPDATE (list of LockingClause’s) */ /* * These fields are used only in upper-level SelectStmts. */ SetOperation op; /* type of set op */ bool all; /* ALL specified? */ struct SelectStmt *larg; /* left child */ struct SelectStmt *rarg; /* right child */ /* Eventually add fields for CORRESPONDING spec here */ } SelectStmt; Delete会解析到: typedef struct DeleteStmt { NodeTag type; RangeVar *relation; /* relation to delete from */ List *usingClause; /* optional using clause for more tables */ Node *whereClause; /* qualifications */ List *returningList; /* list of expressions to return */ } DeleteStmt; Update会解析到: typedef struct UpdateStmt { NodeTag type; RangeVar *relation; /* relation to update */ List *targetList; /* the target list (of ResTarget) */ Node *whereClause; /* qualifications */ List *fromClause; /* optional from clause for more tables */ List *returningList; /* list of expressions to return */ } UpdateStmt; 从定义上看,Select比较复杂。其实在PG内部,把Select/Delete/Update当成一样处理,只是最后 找到相应的结果集时采取不同的操作。 postgres.c的804行可以看到这一步操作: parsetree_list = pg_parse_query(query_string); 第一步完成后,只是做了很简单、很粗糙的事情,然后,要进一步进行rewrite, 在交给优化器进行优化和 路径选择之前,所有的执行语句都要转换成struct Query: typedef struct Query { NodeTag type; CmdType commandType; /* select|insert|update|delete|utility */ /* 注意: 如果commandType为: utility,优化器不会对该SQL进行进一步优化,因为这个SQL 就是一些建表或者其他命令操作,无法进行路径选择和优化,这时候,executor直接 执行utilityStmt这个Node对应的Struct. 对于select|insert|update|delete这些SQL,优化器都需要进行评估和优化。 */ QuerySource querySource; /* where did I come from? */ bool canSetTag; /* do I set the command result tag? */ Node *utilityStmt; /* non-null if this is a non-optimizable * statement */ int resultRelation; /* rtable index of target relation for * INSERT/UPDATE/DELETE; 0 for SELECT */ RangeVar *into; /* target relation for SELECT INTO */ List *intoOptions; /* options from WITH clause */ OnCommitAction intoOnCommit; /* what do we do at COMMIT? */ char *intoTableSpaceName; /* table space to use, or NULL */ bool hasAggs; /* has aggregates in tlist or havingQual */ bool hasSubLinks; /* has subquery SubLink */ List *rtable; /* list of range table entries */ FromExpr *jointree; /* table join tree (FROM and WHERE clauses) */ List *targetList; /* target list (of TargetEntry) */ List *returningList; /* return-values list (of TargetEntry) */ List *groupClause; /* a list of GroupClause’s */ Node *havingQual; /* qualifications applied to groups */ List *distinctClause; /* a list of SortClause’s */ List *sortClause; /* a list of SortClause’s */ Node *limitOffset; /* # of result tuples to skip (int8 expr) */ Node *limitCount; /* # of result tuples to return (int8 expr) */ List *rowMarks; /* a list of RowMarkClause’s */ Node *setOperations; /* set-operation tree if this is top level of * a UNION/INTERSECT/EXCEPT query */ /* * If the resultRelation turns out to be the parent of an inheritance * tree, the planner will add all the child tables to the rtable and store * a list of the rtindexes of all the result relations here. This is done * at plan time, not parse time, since we don’t want to commit to the * exact set of child tables at parse time. XXX This field ought to go in * some sort of TopPlan plan node, not in the Query. */ List *resultRelations; /* integer list of RT indexes, or NIL */ /* * If the query has a returningList then the planner will store a list of * processed targetlists (one per result relation) here. We must have a * separate RETURNING targetlist for each result rel because column * numbers may vary within an inheritance tree. In the targetlists, Vars * referencing the result relation will have their original varno and * varattno, while Vars referencing other rels will be converted to have * varno OUTER and varattno referencing a resjunk entry in the top plan * node’s targetlist. XXX This field ought to go in some sort of TopPlan * plan node, not in the Query. */ List *returningLists; /* list of lists of TargetEntry, or NIL */ } Query; 这些rewrite比较复杂,是由一系列的transform函数完成的,具体可以查看analyze.c. 下面把一些Sql命令转化后的struct做一个简单的分析。 首先分析utilityStmt, 这些命令都比较简单。 (1) Create Table /* ———————- * Create Table Statement * * NOTE: in the raw gram.y output, ColumnDef, Constraint, and FkConstraint * nodes are intermixed in tableElts, and constraints is NIL. After parse * analysis, tableElts contains just ColumnDefs, and constraints contains * just Constraint nodes (in fact, only CONSTR_CHECK nodes, in the present * implementation). * ———————- */ typedef struct CreateStmt { // type应该为 T_CreateStmt NodeTag type; // relation->relname 就是要创建的table名字 RangeVar *relation; /* relation to create */ List *tableElts; /* column definitions (list of ColumnDef) */ // 由于PG的表是可以继承的,这部分先不分析 List *inhRelations; /* relations to inherit from (list of * inhRelation) */ // 这些都是constraints的定义 List *constraints; /* constraints (list of Constraint nodes) */ List *options; /* options from WITH clause */ OnCommitAction oncommit; /* what do we do at COMMIT? */ char *tablespacename; /* table space to use, or NULL */ } CreateStmt; 其中最重要的就是:tableElts这个字段,里面包含了要创建table的所有columns. 我写了几行代码,可以遍历这一List. List *schema; ListCell *col; elog(LOG, “begin create:%s”, ((CreateStmt *)parsetree)->relation->relname); schema = ((CreateStmt *)parsetree)->tableElts; // 遍历 foreach(col, schema) { ColumnDef *entry = lfirst(col); elog(LOG, “column name:%s column type:%s”, entry->colname, TypeNameToString(entry->typename)); } 建表语句: pgsql=# create table lijianghua ( name varchar(100), pass varchar(100)); CREATE TABLE log输出: LOG: begin create:lijianghua LOG: column name:name column type:pg_catalog.varchar LOG: column name:pass column type:pg_catalog.varchar (2) drop object 删除一个对象时,比如:drop table/drop view……, 命令会被解析到DropStmt struct: /* ———————- * Drop Table|Sequence|View|Index|Type|Domain|Conversion|Schema Statement * ———————- */ typedef struct DropStmt { NodeTag type; // 需要删除的对象列表 List *objects; /* list of sublists of names (as Values) */ // 对象类型 ObjectType removeType; /* object type */ DropBehavior behavior; /* RESTRICT or CASCADE behavior */ bool missing_ok; /* skip error if object is missing? */ } DropStmt; ObjectType的定义也非常简单: typedef enum ObjectType { OBJECT_AGGREGATE, OBJECT_CAST, OBJECT_COLUMN, OBJECT_CONSTRAINT, OBJECT_CONVERSION, OBJECT_DATABASE, OBJECT_DOMAIN, OBJECT_FUNCTION, OBJECT_INDEX, OBJECT_LANGUAGE, OBJECT_LARGEOBJECT, OBJECT_OPCLASS, OBJECT_OPERATOR, OBJECT_ROLE, OBJECT_RULE, OBJECT_SCHEMA, OBJECT_SEQUENCE, OBJECT_TABLE, OBJECT_TABLESPACE, OBJECT_TRIGGER, OBJECT_TYPE, OBJECT_VIEW } ObjectType; 同样,我也写了几行代码,进行了相关验证: if(parsetree->type == T_DropStmt) { List *tables; ListCell *table; tables = ((DropStmt *)parsetree)->objects; foreach(table, tables) { List *names = (List *) lfirst(table); RangeVar *rel; if(((DropStmt *)parsetree)->removeType == OBJECT_TABLE) { rel = makeRangeVarFromNameList(names); elog(LOG, “want to delete table:%s”, rel->relname); } } } SQL语句: pgsql=# drop table test,test21; DROP TABLE log输出: LOG: want to delete table:test LOG: want to delete table:test21 (3) 把比较复杂的Insert/Update/Delete/Select做一些详细的分析 未完待续! |
相关阅读 更多 +