as86的词法分析是手写代码实现的,没有使用lex之类的自动构造工具。原因包括:汇编器的词法分析相对于编译器的语法分析来说简单得多;手工实现也使得代码简短得多。这与lcc的作者的主张相同。词法分析一般采用字符映射的方法:定义一个256个元素的数组(256是一个8位字符所有可能取值的个数,其中ASCII码占前128个),对于一个字符,以它的值作为索引,相应的数组元素就是这个字符所属的符号类别。参见scan.c。
/*
 * symofchar: lookup table that maps a character code (0-255, used as the
 * array index) to the symbol class of that character.
 *
 * Notable entries, as laid out below:
 *   - NUL (0), newline (10), '!' and ';' all map to EOLSYM
 *     (presumably '!' and ';' start a comment -- TODO confirm in getsym()).
 *   - every other control character, the space character, and all codes
 *     128-255 map to WHITESPACE.
 *   - letters, '_' and '.' map to IDENT; decimal digits map to INTCONST.
 *   - backslash, '^', '`', '{', '}' and DEL (127) map to OTHERSYM.
 *
 * The table is positional: each row holds four consecutive character codes,
 * so inserting, removing or reordering an entry shifts every later mapping.
 */
PRIVATE char symofchar[256] = /* table to convert chars to their symbols */
{
EOLSYM, WHITESPACE, WHITESPACE, WHITESPACE, /* 0-3: NUL ends the line */
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE, /* 4-7 */
WHITESPACE, WHITESPACE, EOLSYM, WHITESPACE, /* 8-11: BS, TAB, LF (ends the line), VT */
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE, /* 12-15: FF, CR, SO, SI */
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE, /* 16-19 */
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE, /* 20-23 */
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE, /* 24-27 */
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE, /* 28-31 */
WHITESPACE, EOLSYM, STRINGCONST, IMMEDIATE, /* space ! " # */
HEXCONST, BINCONST, ANDOP, CHARCONST, /* $%&' */
LPAREN, RPAREN, STAR, ADDOP, /* ()*+ */
COMMA, SUBOP, IDENT, SLASH, /* ,-./ */
INTCONST, INTCONST, INTCONST, INTCONST, /* 0123 */
INTCONST, INTCONST, INTCONST, INTCONST, /* 4567 */
INTCONST, INTCONST, COLON, EOLSYM, /* 89:; */
LESSTHAN, EQOP, GREATERTHAN, MACROARG, /* <=>? */
INDIRECT, IDENT, IDENT, IDENT, /* @ABC */
IDENT, IDENT, IDENT, IDENT, /* DEFG */
IDENT, IDENT, IDENT, IDENT, /* HIJK */
IDENT, IDENT, IDENT, IDENT, /* LMNO */
IDENT, IDENT, IDENT, IDENT, /* PQRS */
IDENT, IDENT, IDENT, IDENT, /* TUVW */
IDENT, IDENT, IDENT, LBRACKET, /* XYZ[ */
OTHERSYM, RBRACKET, OTHERSYM, IDENT, /* \]^_ */
OTHERSYM, IDENT, IDENT, IDENT, /* `abc */
IDENT, IDENT, IDENT, IDENT, /* defg */
IDENT, IDENT, IDENT, IDENT, /* hijk */
IDENT, IDENT, IDENT, IDENT, /* lmno */
IDENT, IDENT, IDENT, IDENT, /* pqrs */
IDENT, IDENT, IDENT, IDENT, /* tuvw */
IDENT, IDENT, IDENT, OTHERSYM, /* xyz{ */
OROP, OTHERSYM, NOTOP, OTHERSYM, /* | } ~ DEL */
/* 128-255: every code above the ASCII range is classified as WHITESPACE */
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE,
WHITESPACE, WHITESPACE, WHITESPACE, WHITESPACE
};
函数getsym()使用symofchar数组,对一行中的字符逐个进行映射,直到得到一个完整的单词符号(token,通常译作“记号”或“词法单元”),然后返回。