Main Page | Class Hierarchy | Class List | File List | Class Members | File Members

syntax.cc

Go to the documentation of this file.
00001 /* 
00002  *      HT Editor
00003  *      syntax.cc
00004  *
00005  *      Copyright (C) 1999-2002 Stefan Weyergraf (stefan@weyergraf.de)
00006  *
00007  *      This program is free software; you can redistribute it and/or modify
00008  *      it under the terms of the GNU General Public License version 2 as
00009  *      published by the Free Software Foundation.
00010  *
00011  *      This program is distributed in the hope that it will be useful,
00012  *      but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *      GNU General Public License for more details.
00015  *
00016  *      You should have received a copy of the GNU General Public License
00017  *      along with this program; if not, write to the Free Software
00018  *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00019  */
00020 
00021 #include <ctype.h>
00022 #include <stdlib.h>
00023 #include <string.h>
00024 #include <sys/types.h>
00025 
00026 #include "htexcept.h"
00027 extern "C" {
00028 #include "regex.h"
00029 }
00030 #include "syntax.h"
00031 #include "htpal.h"
00032 
/*
 *      Runs the precompiled regex `preg' against the C string `str' and
 *      accepts the result only if the match begins at offset 0.
 *      On success the length of the match is stored in *len (when len is
 *      not NULL) and true is returned. On failure *len is left untouched
 *      and false is returned.
 */
bool regmatch(char *str, regex_t *preg, int *len)
{
// FIXME: use REG_STARTEND ? non-POSIX but could be pretty useful.
        regmatch_t match;
        if (regexec(preg, str, 1, &match, 0) != 0) return false;
        /* a match somewhere later in the string does not count */
        if (match.rm_so != 0) return false;
        if (len != NULL) *len = match.rm_eo - match.rm_so;
        return true;
}
00044 
/*
 *      Binary search for the key s[0..slen) in a table of NUL-terminated
 *      strings sorted in strcmp() order (see create_sorted_stringtable()).
 *
 *      s             - start of the key; it need not be NUL-terminated at
 *                      slen, and nothing beyond s[slen-1] is examined
 *      slen          - number of key characters
 *      strings       - sorted table
 *      strings_count - number of entries in the table
 *
 *      Returns true if some table entry is exactly equal to the key.
 */
bool match_sorted_stringtable(char *s, UINT slen, char **strings, UINT strings_count)
{
        int lo = 0, hi = (int)strings_count - 1;
        while (lo <= hi) {
                int mid = lo + ((hi - lo) >> 1);
                const char *entry = strings[mid];
                /* compare only the key's own characters ... */
                int d = strncmp(s, entry, slen);
                /* ... and if the key is a proper prefix of the entry, the key sorts first */
                if ((d == 0) && (strlen(entry) > slen)) d = -1;
                if (d < 0) {
                        hi = mid - 1;
                } else if (d > 0) {
                        lo = mid + 1;
                } else {
                        return true;
                }
        }
        return false;
}
00059 
00060 /*
00061  *      CLASS ht_syntax_lexer
00062  */
00063 
/*
 *      Base-class implementation: does nothing.
 *      (ht_c_syntax_lexer's version of this method reloads its palette.)
 */
void ht_syntax_lexer::config_changed()
{
}
00067 
00068 /*
00069  *      CLASS ht_lang_syntax_lexer
00070  */
00071 
/* Bit mask with only the bit numbered `state' set. Rule `needstate'
   fields are built by ORing these masks together. */
#define LSTSET(state) (1<<(state))

/* Rule initializer: while in one of `needstates', consume any single
   character as `token'. The state field is 0, i.e. the lexer state is
   left unchanged. */
#define SL_RULE_ANYCHAR(needstates, token)\
        { needstates,\
          false, LRST_ANYCHAR, NULL, 0, token }

/* Rule initializer: while in one of `needstates', match the end of the
   line (the special regex "$") and switch to `state'. The token value is 0. */
#define SL_RULE_LINEEND(needstates, state)\
        { needstates,\
          false, LRST_REGEX, "$", state, 0 }
00081 
/*
 *      Initializes the base lexer and installs the rule table `lr'.
 *      `lr' must end with an entry whose needstate is 0; the table is
 *      referenced, not copied (see set_lexer_rules()).
 */
void ht_lang_syntax_lexer::init(syntax_lexer_rule *lr)
{
        ht_syntax_lexer::init();
        set_lexer_rules(lr);
}
00087 
/*
 *      Releases the precompiled rule data, then shuts down the base lexer.
 */
void ht_lang_syntax_lexer::done()
{
        free_lexer_rules();
        ht_syntax_lexer::done();
}
00093 
00094 void ht_lang_syntax_lexer::free_lexer_rules()
00095 {
00096         for (int i=0; i<lexer_rules_count; i++) {
00097                 if (lexer_rules[i].string_type==LRST_REGEX) {
00098                         regfree((regex_t*)lexer_rules_precompiled[i]);
00099                         free(lexer_rules_precompiled[i]);
00100                 } else {
00101                 }
00102         }
00103         free(lexer_rules_precompiled);
00104 }
00105 
00106 
00107 lexer_token ht_lang_syntax_lexer::gettoken(void *b, UINT buflen, text_pos p, bool start_of_line, lexer_state *ret_state, UINT *ret_len)
00108 {
00109         syntax_lexer_rule *lr=lexer_rules;
00110         char *buf = (char*)b;
00111         int i=0;
00112         while (lr->needstate) {
00113                 if ((lr->needstate & LSTSET(*ret_state)) &&
00114                 (!lr->need_line_start || start_of_line)) {
00115                         int l=0;
00116                         bool matched=false;
00117                         bool preserve_state=false;
00118                         switch (lr->string_type) {
00119                                 case LRST_ANYCHAR: {
00120                                         if (buflen>0) l=1;
00121                                         break;
00122                                 }
00123                                 case LRST_WHITESPACE: {
00124 //                                      if ((buflen>0) && ((byte)*buf<=32)) l=1;
00125                                         if ((buflen>0) && ((*buf==32) || (*buf=='\t'))) l=1;
00126                                         break;
00127                                 }
00128                                 case LRST_STRING: {
00129                                         UINT sl = strlen(lr->string);
00130                                         if ((buflen >= sl) && (memcmp(buf, lr->string, sl)==0)) {
00131                                                 l = sl;
00132                                         }
00133                                         break;
00134                                 }
00135                                 case LRST_STRING_EXPECT: {
00136 /*                      char *q = strstr(buf, lr->string);
00137                                         if (q) {
00138                                                 l=q-buf+strlen(lr->string);
00139                                         } else {
00140                                                 l = strlen(buf);
00141                                                 preserve_state=true;
00142                                         }*/
00143                                         break;
00144                                 }
00145                                 case LRST_REGEX: {
00146 #if 0
00147                                         if /*(*/(strcmp(lr->string, "$")==0)/* && (buflen>0))*/ {
00148                                                 matched=true;
00149                                         } else if (regmatch(buf, (regex_t*)lexer_rules_precompiled[i], &l)) {
00150                                         }
00151 #else
00152                                         if (strcmp(lr->string, "$") == 0) {
00153                                                 matched = (buflen == 0);
00154                                         } else if (regmatch(buf, (regex_t*)lexer_rules_precompiled[i], &l)) {
00155                                         }
00156 #endif
00157                                         break;
00158                                 }
00159                                 case LRST_CHARSET: {
00160                                         if (buflen) {
00161                                                 if (*buf && (strchr(lr->string, *buf))) l=1;
00162                                         }
00163                                         break;
00164                                 }
00165                                 case LRST_IDENTIFIER: {
00166                                         if (isalpha(*buf) || *buf == '_') {
00167                                                 char *b = buf+1;
00168                                                 while (isalnum(*b) || *b == '_') b++;
00169                                                 l = b-buf;
00170                                         }
00171                                         break;
00172                                 }
00173                                 case LRST_DQSTRING: {
00174                                         if (*buf == '"') {
00175                                                 char *b = buf+1;
00176                                                 while (*b && (*b != '"')) b++;
00177                                                 if (*b == '"') l = b+1-buf;
00178                                         }
00179                                         break;
00180                                 }
00181                                 case LRST_QSTRING: {
00182                                         if (*buf == '\'') {
00183                                                 char *b = buf+1;
00184                                                 while (*b && (*b != '\'')) b++;
00185                                                 if (*b == '\'') l = b+1-buf;
00186                                         }
00187                                         break;
00188                                 }
00189                                 case LRST_EMPTY:
00190                                         break;
00191                         }
00192                         if (matched || l) {
00193                                 if (lr->state && !preserve_state) *ret_state=lr->state;
00194                                 *ret_len=l;
00195                                 return lr->token;
00196                         }
00197                 }
00198                 lr++;
00199                 i++;
00200         }
00201 /* error, no rule matched... */
00202         if (buflen) {
00203                 *ret_len=1;
00204                 return geterrortoken();
00205         }
00206         *ret_len=0;
00207         return 0;
00208 }
00209 
00210 void ht_lang_syntax_lexer::set_lexer_rules(syntax_lexer_rule *lr)
00211 {
00212         lexer_rules=lr;
00213         
00214         lexer_rules_count=0;
00215         while (lexer_rules[lexer_rules_count].needstate) {
00216                 lexer_rules_count++;
00217         }
00218         lexer_rules_precompiled=(void**)malloc(sizeof (void**) * lexer_rules_count);
00219         for (int i=0; i<lexer_rules_count; i++) {
00220                 if (lexer_rules[i].string_type==LRST_REGEX) {
00221                         regex_t *preg=(regex_t*)malloc(sizeof (regex_t));
00222 
00223                         /* add an anchor in front of regex */
00224                         int rl=strlen(lexer_rules[i].string)+1;
00225                         char *regex=(char*)malloc(1+rl);
00226                         *regex='^';
00227                         memmove(regex+1, lexer_rules[i].string, rl);
00228                         
00229                         if (regcomp(preg, regex, REG_EXTENDED))
00230                                 throw ht_exception();
00231                                 
00232                         free(regex);
00233                         
00234                         lexer_rules_precompiled[i]=preg;
00235                 } else {
00236                         lexer_rules_precompiled[i]=NULL;
00237                 }
00238         }
00239 }
00240 
00241 /*
00242  *      CLASS ht_c_syntax_lexer
00243  */
00244 
/* C lexer states */
/* (state values are used as bit numbers by LSTSET(), so 0 is not a
   usable state; a rule's state field of 0 means "no state change") */
#define LEX_CST_NORMAL                  1
#define LEX_CST_STRING                  2
#define LEX_CST_PREPROCESS              3
#define LEX_CST_COMMENT                 4
#define LEX_CST_COMMENT_EOL        5

/* C lexer tokens */
/* (LEX_CTOK_RIDENTIFIER is never produced by a rule; ht_c_syntax_lexer::gettoken()
   substitutes it for LEX_CTOK_IDENTIFIER when the identifier is a reserved word) */
#define LEX_CTOK_ERROR                  1
#define LEX_CTOK_WHITESPACE             2
#define LEX_CTOK_COMMENT                        3
#define LEX_CTOK_PREPROCESS             4
#define LEX_CTOK_IDENTIFIER             5
#define LEX_CTOK_RIDENTIFIER            6
#define LEX_CTOK_NUMBER                 7
#define LEX_CTOK_FNUMBER                        8
#define LEX_CTOK_STRING                 9
#define LEX_CTOK_CHAR                   10
#define LEX_CTOK_SYMBOL                 11
00264 
00265 syntax_lexer_rule c_syntax_lexer_rules[]={
00266 /* preprocessor directives */
00267         { LSTSET(LEX_CST_NORMAL),
00268           true, LRST_REGEX, " *#", LEX_CST_PREPROCESS, LEX_CTOK_PREPROCESS },
00269         SL_RULE_LINEEND(LSTSET(LEX_CST_PREPROCESS), LEX_CST_NORMAL),
00270         SL_RULE_ANYCHAR(LSTSET(LEX_CST_PREPROCESS), LEX_CTOK_PREPROCESS),
00271 /* whitespaces */
00272         { LSTSET(LEX_CST_NORMAL),
00273           false, LRST_WHITESPACE, NULL, 0, LEX_CTOK_WHITESPACE },
00274 /* '/ *' - '* /' multiline comments */
00275         { LSTSET(LEX_CST_NORMAL) | LSTSET(LEX_CST_PREPROCESS),
00276           false, LRST_STRING, "/*", LEX_CST_COMMENT, LEX_CTOK_COMMENT },
00277         { LSTSET(LEX_CST_COMMENT),
00278           false, LRST_STRING, "*/", LEX_CST_NORMAL, LEX_CTOK_COMMENT },
00279         SL_RULE_ANYCHAR(LSTSET(LEX_CST_COMMENT), LEX_CTOK_COMMENT),
00280 /* "..." (multiline) strings */
00281         { LSTSET(LEX_CST_NORMAL),
00282           false, LRST_STRING, "\"", LEX_CST_STRING, LEX_CTOK_STRING },
00283         { LSTSET(LEX_CST_STRING),
00284           false, LRST_STRING, "\\\"", LEX_CST_STRING, LEX_CTOK_STRING },
00285         { LSTSET(LEX_CST_STRING),
00286           false, LRST_STRING, "\"", LEX_CST_NORMAL, LEX_CTOK_STRING },
00287         SL_RULE_ANYCHAR(LSTSET(LEX_CST_STRING), LEX_CTOK_STRING),
00288 /* '//' one line comments */
00289         { LSTSET(LEX_CST_NORMAL) | LSTSET(LEX_CST_PREPROCESS),
00290           false, LRST_STRING, "//", LEX_CST_COMMENT_EOL, LEX_CTOK_COMMENT },
00291         SL_RULE_LINEEND(LSTSET(LEX_CST_COMMENT_EOL), LEX_CST_NORMAL),
00292         SL_RULE_ANYCHAR(LSTSET(LEX_CST_COMMENT_EOL), LEX_CTOK_COMMENT),
00293 /* symbols */
00294         { LSTSET(LEX_CST_NORMAL),
00295           false, LRST_CHARSET, "(){};,.[]!~%+-/*=<>|&^?:", 0, LEX_CTOK_SYMBOL },
00296 /* identifiers */
00297         { LSTSET(LEX_CST_NORMAL),
00298           false, LRST_IDENTIFIER, NULL, 0, LEX_CTOK_IDENTIFIER },
00299 /* floats */
00300         { LSTSET(LEX_CST_NORMAL),
00301           false, LRST_REGEX, "[0-9]+\\.[0-9]+(e[+-]?[0-9]+)?", 0, LEX_CTOK_FNUMBER },
00302 /* numbers */
00303         { LSTSET(LEX_CST_NORMAL),
00304           false, LRST_REGEX, "0[xX][0-9a-fA-F]+", 0, LEX_CTOK_NUMBER },
00305         { LSTSET(LEX_CST_NORMAL),
00306           false, LRST_REGEX, "[0-9]+", 0, LEX_CTOK_NUMBER },
00307 /* chars */
00308         { LSTSET(LEX_CST_NORMAL),
00309           false, LRST_REGEX, "'[^'\\]'", 0, LEX_CTOK_CHAR },
00310         { LSTSET(LEX_CST_NORMAL),
00311           false, LRST_REGEX, "'\\\\.{1,3}'", 0, LEX_CTOK_CHAR },
00312 
00313         { 0, 0, LRST_EMPTY, false, false, 0 }
00314 };
00315 
/*
 *      Reserved words of C/C++, grouped by kind and terminated by NULL.
 *      The table is not sorted; ht_c_syntax_lexer::init() creates a sorted
 *      copy of it for lookups.
 */
char *c_reserved[]=
{
/* types */
        "bool", "char", "void", "int", "short", "long",
        "unsigned", "signed", "float", "double",
/* consts */
        "true", "false",
/* statements */
        "return", "if", "else", "while", "do", "goto", "asm",
        "switch", "case", "default", "break", "continue", "for",
/* object */
        "new", "delete", "this",
/* declarations */
        "struct", "union", "enum", "class", "template", "operator",
        "typedef",
/* modifiers */
        "public", "protected", "private", "friend", "const",
        "extern", "inline", "register", "static", "volatile", "virtual",
/* exceptions */
        "try", "catch", "throw",
/* misc */
        "sizeof",

        NULL
};

/* palette key passed to load_pal() by ht_c_syntax_lexer::reloadpalette() */
#define palkey_syntax_c_default         "c/default"
00343 
00344 void ht_c_syntax_lexer::init()
00345 {
00346         ht_lang_syntax_lexer::init(c_syntax_lexer_rules);
00347         c_reserved_sorted=create_sorted_stringtable(c_reserved);
00348 
00349         char **table=c_reserved;
00350         
00351         char **x=table;
00352         while (*x) x++;
00353         c_reserved_count=x-table;
00354 
00355         c_pal.data = NULL;
00356         c_pal.size = 0;
00357 
00358         config_changed();
00359 }
00360 
00361 void ht_c_syntax_lexer::done()
00362 {
00363         if (c_pal.data) free(c_pal.data);
00364         if (c_reserved_sorted) free(c_reserved_sorted);
00365         ht_lang_syntax_lexer::done();
00366 }
00367 
/* Reloads the palette. */
void ht_c_syntax_lexer::config_changed()
{
        reloadpalette();
}
00372 
/* Returns entry `pal_index' of the C lexer palette (c_pal), as looked up by getcolorv(). */
vcp ht_c_syntax_lexer::getcolor_syntax(UINT pal_index)
{
        return getcolorv(&c_pal, pal_index);
}
00377 
/* Token that ht_lang_syntax_lexer::gettoken() reports when no rule matches. */
lexer_token ht_c_syntax_lexer::geterrortoken()
{
        return LEX_CTOK_ERROR;
}
00382 
/* Returns the initial state of the C lexer: LEX_CST_NORMAL. */
lexer_state ht_c_syntax_lexer::getinitstate()
{
        return LEX_CST_NORMAL;
}
00387 
/* Returns the name of this lexer. The result is a string literal and must
   neither be modified nor freed. */
char *ht_c_syntax_lexer::getname()
{
        return "C/C++";
}
00392 
00393 lexer_token ht_c_syntax_lexer::gettoken(void *buf, UINT buflen, text_pos p, bool start_of_line, lexer_state *ret_state, UINT *ret_len)
00394 {
00395         lexer_token t=ht_lang_syntax_lexer::gettoken(buf, buflen, p, start_of_line, ret_state, ret_len);
00396         if (t==LEX_CTOK_IDENTIFIER) {
00397                 if (match_sorted_stringtable((char*)buf, *ret_len, c_reserved_sorted, c_reserved_count)) {
00398                         t=LEX_CTOK_RIDENTIFIER;
00399                 }
00400         }
00401         return t;
00402 }
00403 
00404 vcp ht_c_syntax_lexer::gettoken_color(lexer_token t)
00405 {
00406         switch (t) {
00407                 case LEX_CTOK_WHITESPACE: return getcolor_syntax(palidx_syntax_whitespace);
00408                 case LEX_CTOK_COMMENT: return getcolor_syntax(palidx_syntax_comment);
00409                 case LEX_CTOK_PREPROCESS: return getcolor_syntax(palidx_syntax_preprocess);
00410                 case LEX_CTOK_IDENTIFIER: return getcolor_syntax(palidx_syntax_identifier);
00411                 case LEX_CTOK_RIDENTIFIER: return getcolor_syntax(palidx_syntax_reserved);
00412                 case LEX_CTOK_NUMBER: return getcolor_syntax(palidx_syntax_intnum);
00413                 case LEX_CTOK_FNUMBER: return getcolor_syntax(palidx_syntax_floatnum);
00414                 case LEX_CTOK_STRING: return getcolor_syntax(palidx_syntax_string);
00415                 case LEX_CTOK_CHAR: return getcolor_syntax(palidx_syntax_char);
00416                 case LEX_CTOK_SYMBOL: return getcolor_syntax(palidx_syntax_symbol);
00417         }
00418         return VCP(VC_BLACK, VC_RED);
00419 }
00420 
00421 void ht_c_syntax_lexer::reloadpalette()
00422 {
00423         if (c_pal.data) {
00424             free(c_pal.data);
00425             c_pal.data = NULL;
00426         }           
00427         load_pal(palclasskey_syntax, palkey_syntax_c_default, &c_pal);
00428 }
00429 
00430 #ifdef HT_HTML_SYNTAX_LEXER
00431 /*
00432  *      CLASS ht_html_syntax_lexer
00433  */
00434 
00435 #if 0
00436 /* HTML lexer states */
00437 #define LEX_HTMLST_NORMAL                       1
00438 #define LEX_HTMLST_TAG                          2
00439 #define LEX_HTMLST_COMMENT                      3
00440 #define LEX_HTMLST_CSS                          4
00441 #define LEX_HTMLST_SCRIPT                       5
00442 
00443 /* HTML lexer tokens */
00444 #define LEX_HTMLTOK_ERROR                       1
00445 #define LEX_HTMLTOK_WHITESPACE          2
00446 #define LEX_HTMLTOK_NORMAL                      3
00447 #define LEX_HTMLTOK_COMMENT                     4
00448 #define LEX_HTMLTOK_TAG                         5
00449 #define LEX_HTMLTOK_ATTRIBUTE                   6
00450 #define LEX_HTMLTOK_SYMBOL                      7
00451 #define LEX_HTMLTOK_CDATA                       8
00452 #define LEX_HTMLTOK_ENTITY                      9
00453 
00454 syntax_lexer_rule html_syntax_lexer_rules[] = {
00455 /* whitespaces */
00456         { LSTSET(LEX_HTMLST_NORMAL),
00457           false, LRST_WHITESPACE, NULL, 0, LEX_CTOK_WHITESPACE },
00458 /* '<!--' - '-->' multiline comments */
00459         { LSTSET(LEX_HTMLST_NORMAL),
00460           false, LRST_STRING, "<!--", LEX_HTMLST_COMMENT, LEX_HTMLTOK_COMMENT },
00461         { LSTSET(LEX_HTMLST_COMMENT),
00462           false, LRST_STRING, "-->", LEX_HTMLST_NORMAL, LEX_HTMLTOK_COMMENT },
00463         SL_RULE_ANYCHAR(LSTSET(LEX_HTMLST_COMMENT), LEX_HTMLTOK_COMMENT),
00464 /* '<' - '>' tags */
00465         { LSTSET(LEX_HTMLST_NORMAL),
00466           false, LRST_STRING, "<!", LEX_HTMLST_TAG, LEX_HTMLTOK_TAG},
00467         { LSTSET(LEX_HTMLST_NORMAL),
00468           false, LRST_REGEX, "</[-_A-Za-z0-9]+", LEX_HTMLST_TAG, LEX_HTMLTOK_TAG},
00469         { LSTSET(LEX_HTMLST_NORMAL),
00470           false, LRST_REGEX, "<[-_A-Za-z0-9]+", LEX_HTMLST_TAG, LEX_HTMLTOK_TAG},
00471         { LSTSET(LEX_HTMLST_TAG),
00472           false, LRST_STRING, ">", LEX_HTMLST_NORMAL, LEX_HTMLTOK_TAG},
00473         { LSTSET(LEX_HTMLST_TAG),
00474           false, LRST_STRING, "/>", LEX_HTMLST_NORMAL, LEX_HTMLTOK_TAG},
00475         { LSTSET(LEX_HTMLST_TAG),
00476           false, LRST_WHITESPACE, NULL, 0, LEX_HTMLTOK_TAG },
00477         { LSTSET(LEX_HTMLST_TAG),
00478           false, LRST_REGEX, "[-_A-Za-z0-9]+", 0, LEX_HTMLTOK_ATTRIBUTE },
00479         { LSTSET(LEX_HTMLST_TAG),
00480           false, LRST_CHARSET, "=", 0, LEX_HTMLTOK_SYMBOL },
00481         { LSTSET(LEX_HTMLST_TAG),
00482           false, LRST_DQSTRING, NULL, 0, LEX_HTMLTOK_CDATA },
00483         { LSTSET(LEX_HTMLST_TAG),
00484           false, LRST_QSTRING, NULL, 0, LEX_HTMLTOK_CDATA },
00485         { LSTSET(LEX_HTMLST_NORMAL),
00486           false, LRST_REGEX, "&[#A-Za-z0-9]+?;", 0, LEX_HTMLTOK_ENTITY },
00487         SL_RULE_ANYCHAR(LSTSET(LEX_HTMLST_NORMAL), LEX_HTMLTOK_NORMAL),
00488 
00489         { 0, 0, LRST_EMPTY, false, false, 0 }
00490 };
00491 #else
00492 
00493 /* HTML lexer states */
00494 #define LEX_HTMLST_NORMAL                       1
00495 #define LEX_HTMLST_TAG_OPEN                     2
00496 #define LEX_HTMLST_TAG                          3
00497 #define LEX_HTMLST_COMMENT                      4
00498 #define LEX_HTMLST_CSS                          5
00499 #define LEX_HTMLST_SCRIPT                       6
00500 
00501 /* HTML lexer tokens */
00502 #define LEX_HTMLTOK_ERROR                       1
00503 #define LEX_HTMLTOK_WHITESPACE          2
00504 #define LEX_HTMLTOK_NORMAL                      3
00505 #define LEX_HTMLTOK_COMMENT                     4
00506 #define LEX_HTMLTOK_TAG                         5
00507 #define LEX_HTMLTOK_ATTRIBUTE                   6
00508 #define LEX_HTMLTOK_SYMBOL                      7
00509 #define LEX_HTMLTOK_CDATA                       8
00510 #define LEX_HTMLTOK_ENTITY                      9
00511 
00512 syntax_lexer_rule html_syntax_lexer_rules[] = {
00513 /* whitespaces */
00514         { LSTSET(LEX_HTMLST_NORMAL),
00515           false, LRST_WHITESPACE, NULL, 0, LEX_CTOK_WHITESPACE },
00516 /* '<!--' - '-->' multiline comments */
00517         { LSTSET(LEX_HTMLST_NORMAL),
00518           false, LRST_STRING, "<!--", LEX_HTMLST_COMMENT, LEX_HTMLTOK_COMMENT },
00519         { LSTSET(LEX_HTMLST_COMMENT),
00520           false, LRST_STRING, "-->", LEX_HTMLST_NORMAL, LEX_HTMLTOK_COMMENT },
00521         SL_RULE_ANYCHAR(LSTSET(LEX_HTMLST_COMMENT), LEX_HTMLTOK_COMMENT),
00522 /* '<' - '>' tags */
00523         { LSTSET(LEX_HTMLST_NORMAL),
00524           false, LRST_STRING, "<", LEX_HTMLST_TAG_OPEN, LEX_HTMLTOK_TAG},
00525         { LSTSET(LEX_HTMLST_TAG_OPEN),
00526           false, LRST_STRING, "/", LEX_HTMLST_TAG_OPEN, LEX_HTMLTOK_TAG},
00527         { LSTSET(LEX_HTMLST_TAG_OPEN),
00528           false, LRST_STRING, "!", LEX_HTMLST_TAG, LEX_HTMLTOK_TAG},
00529         { LSTSET(LEX_HTMLST_TAG_OPEN),
00530           false, LRST_REGEX, "[-_A-Za-z0-9]+", LEX_HTMLST_TAG, LEX_HTMLTOK_TAG},
00531         { LSTSET(LEX_HTMLST_TAG),
00532           false, LRST_STRING, ">", LEX_HTMLST_NORMAL, LEX_HTMLTOK_TAG},
00533         { LSTSET(LEX_HTMLST_TAG),
00534           false, LRST_STRING, "/>", LEX_HTMLST_NORMAL, LEX_HTMLTOK_TAG},
00535         { LSTSET(LEX_HTMLST_TAG),
00536           false, LRST_WHITESPACE, NULL, 0, LEX_HTMLTOK_TAG },
00537         { LSTSET(LEX_HTMLST_TAG),
00538           false, LRST_REGEX, "[-_A-Za-z0-9]+", 0, LEX_HTMLTOK_ATTRIBUTE },
00539         { LSTSET(LEX_HTMLST_TAG),
00540           false, LRST_CHARSET, "=", 0, LEX_HTMLTOK_SYMBOL },
00541         { LSTSET(LEX_HTMLST_TAG),
00542           false, LRST_DQSTRING, NULL, 0, LEX_HTMLTOK_CDATA },
00543         { LSTSET(LEX_HTMLST_TAG),
00544           false, LRST_QSTRING, NULL, 0, LEX_HTMLTOK_CDATA },
00545         { LSTSET(LEX_HTMLST_NORMAL),
00546           false, LRST_REGEX, "[^&<]", 0, LEX_HTMLTOK_NORMAL },
00547         { LSTSET(LEX_HTMLST_NORMAL),
00548           false, LRST_REGEX, "&[#A-Za-z0-9]+?;", 0, LEX_HTMLTOK_ENTITY },
00549 //      SL_RULE_ANYCHAR(LSTSET(LEX_HTMLST_NORMAL), LEX_HTMLTOK_NORMAL),
00550 
00551         { 0, 0, LRST_EMPTY, false, false, 0 }
00552 };
00553 #endif
00554 
/*
 *      Sets up the HTML lexer: installs the HTML rule table and calls
 *      config_changed(). The reserved-word table and palette setup below
 *      are commented out and therefore not performed.
 */
void ht_html_syntax_lexer::init()
{
        ht_lang_syntax_lexer::init(html_syntax_lexer_rules);
/*      html_reserved_sorted=create_sorted_stringtable(html_reserved);
        char **table=c_reserved;
        
        char **x=table;
        while (*x) x++;
        c_reserved_count=x-table;

        c_pal.data = NULL;
        c_pal.size = 0;*/

        config_changed();
}
00570 
/*
 *      Shuts down the underlying language lexer. The HTML lexer frees
 *      nothing of its own here (the corresponding lines are commented out).
 */
void ht_html_syntax_lexer::done()
{
//      if (c_pal.data) free(c_pal.data);
//      if (c_reserved_sorted) free(c_reserved_sorted);
        ht_lang_syntax_lexer::done();
}
00577 
/* Calls reloadpalette(), which currently does nothing for the HTML lexer. */
void ht_html_syntax_lexer::config_changed()
{
        reloadpalette();
}
00582 
/* Returns a fixed colour (light blue on transparent); `pal_index' is
   ignored because the palette lookup is commented out. */
vcp ht_html_syntax_lexer::getcolor_syntax(UINT pal_index)
{
//      return getcolorv(&c_pal, pal_index);
        return VCP(VC_LIGHT(VC_BLUE), VC_TRANSPARENT);
}
00588 
/* Token that ht_lang_syntax_lexer::gettoken() reports when no rule matches. */
lexer_token ht_html_syntax_lexer::geterrortoken()
{
        return LEX_HTMLTOK_ERROR;
}
00593 
/* Returns the initial state of the HTML lexer: LEX_HTMLST_NORMAL. */
lexer_state ht_html_syntax_lexer::getinitstate()
{
        return LEX_HTMLST_NORMAL;
}
00598 
/* Returns the name of this lexer. The result is a string literal and must
   neither be modified nor freed. */
char *ht_html_syntax_lexer::getname()
{
        return "HTML";
}
00603 
00604 lexer_token ht_html_syntax_lexer::gettoken(void *buf, UINT buflen, text_pos p, bool start_of_line, lexer_state *ret_state, UINT *ret_len)
00605 {
00606         lexer_token t=ht_lang_syntax_lexer::gettoken(buf, buflen, p, start_of_line, ret_state, ret_len);
00607 /*      if (t==LEX_CTOK_IDENTIFIER) {
00608                 if (match_sorted_stringtable((char*)buf, *ret_len, c_reserved_sorted, c_reserved_count)) {
00609                         t=LEX_CTOK_RIDENTIFIER;
00610                 }
00611         }*/
00612         return t;
00613 }
00614 
00615 vcp ht_html_syntax_lexer::gettoken_color(lexer_token t)
00616 {
00617         switch (t) {
00618                 case LEX_HTMLTOK_ERROR: return VCP(VC_BLACK, VC_RED);
00619                 case LEX_HTMLTOK_WHITESPACE: return VCP(VC_LIGHT(VC_YELLOW), VC_TRANSPARENT);
00620                 case LEX_HTMLTOK_NORMAL: return VCP(VC_LIGHT(VC_YELLOW), VC_TRANSPARENT);
00621                 case LEX_HTMLTOK_COMMENT: return VCP(VC_WHITE, VC_TRANSPARENT);
00622                 case LEX_HTMLTOK_TAG: return VCP(VC_LIGHT(VC_GREEN), VC_TRANSPARENT);
00623                 case LEX_HTMLTOK_ATTRIBUTE: return VCP(VC_GREEN, VC_TRANSPARENT);
00624                 case LEX_HTMLTOK_SYMBOL: return VCP(VC_LIGHT(VC_WHITE), VC_TRANSPARENT);
00625                 case LEX_HTMLTOK_CDATA: return VCP(VC_LIGHT(VC_MAGENTA), VC_TRANSPARENT);
00626                 case LEX_HTMLTOK_ENTITY: return VCP(VC_LIGHT(VC_WHITE), VC_TRANSPARENT);
00627         }
00628         return VCP(VC_BLACK, VC_RED);
00629 }
00630 
/*
 *      Does nothing: the palette loading code is commented out, the HTML
 *      lexer uses the fixed colours from gettoken_color() instead.
 */
void ht_html_syntax_lexer::reloadpalette()
{
/*      if (c_pal.data) {
            free(c_pal.data);
            c_pal.data = NULL;
        }           
        load_pal(palclasskey_syntax, palkey_syntax_c_default, &c_pal);*/
}
00639 #endif
00640 
00641 /*
00642  *      sorted stringtable
00643  */
00644 
/*
 *      qsort() comparison function for arrays of C strings: e1 and e2
 *      point to the array elements (i.e. to char pointers), which are
 *      compared with strcmp().
 */
int qsort_stringlist(const void *e1, const void *e2)
{
        const char *lhs = *(const char *const *)e1;
        const char *rhs = *(const char *const *)e2;
        return strcmp(lhs, rhs);
}
00649         
00650 char **create_sorted_stringtable(char **table)
00651 {
00652         char **x=table;
00653         while (*x) x++;
00654         char **stab=(char **)malloc(sizeof (char*) * (x-table+1));
00655         memmove(stab, table, sizeof (char*) * (x-table+1));
00656         
00657         qsort(stab, x-table, sizeof(char*), qsort_stringlist);
00658         return stab;
00659 }
00660 

Generated on Fri May 7 21:15:44 2004 by doxygen 1.3.5