Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F2916354
lexer.c
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
33 KB
Referenced Files
None
Subscribers
None
lexer.c
View Options
#include
"silver/lexer.h"
#include
"escape.h"
#include
"internal.h"
#include
"tokens.h"
#include
"utf8.h"
#include
"errors.h"
#include
<runtime.h>
#include
<math.h>
#include
<string.h>
/*
 * Bind `lx` to runtime `js` and to the source text `code` (`clen` bytes),
 * record the strict-mode flag, and reset the token state to offset 0.
 */
void sv_lexer_init(sv_lexer_t *lx, ant_t *js, const char *code, jsoff_t clen, bool strict) {
  /* Source binding. */
  lx->js = js;
  lx->code = code;
  lx->clen = clen;
  lx->strict = strict;

  /* Cursor and current-token bookkeeping. */
  lx->st.pos = 0;
  lx->st.toff = 0;
  lx->st.tlen = 0;
  lx->st.tval = js_mkundef();
  lx->st.tok = TOK_ERR;
  /* consumed != 0 makes the next sv_lexer_next() scan instead of replaying st.tok. */
  lx->st.consumed = 1;
  lx->st.had_newline = false;
}
/*
 * Report the current lexer location to the runtime via js_set_error_site().
 * The reported offset is the current token offset when that is non-zero,
 * otherwise the scan position; the length is the current token length.
 */
void sv_lexer_set_error_site(sv_lexer_t *lx) {
  ant_t *js = lx->js;
  jsoff_t off = lx->st.pos;
  if (lx->st.toff > 0) {
    off = lx->st.toff;
  }
  js_set_error_site(js, lx->code, lx->clen, js->filename, off, lx->st.tlen);
}
/* Copy the lexer's current token state into `*st`. */
void sv_lexer_save_state(const sv_lexer_t *lx, sv_lexer_state_t *st) {
  const sv_lexer_state_t *current = &lx->st;
  *st = *current;
}
/* Overwrite the lexer's token state with a snapshot taken earlier. */
void sv_lexer_restore_state(sv_lexer_t *lx, const sv_lexer_state_t *st) {
  sv_lexer_state_t *current = &lx->st;
  *current = *st;
}
/*
 * Temporarily switch the lexer to a different source buffer.
 *
 * The current source pointer, length, strict flag and token state are stored
 * in `cp` so that sv_lexer_pop_source() can put them back. The lexer is then
 * pointed at `code`/`clen` with a freshly reset token state. The strict flag
 * of `lx` itself is left as it was.
 */
void sv_lexer_push_source(sv_lexer_t *lx, sv_lexer_checkpoint_t *cp, const char *code, jsoff_t clen) {
  /* Snapshot what is about to be replaced. */
  cp->st = lx->st;
  cp->strict = lx->strict;
  cp->clen = lx->clen;
  cp->code = lx->code;

  /* Install the new buffer. */
  lx->code = code;
  lx->clen = clen;

  /* Same reset as a freshly initialised lexer. */
  lx->st.pos = 0;
  lx->st.toff = 0;
  lx->st.tlen = 0;
  lx->st.tval = js_mkundef();
  lx->st.tok = TOK_ERR;
  lx->st.consumed = 1;
  lx->st.had_newline = false;
}
/* Restore the source buffer, strict flag and token state saved in `cp`. */
void sv_lexer_pop_source(sv_lexer_t *lx, const sv_lexer_checkpoint_t *cp) {
  lx->st = cp->st;
  lx->strict = cp->strict;
  lx->clen = cp->clen;
  lx->code = cp->code;
}
/*
 * Decode the string literal that is the current token.
 *
 * The token text (including its surrounding quotes) is read from the source
 * and the characters between the quotes are copied into a buffer obtained
 * from code_arena_bump(); backslash sequences are handed to decode_escape().
 *
 * Returns a sv_lex_string_t with ok == true on success. On failure ok is
 * false and an error has been raised on the runtime: "oom" when the buffer
 * cannot be obtained, or a SyntaxError for an octal escape in strict mode.
 * A zero-length token yields the empty string.
 */
sv_lex_string_t sv_lexer_str_literal(sv_lexer_t *lx) {
  ant_t *js = lx->js;
  sv_lex_string_t result = { .str = NULL, .len = 0, .ok = false };

  uint8_t *src = (uint8_t *)&lx->code[lx->st.toff];
  const size_t tok_len = (size_t)lx->st.tlen;

  if (tok_len == 0) {
    result.str = "";
    result.ok = true;
    return result;
  }

  /* The decoded text is written into a buffer of tok_len bytes. */
  uint8_t *dst = code_arena_bump(tok_len);
  if (!dst) {
    (void)js_mkerr(js, "oom");
    return result;
  }

  size_t wr = 0;
  /* rd walks the bytes strictly between the opening and closing quote. */
  for (size_t rd = 1; rd + 1 < tok_len; rd++) {
    if (src[rd] != '\\') {
      dst[wr++] = src[rd];
      continue;
    }

    if (lx->strict && is_octal_escape(src, rd)) {
      sv_lexer_set_error_site(lx);
      (void)js_mkerr_typed(js, JS_ERR_SYNTAX, "Octal escape sequences are not allowed in strict mode.");
      return result;
    }

    /* decode_escape() appends to dst via wr; its return value is added to
     * the read index on top of the one byte following the backslash. */
    size_t extra = decode_escape(src, rd, tok_len, dst, &wr, src[0]);
    rd += extra + 1;
  }

  result.str = (const char *)dst;
  result.len = (uint32_t)wr;
  result.ok = true;
  return result;
}
/*
 * Test whether the non-ASCII character starting at `p` is whitespace.
 *
 * Returns the value reported by utf8_next() for the character when it is
 * U+FEFF or has Unicode category Zs, Zl or Zp, and 0 otherwise (including
 * for ASCII lead bytes and for a negative decoded code point).
 * When `is_line_term` is non-NULL it is set to true for Zl/Zp characters
 * and to false in every other case.
 */
static int is_unicode_space(const unsigned char *p, jsoff_t remaining, bool *is_line_term) {
  if (is_line_term) *is_line_term = false;

  /* ASCII is handled by the caller's lookup table. */
  if (p[0] < 0x80) return 0;

  utf8proc_int32_t cp;
  utf8proc_ssize_t n = utf8_next(p, (utf8proc_ssize_t)remaining, &cp);
  if (cp < 0) return 0;
  if (cp == 0xFEFF) return (int)n;

  switch (utf8proc_category(cp)) {
    case UTF8PROC_CATEGORY_ZS:
      return (int)n;
    case UTF8PROC_CATEGORY_ZL:
    case UTF8PROC_CATEGORY_ZP:
      if (is_line_term) *is_line_term = true;
      return (int)n;
    default:
      return 0;
  }
}
/* Character classes used as indices into the dispatch table of sv_skiptonext(). */
enum {
  C_0 = 0, /* every byte not listed below */
  C_SPC,   /* tab, vertical tab, form feed, carriage return, space */
  C_NL,    /* line feed */
  C_SL,    /* '/' */
  C_HI     /* not stored in cc[]; chosen by the caller for bytes >= 0x80 */
};

/* Class of each ASCII byte; unlisted entries are C_0. */
static const uint8_t cc[128] = {
  ['\t'] = C_SPC,
  ['\n'] = C_NL,
  ['\v'] = C_SPC,
  ['\f'] = C_SPC,
  ['\r'] = C_SPC,
  [' ']  = C_SPC,
  ['/']  = C_SL,
};
/*
 * Skip whitespace and comments starting at offset `n` of `code` (`len` bytes)
 * and return the offset of the first byte that belongs to a token, or `len`
 * when the input is exhausted.
 *
 * A "#!" line is skipped only when scanning starts at offset 0.
 * When `nl` is non-NULL it receives true if a line feed, a Unicode line or
 * paragraph separator, or the end of a "//" / "#!" line was crossed.
 *
 * Dispatch uses GCC/Clang labels-as-values: each byte is classified through
 * cc[] (or C_HI for bytes >= 0x80) and control jumps to the matching label.
 *
 * An unterminated block comment consumes the rest of the input. Previously
 * the cursor was left on the last byte of the comment, which was then lexed
 * as if it were code.
 */
static jsoff_t sv_skiptonext(const char *code, jsoff_t len, jsoff_t n, bool *nl) {
  static const void *D[] = {&&L0, &&LS, &&LN, &&LSL, &&LH};

  bool saw_nl = false;
  unsigned char c;
  const char *p = code + n;
  const char *end = code + len;

/* Stop at end of input, otherwise classify *p and jump to its handler. */
#define SV_SKIP_NEXT()                                   \
  do {                                                   \
    if (__builtin_expect(p >= end, 0)) goto L0;          \
    c = (unsigned char)*p;                               \
    goto *D[c & 0x80 ? C_HI : cc[c]];                    \
  } while (0)

  if (__builtin_expect(p == code && end - p >= 2 && p[0] == '#' && p[1] == '!', 0)) {
    for (p += 2; p < end && *p != '\n'; p++);
    if (p < end) {
      saw_nl = true;
      p++;
    }
  }
  SV_SKIP_NEXT();

LS: /* plain whitespace */
  p++;
  SV_SKIP_NEXT();

LN: /* line feed */
  saw_nl = true;
  p++;
  SV_SKIP_NEXT();

LSL: /* '/': a comment, or the start of a token */
  if (p + 1 >= end) goto L0;
  if (p[1] == '/') {
    for (p += 2; p < end && *p != '\n'; p++);
    if (p < end) {
      saw_nl = true;
      p++;
    }
    SV_SKIP_NEXT();
  }
  if (p[1] == '*') {
    bool closed = false;
    for (p += 2; p + 1 < end; p++) {
      if (*p == '*' && p[1] == '/') {
        p += 2;
        closed = true;
        break;
      }
      if (*p == '\n') saw_nl = true;
    }
    if (!closed) {
      /* No closing delimiter: nothing after the opener is code. */
      p = end;
      goto L0;
    }
    SV_SKIP_NEXT();
  }
  goto L0;

LH: /* byte >= 0x80: skip it only if it is Unicode whitespace */
  {
    bool lt;
    int u = is_unicode_space((const unsigned char *)p, (jsoff_t)(end - p), &lt);
    if (u > 0) {
      if (lt) saw_nl = true;
      p += u;
      SV_SKIP_NEXT();
    }
  }

L0: /* token start or end of input */
#undef SV_SKIP_NEXT
  if (nl) *nl = saw_nl;
  return (jsoff_t)(p - code);
}
/* K(s, t): statement form. Expects `buf` and `len` in scope; returns `t` from
 * the enclosing function when the `len` bytes at `buf` equal the literal `s`. */
#define K(s, t) if (len == sizeof(s)-1 && !memcmp(buf, s, sizeof(s)-1)) return t
/* M(s): expression form of the same comparison; true when the name equals `s`. */
#define M(s) (len == sizeof(s)-1 && !memcmp(buf, s, sizeof(s)-1))
/*
 * Return true when the `len` bytes at `buf` spell exactly "eval" or
 * "arguments".
 *
 * The length is checked before any byte is read, so an empty name
 * (len == 0) never touches `buf`.
 */
bool is_eval_or_arguments_name(const char *buf, size_t len) {
  if (len == 4) return memcmp(buf, "eval", 4) == 0;
  if (len == 9) return memcmp(buf, "arguments", 9) == 0;
  return false;
}
/*
 * Return true when the `len` bytes at `buf` spell one of:
 * implements, interface, let, package, private, protected, public,
 * static, yield.
 *
 * Candidates are selected by length first, so an empty name (len == 0)
 * never reads from `buf`.
 */
bool is_strict_reserved_name(const char *buf, size_t len) {
  switch (len) {
    case 3:
      return memcmp(buf, "let", 3) == 0;
    case 5:
      return memcmp(buf, "yield", 5) == 0;
    case 6:
      return memcmp(buf, "public", 6) == 0 || memcmp(buf, "static", 6) == 0;
    case 7:
      return memcmp(buf, "private", 7) == 0 || memcmp(buf, "package", 7) == 0;
    case 9:
      return memcmp(buf, "interface", 9) == 0 || memcmp(buf, "protected", 9) == 0;
    case 10:
      return memcmp(buf, "implements", 10) == 0;
    default:
      return false;
  }
}
static
uint8_t
parsekeyword
(
const
char
*
buf
,
size_t
len
)
{
switch
(
buf
[
0
])
{
case
'a'
:
K
(
"as"
,
TOK_AS
);
K
(
"async"
,
TOK_ASYNC
);
K
(
"await"
,
TOK_AWAIT
);
break
;
case
'b'
:
K
(
"break"
,
TOK_BREAK
);
break
;
case
'c'
:
K
(
"case"
,
TOK_CASE
);
K
(
"catch"
,
TOK_CATCH
);
K
(
"class"
,
TOK_CLASS
);
K
(
"const"
,
TOK_CONST
);
K
(
"continue"
,
TOK_CONTINUE
);
break
;
case
'd'
:
K
(
"do"
,
TOK_DO
);
K
(
"default"
,
TOK_DEFAULT
);
K
(
"delete"
,
TOK_DELETE
);
K
(
"debugger"
,
TOK_DEBUGGER
);
break
;
case
'e'
:
K
(
"else"
,
TOK_ELSE
);
K
(
"export"
,
TOK_EXPORT
);
break
;
case
'f'
:
K
(
"for"
,
TOK_FOR
);
K
(
"from"
,
TOK_FROM
);
K
(
"false"
,
TOK_FALSE
);
K
(
"finally"
,
TOK_FINALLY
);
K
(
"function"
,
TOK_FUNC
);
break
;
case
'g'
:
K
(
"globalThis"
,
TOK_GLOBAL_THIS
);
break
;
case
'i'
:
K
(
"if"
,
TOK_IF
);
K
(
"in"
,
TOK_IN
);
K
(
"import"
,
TOK_IMPORT
);
K
(
"instanceof"
,
TOK_INSTANCEOF
);
break
;
case
'l'
:
K
(
"let"
,
TOK_LET
);
break
;
case
'n'
:
K
(
"new"
,
TOK_NEW
);
K
(
"null"
,
TOK_NULL
);
break
;
case
'o'
:
K
(
"of"
,
TOK_OF
);
break
;
case
'r'
:
K
(
"return"
,
TOK_RETURN
);
break
;
case
's'
:
K
(
"super"
,
TOK_SUPER
);
K
(
"static"
,
TOK_STATIC
);
K
(
"switch"
,
TOK_SWITCH
);
break
;
case
't'
:
K
(
"try"
,
TOK_TRY
);
K
(
"this"
,
TOK_THIS
);
K
(
"true"
,
TOK_TRUE
);
K
(
"throw"
,
TOK_THROW
);
K
(
"typeof"
,
TOK_TYPEOF
);
break
;
case
'u'
:
K
(
"undefined"
,
TOK_UNDEF
);
break
;
case
'v'
:
K
(
"var"
,
TOK_VAR
);
K
(
"void"
,
TOK_VOID
);
break
;
case
'w'
:
K
(
"while"
,
TOK_WHILE
);
K
(
"with"
,
TOK_WITH
);
K
(
"window"
,
TOK_WINDOW
);
break
;
case
'y'
:
K
(
"yield"
,
TOK_YIELD
);
break
;
}
return
TOK_IDENTIFIER
;
}
#undef K
#undef M
/* Classification bits stored per byte in char_type[]. */
#define CHAR_DIGIT 0x01   /* '0'..'9' */
#define CHAR_XDIGIT 0x02  /* '0'..'9', 'a'..'f', 'A'..'F' */
#define CHAR_ALPHA 0x04   /* ASCII letters */
#define CHAR_IDENT 0x08   /* ASCII letters, digits, '_' and '$' */
#define CHAR_IDENT1 0x10  /* ASCII letters, '_' and '$' */
#define CHAR_WS 0x20      /* tab, line feed, carriage return, space */
#define CHAR_OCTAL 0x40   /* '0'..'7' */

/* Flag bundles used only to build the table below. */
#define CT_OCTDIG (CHAR_DIGIT | CHAR_XDIGIT | CHAR_IDENT | CHAR_OCTAL)
#define CT_DECDIG (CHAR_DIGIT | CHAR_XDIGIT | CHAR_IDENT)
#define CT_HEXLET (CHAR_XDIGIT | CHAR_ALPHA | CHAR_IDENT | CHAR_IDENT1)
#define CT_LETTER (CHAR_ALPHA | CHAR_IDENT | CHAR_IDENT1)
#define CT_SYMBOL (CHAR_IDENT | CHAR_IDENT1)

/* Flags for every byte value; bytes not listed (including all >= 0x80) are 0. */
static const uint8_t char_type[256] = {
  ['\t'] = CHAR_WS, ['\n'] = CHAR_WS, ['\r'] = CHAR_WS, [' '] = CHAR_WS,

  ['0'] = CT_OCTDIG, ['1'] = CT_OCTDIG, ['2'] = CT_OCTDIG, ['3'] = CT_OCTDIG,
  ['4'] = CT_OCTDIG, ['5'] = CT_OCTDIG, ['6'] = CT_OCTDIG, ['7'] = CT_OCTDIG,
  ['8'] = CT_DECDIG, ['9'] = CT_DECDIG,

  ['A'] = CT_HEXLET, ['B'] = CT_HEXLET, ['C'] = CT_HEXLET,
  ['D'] = CT_HEXLET, ['E'] = CT_HEXLET, ['F'] = CT_HEXLET,
  ['a'] = CT_HEXLET, ['b'] = CT_HEXLET, ['c'] = CT_HEXLET,
  ['d'] = CT_HEXLET, ['e'] = CT_HEXLET, ['f'] = CT_HEXLET,

  ['G'] = CT_LETTER, ['H'] = CT_LETTER, ['I'] = CT_LETTER, ['J'] = CT_LETTER,
  ['K'] = CT_LETTER, ['L'] = CT_LETTER, ['M'] = CT_LETTER, ['N'] = CT_LETTER,
  ['O'] = CT_LETTER, ['P'] = CT_LETTER, ['Q'] = CT_LETTER, ['R'] = CT_LETTER,
  ['S'] = CT_LETTER, ['T'] = CT_LETTER, ['U'] = CT_LETTER, ['V'] = CT_LETTER,
  ['W'] = CT_LETTER, ['X'] = CT_LETTER, ['Y'] = CT_LETTER, ['Z'] = CT_LETTER,

  ['g'] = CT_LETTER, ['h'] = CT_LETTER, ['i'] = CT_LETTER, ['j'] = CT_LETTER,
  ['k'] = CT_LETTER, ['l'] = CT_LETTER, ['m'] = CT_LETTER, ['n'] = CT_LETTER,
  ['o'] = CT_LETTER, ['p'] = CT_LETTER, ['q'] = CT_LETTER, ['r'] = CT_LETTER,
  ['s'] = CT_LETTER, ['t'] = CT_LETTER, ['u'] = CT_LETTER, ['v'] = CT_LETTER,
  ['w'] = CT_LETTER, ['x'] = CT_LETTER, ['y'] = CT_LETTER, ['z'] = CT_LETTER,

  ['_'] = CT_SYMBOL, ['$'] = CT_SYMBOL,
};

#undef CT_OCTDIG
#undef CT_DECDIG
#undef CT_HEXLET
#undef CT_LETTER
#undef CT_SYMBOL

/* Table probes; each yields a non-zero value when the byte has the flag. */
#define IS_DIGIT(c) (char_type[(uint8_t)(c)] & CHAR_DIGIT)
#define IS_XDIGIT(c) (char_type[(uint8_t)(c)] & CHAR_XDIGIT)
#define IS_IDENT(c) (char_type[(uint8_t)(c)] & CHAR_IDENT)
#define IS_IDENT1(c) (char_type[(uint8_t)(c)] & CHAR_IDENT1)
#define IS_OCTAL(c) (char_type[(uint8_t)(c)] & CHAR_OCTAL)
/* Branch hints built on __builtin_expect; `!!` reduces the condition to 0 or 1. */
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
/*
 * Tokens that consist of exactly one ASCII character, indexed by that
 * character. Entries left at 0 mean "not a single-character token";
 * sv_next_raw() tests the looked-up value against 0.
 */
static const uint8_t single_char_tok[128] = {
  ['('] = TOK_LPAREN,
  [')'] = TOK_RPAREN,
  ['{'] = TOK_LBRACE,
  ['}'] = TOK_RBRACE,
  ['['] = TOK_LBRACKET,
  [']'] = TOK_RBRACKET,
  [';'] = TOK_SEMICOLON,
  [','] = TOK_COMMA,
  [':'] = TOK_COLON,
  ['~'] = TOK_TILDA,
  ['#'] = TOK_HASH,
};
/* True when `c` is a byte value (0..255) flagged CHAR_WS in char_type[]. */
bool is_space(int c) {
  const bool in_range = (c >= 0 && c < 256);
  return in_range && (char_type[(uint8_t)c] & CHAR_WS) != 0;
}
/* True when `c` is a byte value (0..255) flagged CHAR_DIGIT in char_type[]. */
bool is_digit(int c) {
  const bool in_range = (c >= 0 && c < 256);
  return in_range && (char_type[(uint8_t)c] & CHAR_DIGIT) != 0;
}
/*
 * Byte-level test for "may start an identifier".
 * ASCII values are looked up in char_type[] (CHAR_IDENT1); values of 128 and
 * above are accepted when bit 0x80 is set; negative values are rejected.
 */
static bool is_ident_begin(int c) {
  if (c < 0) return false;
  if (c >= 128) return (c & 0x80) != 0;
  return (char_type[(uint8_t)c] & CHAR_IDENT1) != 0;
}
/*
 * Byte-level test for "may continue an identifier".
 * ASCII values need CHAR_IDENT or CHAR_IDENT1 in char_type[]; values of 128
 * and above are accepted when bit 0x80 is set; negative values are rejected.
 */
static bool is_ident_continue(int c) {
  if (c < 0) return false;
  if (c >= 128) return (c & 0x80) != 0;

  const uint8_t wanted = CHAR_IDENT | CHAR_IDENT1;
  return (char_type[(uint8_t)c] & wanted) != 0;
}
/*
 * Parse a `\uXXXX` or `\u{X...}` escape located at `buf[pos]`.
 *
 * buf       - text being scanned
 * len       - number of valid bytes in `buf`
 * pos       - offset of the backslash
 * codepoint - receives the decoded value on success
 *
 * Returns the number of bytes the escape occupies, or 0 when there is no
 * well-formed escape at `pos` (in which case `*codepoint` is untouched).
 *
 * In the braced form the value is range-checked after every digit. The old
 * code checked only once at the end, so a long digit run could wrap the
 * 32-bit accumulator and slip past the 0x10FFFF limit.
 */
static int parse_unicode_escape(const char *buf, jsoff_t len, jsoff_t pos, uint32_t *codepoint) {
  if (pos + 3 >= len) return 0;
  if (buf[pos] != '\\' || buf[pos + 1] != 'u') return 0;

  if (buf[pos + 2] == '{') {
    jsoff_t i = pos + 3;
    uint32_t cp = 0;
    int ndigits = 0;

    while (i < len && is_xdigit((unsigned char)buf[i])) {
      unsigned char ch = (unsigned char)buf[i];
      uint32_t digit = (ch <= '9') ? (uint32_t)(ch - '0') : (uint32_t)((ch | 0x20) - 'a' + 10);
      /* cp <= 0x10FFFF here, so the shift cannot overflow 32 bits. */
      cp = (cp << 4) | digit;
      if (cp > 0x10FFFF) return 0;
      i++;
      ndigits++;
    }

    if (ndigits == 0 || i >= len || buf[i] != '}') return 0;
    *codepoint = cp;
    return (int)(i - pos + 1);
  }

  /* Fixed form: backslash, 'u' and exactly four hex digits. */
  if (pos + 5 >= len) return 0;

  uint32_t cp = 0;
  for (int i = 0; i < 4; i++) {
    int c = (unsigned char)buf[pos + 2 + i];
    if (!is_xdigit(c)) return 0;
    cp <<= 4;
    cp |= (c <= '9') ? (c - '0') : ((c | 0x20) - 'a' + 10);
  }

  *codepoint = cp;
  return 6;
}
/*
 * True when the code point's utf8proc general category is one of
 * Lu, Ll, Lt, Lm, Lo or Nl.
 */
static bool is_unicode_id_start(utf8proc_int32_t cp) {
  switch (utf8proc_category(cp)) {
    case UTF8PROC_CATEGORY_LU:
    case UTF8PROC_CATEGORY_LL:
    case UTF8PROC_CATEGORY_LT:
    case UTF8PROC_CATEGORY_LM:
    case UTF8PROC_CATEGORY_LO:
    case UTF8PROC_CATEGORY_NL:
      return true;
    default:
      return false;
  }
}
/*
 * True when the code point is accepted by is_unicode_id_start(), or has
 * general category Mn, Mc, Nd or Pc, or is U+200C / U+200D.
 */
static bool is_unicode_id_continue(utf8proc_int32_t cp) {
  if (is_unicode_id_start(cp)) return true;

  switch (utf8proc_category(cp)) {
    case UTF8PROC_CATEGORY_MN:
    case UTF8PROC_CATEGORY_MC:
    case UTF8PROC_CATEGORY_ND:
    case UTF8PROC_CATEGORY_PC:
      return true;
    default:
      break;
  }
  return cp == 0x200C || cp == 0x200D;
}
/*
 * Code-point test for "may start an identifier": ASCII goes through
 * char_type[] (CHAR_IDENT1), everything else through is_unicode_id_start().
 */
static bool is_unicode_ident_begin(uint32_t cp) {
  if (cp >= 128) {
    return is_unicode_id_start((utf8proc_int32_t)cp);
  }
  return (char_type[(uint8_t)cp] & CHAR_IDENT1) != 0;
}
/*
 * Code-point test for "may continue an identifier": ASCII goes through
 * char_type[] (CHAR_IDENT or CHAR_IDENT1), everything else through
 * is_unicode_id_continue().
 */
static bool is_unicode_ident_continue(uint32_t cp) {
  if (cp >= 128) {
    return is_unicode_id_continue((utf8proc_int32_t)cp);
  }
  return (char_type[(uint8_t)cp] & (CHAR_IDENT | CHAR_IDENT1)) != 0;
}
/*
 * Copy `src` (`srclen` bytes) into `dst`, replacing every well-formed
 * `\u` escape with the bytes produced by utf8_encode() for its code point.
 *
 * Copying stops at the end of `src` or as soon as fewer than five bytes of
 * `dst` remain, so longer input is truncated. The output is always
 * NUL-terminated; the return value is its length without the terminator.
 */
static size_t decode_ident_escapes(const char *src, size_t srclen, char *dst, size_t dstlen) {
  size_t in_pos = 0;
  size_t out_pos = 0;

  for (;;) {
    if (in_pos >= srclen) break;
    if (out_pos + 4 >= dstlen) break;

    uint32_t cp;
    int esc_len = parse_unicode_escape(src, (jsoff_t)srclen, (jsoff_t)in_pos, &cp);
    if (esc_len <= 0) {
      /* Not an escape here: copy the byte through unchanged. */
      dst[out_pos++] = src[in_pos++];
      continue;
    }

    out_pos += utf8_encode(cp, dst + out_pos);
    in_pos += esc_len;
  }

  dst[out_pos] = '\0';
  return out_pos;
}
/*
 * Scan an identifier or keyword at the start of `buf` (`len` bytes).
 *
 * On success `*tlen` receives the byte length of the name and the return
 * value is the keyword token from parsekeyword(), or TOK_IDENTIFIER.
 * TOK_ERR is returned when `buf` does not start an identifier (several of
 * those paths leave `*tlen` unwritten) and when a name containing `\u`
 * escapes decodes to something parsekeyword() treats as a keyword.
 */
static uint8_t parseident(const char *buf, jsoff_t len, jsoff_t *tlen) {
  if (len == 0) return TOK_ERR;
  unsigned char c = (unsigned char)buf[0];
  jsoff_t i = 0;
  /* Fast path: pure ASCII name without escapes. */
  if (c < 128 && c != '\\' && is_ident_begin(c)) {
    i = 1;
    while (i < len) {
      c = (unsigned char)buf[i];
      /* A non-ASCII byte or an escape hands over to the general loop. */
      if (c >= 128 || c == '\\') goto slow_path_continue;
      if (!is_ident_continue(c)) break;
      i++;
    }
    *tlen = i;
    return parsekeyword(buf, i);
  }
  /* Name starting with a \u escape. */
  if (c == '\\') {
    uint32_t first_cp;
    int esc_len = parse_unicode_escape(buf, len, 0, &first_cp);
    if (esc_len <= 0 || !is_unicode_ident_begin(first_cp)) return TOK_ERR;
    *tlen = esc_len;
    goto slow_path_loop;
  }
  /* Name starting with a multi-byte UTF-8 character. */
  if (c >= 128) {
    utf8proc_int32_t cp;
    utf8proc_ssize_t n = utf8_next((const utf8proc_uint8_t *)buf, (utf8proc_ssize_t)len, &cp);
    if (cp < 0 || !is_unicode_id_start(cp)) return TOK_ERR;
    i = (jsoff_t)n;
    *tlen = i;
    goto slow_path_loop;
  }
  return TOK_ERR;
slow_path_continue:
  /* Resume after the ASCII prefix consumed by the fast path. */
  *tlen = i;
slow_path_loop:;
  int has_escapes = (buf[0] == '\\');
  while (*tlen < len) {
    c = (unsigned char)buf[*tlen];
    if (c == '\\') {
      uint32_t cp;
      int el = parse_unicode_escape(buf, len, *tlen, &cp);
      if (el <= 0 || !is_unicode_ident_continue(cp)) break;
      *tlen += el;
      has_escapes = 1;
    } else if (c < 128) {
      if (!is_ident_continue(c)) break;
      (*tlen)++;
    } else {
      utf8proc_int32_t cp;
      utf8proc_ssize_t n = utf8_next(
        (const utf8proc_uint8_t *)&buf[*tlen],
        (utf8proc_ssize_t)(len - *tlen), &cp);
      if (cp < 0 || !is_unicode_id_continue(cp)) break;
      *tlen += (jsoff_t)n;
    }
  }
  if (has_escapes) {
    /* Keyword check runs on the decoded spelling; a keyword written with
     * escapes is reported as an error rather than as that keyword. */
    char decoded[256];
    size_t decoded_len = decode_ident_escapes(buf, *tlen, decoded, sizeof(decoded));
    uint8_t kw = parsekeyword(decoded, decoded_len);
    if (kw != TOK_IDENTIFIER) return TOK_ERR;
    return TOK_IDENTIFIER;
  }
  return parsekeyword(buf, *tlen);
}
/*
 * Parse a decimal literal at the start of `buf`: digits with optional `_`
 * separators, an optional `.` fraction and an optional `e`/`E` exponent with
 * optional sign.
 *
 * buf    - text to scan
 * maxlen - number of valid bytes in `buf`
 * out    - receives the parsed value
 *
 * Returns the number of bytes consumed.
 *
 * Differences from the previous implementation:
 *  - integer digits beyond what fits in 64 bits are counted and applied as a
 *    power of ten instead of silently wrapping the accumulator;
 *  - fraction digits beyond what fits in 64 bits are ignored (they lie below
 *    the precision of a double);
 *  - the exponent accumulator is capped, so very long exponents no longer
 *    overflow a signed int;
 *  - a zero mantissa is left alone, so `0e999` yields 0 and not NaN.
 * Literals that fit in the accumulators are computed with the same
 * arithmetic as before.
 */
static inline jsoff_t parse_decimal(const char *buf, jsoff_t maxlen, double *out) {
  static const double neg_pow10[] = {
    1e0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9, 1e-10,
    1e-11, 1e-12, 1e-13, 1e-14, 1e-15, 1e-16, 1e-17, 1e-18, 1e-19, 1e-20
  };
  static const double pos_pow10[] = {
    1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10,
    1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20
  };
  /* Largest accumulator value that can still take one more decimal digit. */
  const uint64_t acc_limit = (UINT64_MAX - 9) / 10;
  /* Any exponent at or beyond this saturates a double, so stop counting. */
  const int exp_cap = 100000;

  uint64_t int_part = 0, frac_part = 0;
  int int_dropped = 0; /* integer digits that did not fit into int_part */
  int frac_digits = 0; /* fraction digits folded into frac_part */
  jsoff_t i = 0;

  for (; i < maxlen && (IS_DIGIT(buf[i]) || buf[i] == '_'); i++) {
    if (buf[i] == '_') continue;
    if (int_part <= acc_limit) {
      int_part = int_part * 10 + (uint64_t)(buf[i] - '0');
    } else if (int_dropped < exp_cap) {
      int_dropped++;
    }
  }

  if (i < maxlen && buf[i] == '.') {
    for (i++; i < maxlen && (IS_DIGIT(buf[i]) || buf[i] == '_'); i++) {
      if (buf[i] == '_') continue;
      if (frac_part > acc_limit) continue;
      frac_part = frac_part * 10 + (uint64_t)(buf[i] - '0');
      frac_digits++;
    }
  }

  double val = (double)int_part;
  if (frac_digits > 0) {
    val += (frac_digits <= 20)
      ? (double)frac_part * neg_pow10[frac_digits]
      : (double)frac_part * pow(10.0, -frac_digits);
  }

  /* Net power of ten still to be applied to val. */
  int dec_exp = int_dropped;

  if (i < maxlen && (buf[i] == 'e' || buf[i] == 'E')) {
    i++;
    int exp_sign = 1, exp_val = 0;
    if (i < maxlen && (buf[i] == '+' || buf[i] == '-')) {
      exp_sign = (buf[i] == '-') ? -1 : 1;
      i++;
    }
    for (; i < maxlen && (IS_DIGIT(buf[i]) || buf[i] == '_'); i++) {
      if (buf[i] == '_') continue;
      if (exp_val < exp_cap) exp_val = exp_val * 10 + (buf[i] - '0');
    }
    dec_exp += exp_sign * exp_val;
  }

  if (dec_exp != 0 && val != 0.0) {
    if (dec_exp > 0) {
      val *= (dec_exp <= 20) ? pos_pow10[dec_exp] : pow(10.0, dec_exp);
    } else {
      val *= (dec_exp >= -20) ? neg_pow10[-dec_exp] : pow(10.0, dec_exp);
    }
  }

  *out = val;
  return i;
}
/*
 * Parse the digits of a binary literal. Scanning starts at index 2 (after the
 * two-byte prefix) and accepts '0', '1' and '_' separators.
 * Stores the value in `*out` and returns the index where scanning stopped.
 */
static inline jsoff_t parse_binary(const char *buf, jsoff_t maxlen, double *out) {
  double acc = 0;
  jsoff_t i;

  for (i = 2; i < maxlen; i++) {
    const char ch = buf[i];
    if (ch == '_') continue;
    if (ch != '0' && ch != '1') break;
    acc = acc * 2 + (ch - '0');
  }

  *out = acc;
  return i;
}
/*
 * Parse the digits of a prefixed octal literal. Scanning starts at index 2
 * and accepts '0'..'7' and '_' separators.
 * Stores the value in `*out` and returns the index where scanning stopped.
 */
static inline jsoff_t parse_octal(const char *buf, jsoff_t maxlen, double *out) {
  double acc = 0;
  jsoff_t i;

  for (i = 2; i < maxlen; i++) {
    const char ch = buf[i];
    if (ch == '_') continue;
    if (!IS_OCTAL(ch)) break;
    acc = acc * 8 + (ch - '0');
  }

  *out = acc;
  return i;
}
/*
 * Parse a legacy octal literal (leading '0' followed by octal digits).
 * Scanning starts at index 1 and accepts only '0'..'7'; separators are not
 * allowed. Stores the value in `*out` and returns the index where scanning
 * stopped.
 */
static inline jsoff_t parse_legacy_octal(const char *buf, jsoff_t maxlen, double *out) {
  double acc = 0;
  jsoff_t i;

  for (i = 1; i < maxlen; i++) {
    if (!IS_OCTAL(buf[i])) break;
    acc = acc * 8 + (buf[i] - '0');
  }

  *out = acc;
  return i;
}
/*
 * Parse the digits of a hexadecimal literal. Scanning starts at index 2 and
 * accepts hex digits of either case and '_' separators.
 * Stores the value in `*out` and returns the index where scanning stopped.
 */
static inline jsoff_t parse_hex(const char *buf, jsoff_t maxlen, double *out) {
  double acc = 0;
  jsoff_t i;

  for (i = 2; i < maxlen; i++) {
    const char ch = buf[i];
    if (ch == '_') continue;
    if (!IS_XDIGIT(ch)) break;

    int digit;
    if (ch >= 'a') {
      digit = ch - 'a' + 10;
    } else if (ch >= 'A') {
      digit = ch - 'A' + 10;
    } else {
      digit = ch - '0';
    }
    acc = acc * 16 + digit;
  }

  *out = acc;
  return i;
}
/*
 * Scan a numeric literal at `buf` (`remaining` bytes, first byte a digit) and
 * record the result in lx->st: tok is TOK_NUMBER, TOK_BIGINT (literal
 * followed by 'n') or TOK_ERR; tlen is the token length; tval is tov(value)
 * for successful scans. Returns the token.
 *
 * Forms handled: 0b / 0o / 0x prefixed literals, legacy octal (leading zero,
 * rejected in strict mode), and decimal.
 *
 * Fixes relative to the previous version:
 *  - a radix prefix with nothing after it (e.g. "0x") is an error instead of
 *    being read as 0;
 *  - a leading-zero literal whose digit run contains 8 or 9 (e.g. "0789") is
 *    read as one decimal number instead of stopping at the first non-octal
 *    digit.
 */
static inline uint8_t parse_number(sv_lexer_t *lx, const char *buf, jsoff_t remaining) {
  double value = 0;
  jsoff_t numlen = 0;
  bool has_radix_prefix = false;

  if (buf[0] == '0' && remaining > 1) {
    char c1 = buf[1] | 0x20; /* fold the prefix letter to lower case */

    if (c1 == 'b') {
      has_radix_prefix = true;
      numlen = parse_binary(buf, remaining, &value);
    } else if (c1 == 'o') {
      has_radix_prefix = true;
      numlen = parse_octal(buf, remaining, &value);
    } else if (c1 == 'x') {
      has_radix_prefix = true;
      numlen = parse_hex(buf, remaining, &value);
    } else if (IS_OCTAL(buf[1])) {
      if (lx->strict) goto reject_leading_zero;

      /* Legacy octal only if every digit of the run is octal. */
      bool has_8_or_9 = false;
      for (jsoff_t k = 1; k < remaining && IS_DIGIT(buf[k]); k++) {
        if (!IS_OCTAL(buf[k])) {
          has_8_or_9 = true;
          break;
        }
      }
      numlen = has_8_or_9
        ? parse_decimal(buf, remaining, &value)
        : parse_legacy_octal(buf, remaining, &value);
    } else if (is_digit(buf[1]) && lx->strict) {
      goto reject_leading_zero;
    } else {
      numlen = parse_decimal(buf, remaining, &value);
    }
  } else {
    numlen = parse_decimal(buf, remaining, &value);
  }

  /* The radix parsers start at index 2; stopping there means no digits. */
  if (has_radix_prefix && numlen <= 2) {
    lx->st.tok = TOK_ERR;
    lx->st.tlen = numlen;
    return TOK_ERR;
  }

  lx->st.tval = tov(value);
  if (numlen < remaining && buf[numlen] == 'n') {
    lx->st.tok = TOK_BIGINT;
    lx->st.tlen = numlen + 1;
  } else {
    lx->st.tok = TOK_NUMBER;
    lx->st.tlen = numlen;
  }
  return lx->st.tok;

reject_leading_zero:
  /* Leading-zero literals are not accepted in strict mode. */
  lx->st.tok = TOK_ERR;
  lx->st.tlen = 1;
  return TOK_ERR;
}
/*
 * Find the end of the quoted string that starts at buf[0].
 *
 * lx    - lexer whose st.tok / st.tlen receive the result
 * buf   - text starting at the opening quote
 * rem   - number of valid bytes in `buf`
 * quote - the quote character that opened the string
 *
 * On success sets TOK_STRING with tlen covering both quotes. When no closing
 * quote is found, or an escape runs past the end, sets TOK_ERR with
 * tlen == rem. Returns the token.
 *
 * The backslash search is limited to the bytes before the next quote. The old
 * code searched to the end of the source for every string, which made lexing
 * a file with many escape-free strings quadratic in the file size. Results
 * are unchanged.
 */
static inline uint8_t scan_string(sv_lexer_t *lx, const char *buf, jsoff_t rem, char quote) {
  jsoff_t i = 1;

  while (i < rem) {
    const char *p = buf + i;
    jsoff_t search_len = rem - i;

    const char *q = memchr(p, quote, search_len);
    if (q == NULL) goto unterminated;

    /* Only a backslash in front of that quote can change the outcome. */
    const char *b = memchr(p, '\\', (size_t)(q - p));
    if (b == NULL) {
      i = (jsoff_t)((q - buf) + 1);
      lx->st.tok = TOK_STRING;
      lx->st.tlen = i;
      return TOK_STRING;
    }

    jsoff_t esc_pos = (jsoff_t)(b - buf);
    if (esc_pos + 1 >= rem) goto unterminated;

    /* Number of bytes to step over for this escape. */
    char esc_char = buf[esc_pos + 1];
    jsoff_t skip = 2;
    if (esc_char == 'x') {
      skip = 4;
    } else if (esc_char == 'u') {
      skip = (esc_pos + 2 < rem && buf[esc_pos + 2] == '{') ? 0 : 6;
      if (skip == 0) {
        /* Braced form: step to just past the closing brace if there is one. */
        jsoff_t j = esc_pos + 3;
        while (j < rem && buf[j] != '}') j++;
        skip = (j < rem) ? (j - esc_pos + 1) : (rem - esc_pos);
      }
    }

    if (esc_pos + skip > rem) goto unterminated;
    i = esc_pos + skip;
  }

unterminated:
  lx->st.tok = TOK_ERR;
  lx->st.tlen = rem;
  return TOK_ERR;
}
/*
 * Given the index `start` of an opening quote, return the index just past the
 * matching closing quote, or `rem` when the string is not closed.
 * A backslash makes the following byte be skipped without inspection.
 */
static inline jsoff_t skip_string_literal(const char *buf, jsoff_t rem, jsoff_t start) {
  for (jsoff_t i = start + 1; i < rem; i++) {
    if (buf[i] == '\\') {
      i++; /* together with the loop increment this steps over two bytes */
      continue;
    }
    if (buf[i] == quote) return i + 1;
  }
  return rem;
}
/*
 * Given the index `start` of a two-byte line-comment opener, return the index
 * of the terminating line feed, or the first index >= rem if there is none.
 */
static inline jsoff_t skip_line_comment(const char *buf, jsoff_t rem, jsoff_t start) {
  jsoff_t i = start + 2;
  for (; i < rem; i++) {
    if (buf[i] == '\n') break;
  }
  return i;
}
/*
 * Given the index `start` of a two-byte block-comment opener, return the index
 * just past the closing delimiter, or `rem` when the comment is not closed.
 */
static inline jsoff_t skip_block_comment(const char *buf, jsoff_t rem, jsoff_t start) {
  for (jsoff_t i = start + 2; i + 1 < rem; i++) {
    if (buf[i] == '*' && buf[i + 1] == '/') return i + 2;
  }
  return rem;
}
/* True for space, tab, line feed, carriage return, form feed and vertical tab. */
static inline bool is_expr_ws(char c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\f':
    case '\v':
      return true;
    default:
      return false;
  }
}
/* True for ASCII letters, underscore and dollar sign. */
static inline bool is_ident_ascii_start(char c) {
  const bool is_lower = (c >= 'a' && c <= 'z');
  const bool is_upper = (c >= 'A' && c <= 'Z');
  if (is_lower || is_upper) return true;
  return c == '_' || c == '$';
}
/* True for ASCII digits and for anything is_ident_ascii_start() accepts. */
static inline bool is_ident_ascii_continue(char c) {
  if (c >= '0' && c <= '9') return true;
  return is_ident_ascii_start(c);
}
/*
 * Given the index `start` of the slash that opens a regular-expression
 * literal, return the index just past the literal and its flags.
 *
 * A slash inside a `[...]` class does not close the literal, and a backslash
 * (when another byte follows) makes the next byte be skipped. After the
 * closing slash, or at the end of input when the literal is not closed, any
 * run of bytes accepted by is_ident_ascii_start() is consumed as flags.
 */
static inline jsoff_t skip_regex_literal(const char *buf, jsoff_t rem, jsoff_t start) {
  jsoff_t i = start + 1;
  bool in_class = false;

  for (; i < rem; i++) {
    const char c = buf[i];

    if (c == '\\' && i + 1 < rem) {
      i++; /* with the loop increment: step over the escaped byte too */
      continue;
    }

    if (c == '[') {
      in_class = true;
    } else if (c == ']' && in_class) {
      in_class = false;
    } else if (c == '/' && !in_class) {
      i++;
      break;
    }
  }

  /* Flags. */
  while (i < rem && is_ident_ascii_start(buf[i])) i++;
  return i;
}
// todo: modularize
/*
 * Decide whether a '/' following the word buf[start..end) may begin a
 * regular-expression literal. True exactly when the word is one of the
 * listed words; false for every other word.
 */
static inline bool regex_allowed_after_ident(const char *buf, jsoff_t start, jsoff_t end) {
  static const struct {
    const char *word;
    size_t len;
  } regex_prefix_words[] = {
    {"if", 2},     {"in", 2},     {"do", 2},     {"of", 2},
    {"for", 3},    {"new", 3},
    {"else", 4},   {"case", 4},   {"void", 4},
    {"throw", 5},
    {"return", 6}, {"typeof", 6}, {"delete", 6},
    {"instanceof", 10},
  };

  const size_t n = (size_t)(end - start);
  const size_t count = sizeof(regex_prefix_words) / sizeof(regex_prefix_words[0]);

  for (size_t k = 0; k < count; k++) {
    if (regex_prefix_words[k].len != n) continue;
    if (memcmp(buf + start, regex_prefix_words[k].word, n) == 0) return true;
  }
  return false;
}
/*
 * Given the index `start` of the backtick that opens a template literal,
 * return the index just past its closing backtick, or 0 when the literal is
 * not closed before `rem`.
 *
 * Outside `${...}` only backticks, `${` and backslashes matter. Inside a
 * substitution the text is skimmed as code: nested strings, templates,
 * comments and regular-expression literals are skipped as units and braces
 * are counted. `can_start_regex` tracks whether a '/' at the current point
 * would open a regex (after an operator or one of the words accepted by
 * regex_allowed_after_ident()) or be a division.
 *
 * The unterminated case used to return `rem`, the same value as a literal
 * whose closing backtick is the last byte of the input, so callers could not
 * tell the two apart.
 */
static jsoff_t skip_template_literal(const char *buf, jsoff_t rem, jsoff_t start) {
  jsoff_t i = start + 1;
  int expr_depth = 0;
  bool can_start_regex = true;

  while (i < rem) {
    char c = buf[i];

    if (c == '\\') {
      i += 2;
      continue;
    }

    /* Literal text of the template. */
    if (expr_depth == 0) {
      if (c == '`') return i + 1;
      if (c == '$' && i + 1 < rem && buf[i + 1] == '{') {
        expr_depth = 1;
        can_start_regex = true;
        i += 2;
        continue;
      }
      i++;
      continue;
    }

    /* Everything below runs inside a ${...} substitution. */
    if (c == '\'' || c == '"') {
      i = skip_string_literal(buf, rem, i, c);
      continue;
    }

    if (c == '`') {
      jsoff_t next = skip_template_literal(buf, rem, i);
      if (next <= i) return 0; /* nested template not closed */
      i = next;
      can_start_regex = false;
      continue;
    }

    if (c == '/' && i + 1 < rem) {
      if (buf[i + 1] == '/') {
        i = skip_line_comment(buf, rem, i);
        continue;
      }
      if (buf[i + 1] == '*') {
        i = skip_block_comment(buf, rem, i);
        continue;
      }
      if (can_start_regex) {
        i = skip_regex_literal(buf, rem, i);
        can_start_regex = false;
        continue;
      }
      i++;
      can_start_regex = true;
      continue;
    }

    if (is_expr_ws(c)) {
      i++;
      continue;
    }

    if (is_ident_ascii_start(c)) {
      jsoff_t id_start = i;
      i++;
      while (i < rem && is_ident_ascii_continue(buf[i])) i++;
      can_start_regex = regex_allowed_after_ident(buf, id_start, i);
      continue;
    }

    if ((c >= '0' && c <= '9') ||
        (c == '.' && i + 1 < rem && (buf[i + 1] >= '0' && buf[i + 1] <= '9'))) {
      i++;
      while (i < rem) {
        char d = buf[i];
        if ((d >= '0' && d <= '9') || d == '_' || d == '.') i++;
        else break;
      }
      can_start_regex = false;
      continue;
    }

    if (c == '{') {
      expr_depth++;
      i++;
      can_start_regex = true;
      continue;
    }

    if (c == '}') {
      expr_depth--;
      i++;
      can_start_regex = false;
      continue;
    }

    if (c == '(' || c == '[' || c == ',' || c == ';' || c == ':' || c == '?' ||
        c == '!' || c == '~' || c == '+' || c == '-' || c == '*' || c == '%' ||
        c == '&' || c == '|' || c == '^' || c == '=' || c == '<' || c == '>') {
      i++;
      can_start_regex = true;
      continue;
    }

    if (c == ')' || c == ']') {
      i++;
      can_start_regex = false;
      continue;
    }

    i++;
    can_start_regex = false;
  }

  /* Ran out of input before the closing backtick. */
  return 0;
}

/*
 * Scan the template literal that starts at buf[0] and record the result in
 * lx->st: TOK_TEMPLATE with tlen covering both backticks, or TOK_ERR with
 * tlen == rem when the literal is not closed. Returns the token.
 */
static inline uint8_t scan_template(sv_lexer_t *lx, const char *buf, jsoff_t rem) {
  jsoff_t end = skip_template_literal(buf, rem, 0);

  /* A closed literal is at least two bytes long and ends within the input. */
  if (end <= 1 || end > rem) {
    lx->st.tok = TOK_ERR;
    lx->st.tlen = rem;
    return TOK_ERR;
  }

  lx->st.tok = TOK_TEMPLATE;
  lx->st.tlen = end;
  return TOK_TEMPLATE;
}
/*
 * Recognise a punctuator at buf[0] by longest match and record it in lx->st
 * (tok and tlen). A '.' directly followed by a digit is scanned as a decimal
 * number instead, which also sets tval.
 *
 * Returns the token stored in lx->st.tok, or 0 when buf[0] does not start any
 * operator handled here (lx->st is then left untouched).
 */
static inline uint8_t parse_operator(sv_lexer_t *lx, const char *buf, jsoff_t rem) {
/* OP_SEQn: the n-1 bytes after buf[0] exist and equal the given characters. */
#define OP_SEQ2(b) (rem >= 2 && buf[1] == (b))
#define OP_SEQ3(b, c) (rem >= 3 && buf[1] == (b) && buf[2] == (c))
#define OP_SEQ4(b, c, d) (rem >= 4 && buf[1] == (b) && buf[2] == (c) && buf[3] == (d))
/* Record token `t` of length `n`. */
#define OP_EMIT(t, n) (lx->st.tok = (t), lx->st.tlen = (n))

  switch (buf[0]) {
    case '?':
      if (OP_SEQ3('?', '=')) OP_EMIT(TOK_NULLISH_ASSIGN, 3);
      else if (OP_SEQ2('?')) OP_EMIT(TOK_NULLISH, 2);
      else if (OP_SEQ2('.')) OP_EMIT(TOK_OPTIONAL_CHAIN, 2);
      else OP_EMIT(TOK_Q, 1);
      break;

    case '!':
      if (OP_SEQ3('=', '=')) OP_EMIT(TOK_SNE, 3);
      else if (OP_SEQ2('=')) OP_EMIT(TOK_NE, 2);
      else OP_EMIT(TOK_NOT, 1);
      break;

    case '=':
      if (OP_SEQ3('=', '=')) OP_EMIT(TOK_SEQ, 3);
      else if (OP_SEQ2('=')) OP_EMIT(TOK_EQ, 2);
      else if (OP_SEQ2('>')) OP_EMIT(TOK_ARROW, 2);
      else OP_EMIT(TOK_ASSIGN, 1);
      break;

    case '<':
      if (OP_SEQ3('<', '=')) OP_EMIT(TOK_SHL_ASSIGN, 3);
      else if (OP_SEQ2('<')) OP_EMIT(TOK_SHL, 2);
      else if (OP_SEQ2('=')) OP_EMIT(TOK_LE, 2);
      else OP_EMIT(TOK_LT, 1);
      break;

    case '>':
      if (OP_SEQ4('>', '>', '=')) OP_EMIT(TOK_ZSHR_ASSIGN, 4);
      else if (OP_SEQ3('>', '>')) OP_EMIT(TOK_ZSHR, 3);
      else if (OP_SEQ3('>', '=')) OP_EMIT(TOK_SHR_ASSIGN, 3);
      else if (OP_SEQ2('>')) OP_EMIT(TOK_SHR, 2);
      else if (OP_SEQ2('=')) OP_EMIT(TOK_GE, 2);
      else OP_EMIT(TOK_GT, 1);
      break;

    case '&':
      if (OP_SEQ3('&', '=')) OP_EMIT(TOK_LAND_ASSIGN, 3);
      else if (OP_SEQ2('&')) OP_EMIT(TOK_LAND, 2);
      else if (OP_SEQ2('=')) OP_EMIT(TOK_AND_ASSIGN, 2);
      else OP_EMIT(TOK_AND, 1);
      break;

    case '|':
      if (OP_SEQ3('|', '=')) OP_EMIT(TOK_LOR_ASSIGN, 3);
      else if (OP_SEQ2('|')) OP_EMIT(TOK_LOR, 2);
      else if (OP_SEQ2('=')) OP_EMIT(TOK_OR_ASSIGN, 2);
      else OP_EMIT(TOK_OR, 1);
      break;

    case '+':
      if (OP_SEQ2('+')) OP_EMIT(TOK_POSTINC, 2);
      else if (OP_SEQ2('=')) OP_EMIT(TOK_PLUS_ASSIGN, 2);
      else OP_EMIT(TOK_PLUS, 1);
      break;

    case '-':
      if (OP_SEQ2('-')) OP_EMIT(TOK_POSTDEC, 2);
      else if (OP_SEQ2('=')) OP_EMIT(TOK_MINUS_ASSIGN, 2);
      else OP_EMIT(TOK_MINUS, 1);
      break;

    case '*':
      if (OP_SEQ3('*', '=')) OP_EMIT(TOK_EXP_ASSIGN, 3);
      else if (OP_SEQ2('*')) OP_EMIT(TOK_EXP, 2);
      else if (OP_SEQ2('=')) OP_EMIT(TOK_MUL_ASSIGN, 2);
      else OP_EMIT(TOK_MUL, 1);
      break;

    case '/':
      if (OP_SEQ2('=')) OP_EMIT(TOK_DIV_ASSIGN, 2);
      else OP_EMIT(TOK_DIV, 1);
      break;

    case '%':
      if (OP_SEQ2('=')) OP_EMIT(TOK_REM_ASSIGN, 2);
      else OP_EMIT(TOK_REM, 1);
      break;

    case '^':
      if (OP_SEQ2('=')) OP_EMIT(TOK_XOR_ASSIGN, 2);
      else OP_EMIT(TOK_XOR, 1);
      break;

    case '.':
      if (OP_SEQ3('.', '.')) {
        OP_EMIT(TOK_REST, 3);
      } else if (rem > 1 && IS_DIGIT(buf[1])) {
        /* Number with an empty integer part. */
        double val;
        lx->st.tlen = parse_decimal(buf, rem, &val);
        lx->st.tval = tov(val);
        lx->st.tok = TOK_NUMBER;
      } else {
        OP_EMIT(TOK_DOT, 1);
      }
      break;

    default:
      return 0;
  }

#undef OP_SEQ2
#undef OP_SEQ3
#undef OP_SEQ4
#undef OP_EMIT
  return lx->st.tok;
}
/*
 * Produce the current token, scanning a new one only when the previous token
 * has been marked consumed.
 *
 * When scanning: whitespace and comments are skipped first (recording in
 * st.had_newline whether a line break was crossed), st.toff is set to the
 * token start, and the first byte selects the scanner. After scanning,
 * st.pos is st.toff + st.tlen. Returns TOK_EOF at end of input.
 */
static uint8_t sv_next_raw(sv_lexer_t *lx) {
  /* Token already scanned and not yet consumed: replay it. */
  if (likely(lx->st.consumed == 0)) return lx->st.tok;
  lx->st.consumed = 0;
  lx->st.tok = TOK_ERR;
  lx->st.toff = lx->st.pos = sv_skiptonext(lx->code, lx->clen, lx->st.pos, &lx->st.had_newline);
  lx->st.tlen = 0;
  if (unlikely(lx->st.toff >= lx->clen)) {
    lx->st.tok = TOK_EOF;
    return TOK_EOF;
  }
  const char *buf = lx->code + lx->st.toff;
  jsoff_t rem = lx->clen - lx->st.toff;
  uint8_t c = (uint8_t)buf[0];
  /* One-character punctuation via table lookup. */
  if (likely(c < 128)) {
    uint8_t simple_tok = single_char_tok[c];
    if (simple_tok != 0) {
      lx->st.tok = simple_tok;
      lx->st.tlen = 1;
      lx->st.pos = lx->st.toff + 1;
      return simple_tok;
    }
  }
  /* ASCII identifier start. */
  if (likely(IS_IDENT1(c))) {
    lx->st.tok = parseident(buf, rem, &lx->st.tlen);
    lx->st.pos = lx->st.toff + lx->st.tlen;
    return lx->st.tok;
  }
  /* In the branches below a zero token length is bumped to 1 so that the
   * scan position always moves forward. */
  if (IS_DIGIT(c)) {
    parse_number(lx, buf, rem);
    if (lx->st.tlen == 0) lx->st.tlen = 1;
    lx->st.pos = lx->st.toff + lx->st.tlen;
    return lx->st.tok;
  }
  if (c == '"' || c == '\'') {
    scan_string(lx, buf, rem, c);
    if (lx->st.tlen == 0) lx->st.tlen = 1;
    lx->st.pos = lx->st.toff + lx->st.tlen;
    return lx->st.tok;
  }
  if (c == '`') {
    scan_template(lx, buf, rem);
    if (lx->st.tlen == 0) lx->st.tlen = 1;
    lx->st.pos = lx->st.toff + lx->st.tlen;
    return lx->st.tok;
  }
  if (parse_operator(lx, buf, rem)) {
    if (lx->st.tlen == 0) lx->st.tlen = 1;
    lx->st.pos = lx->st.toff + lx->st.tlen;
    return lx->st.tok;
  }
  /* Anything else (backslash escape, non-ASCII byte, unknown character) is
   * handed to the identifier scanner, which returns TOK_ERR when it cannot
   * start a name. */
  lx->st.tok = parseident(buf, rem, &lx->st.tlen);
  if (lx->st.tlen == 0) lx->st.tlen = 1;
  lx->st.pos = lx->st.toff + lx->st.tlen;
  return lx->st.tok;
}
/*
 * Return the current token. A token that has not been consumed yet is
 * returned as is; otherwise a new one is scanned with sv_next_raw().
 */
uint8_t sv_lexer_next(sv_lexer_t *lx) {
  const bool pending = (lx->st.consumed == 0);
  if (likely(pending)) {
    return lx->st.tok;
  }
  return sv_next_raw(lx);
}
/*
 * Return the token that follows the current one without changing the
 * lexer's state.
 *
 * The whole token state is snapshotted, the current token is treated as
 * consumed so that a fresh scan happens, and the snapshot is put back.
 * The previous version restored the fields one by one and missed `tval`, so
 * peeking at a numeric token overwrote the value of the current token.
 */
uint8_t sv_lexer_lookahead(sv_lexer_t *lx) {
  const sv_lexer_state_t saved = lx->st;

  lx->st.consumed = 1; /* force sv_lexer_next() to scan */
  uint8_t tok = sv_lexer_next(lx);

  lx->st = saved;
  return tok;
}
File Metadata
Details
Attached
Mime Type
text/x-c
Expires
Thu, Mar 26, 4:47 PM (2 d)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
511853
Default Alt Text
lexer.c (33 KB)
Attached To
Mode
rANT Ant
Attached
Detach File
Event Timeline
Log In to Comment