Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F4434981
regex.c
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
76 KB
Referenced Files
None
Subscribers
None
regex.c
View Options
// TODO: clean up this module and simplify its structure.
#include
<stdlib.h>
#include
<string.h>
#include
<stdio.h>
#include
"ant.h"
#include
"utf8.h"
#include
"errors.h"
#include
"runtime.h"
#include
"internal.h"
#include
"utils.h"
#include
"escape.h"
#include
"descriptors.h"
#include
"silver/engine.h"
#include
"modules/regex.h"
#include
"modules/symbol.h"
#include
"gc/objects.h"
#include
<pcre2.h>
// One entry of the compiled-regex cache: associates a RegExp object with the
// PCRE2 artifacts that were built for it.
typedef struct {
  ant_object_t *obj;             // cache key; regex_cache_lookup compares this pointer
  pcre2_code *code;              // compiled PCRE2 pattern
  pcre2_match_data *match_data;  // match data block created from `code`
  bool jit_ready;                // true when pcre2_jit_compile returned 0 for `code`
} regex_cache_entry_t;
enum
{
REGEXP_FLAG_HAS_INDICES
=
1
<<
0
,
REGEXP_FLAG_GLOBAL
=
1
<<
1
,
REGEXP_FLAG_IGNORE_CASE
=
1
<<
2
,
REGEXP_FLAG_MULTILINE
=
1
<<
3
,
REGEXP_FLAG_DOTALL
=
1
<<
4
,
REGEXP_FLAG_UNICODE
=
1
<<
5
,
REGEXP_FLAG_UNICODE_SET
=
1
<<
6
,
REGEXP_FLAG_STICKY
=
1
<<
7
,
};
// Growable array of cache entries; grown with realloc in regex_cache_insert.
static regex_cache_entry_t *regex_cache = NULL;
// NOTE(review): presumably the prototype object for matchAll iterators; it is
// not referenced in the part of the file reviewed here - confirm.
static ant_value_t regexp_matchall_iter_proto_val = 0;
// Number of entries currently stored in regex_cache.
static size_t regex_cache_count = 0;
// Number of entries regex_cache has room for.
static size_t regex_cache_cap = 0;
// Converts a flags string (`flen` bytes at `fstr`) into a REGEXP_FLAG_* bit mask.
// Unknown characters are ignored; repeated letters simply set the same bit again.
static inline uint8_t regexp_parse_flags_mask(const char *fstr, ant_offset_t flen) {
  static const struct {
    char letter;
    uint8_t bit;
  } flag_bits[] = {
    { 'd', REGEXP_FLAG_HAS_INDICES },
    { 'g', REGEXP_FLAG_GLOBAL },
    { 'i', REGEXP_FLAG_IGNORE_CASE },
    { 'm', REGEXP_FLAG_MULTILINE },
    { 's', REGEXP_FLAG_DOTALL },
    { 'u', REGEXP_FLAG_UNICODE },
    { 'v', REGEXP_FLAG_UNICODE_SET },
    { 'y', REGEXP_FLAG_STICKY },
  };

  uint8_t result = 0;
  for (ant_offset_t pos = 0; pos < flen; pos++) {
    const char letter = fstr[pos];
    for (size_t j = 0; j < sizeof(flag_bits) / sizeof(flag_bits[0]); j++) {
      if (flag_bits[j].letter == letter) {
        result |= flag_bits[j].bit;
        break;
      }
    }
  }
  return result;
}
// Returns the REGEXP_FLAG_* mask for `regexp`, derived from its "flags" property.
// Yields 0 when the property is missing or is not a string. The mask is memoised
// in the object's slots and reused while the stored flags value compares equal
// to the current one.
static inline uint8_t regexp_flags_mask(ant_t *js, ant_value_t regexp) {
  ant_offset_t prop_off = lkp(js, regexp, "flags", 5);
  if (prop_off == 0) return 0;

  ant_value_t current = js_propref_load(js, prop_off);
  if (vtype(current) != T_STR) return 0;

  ant_value_t memo_string = js_get_slot(regexp, SLOT_REGEXP_FLAGS_STRING);
  ant_value_t memo_mask = js_get_slot(regexp, SLOT_REGEXP_FLAGS_MASK);
  if (current == memo_string && vtype(memo_mask) == T_NUM) {
    return (uint8_t)tod(memo_mask);
  }

  // Memo miss: parse the string and refresh both slots.
  ant_offset_t text_len;
  ant_offset_t text_off = vstr(js, current, &text_len);
  uint8_t parsed = regexp_parse_flags_mask((const char *)(uintptr_t)text_off, text_len);

  js_set_slot(regexp, SLOT_REGEXP_FLAGS_MASK, tov((double)parsed));
  js_set_slot(regexp, SLOT_REGEXP_FLAGS_STRING, current);
  return parsed;
}
// Builds a flat JS array [name0, index0, name1, index1, ...] from the PCRE2
// name table of `code`. Returns undefined when the pattern has no named groups,
// or an error value if creating the array or a name string fails.
static ant_value_t regexp_build_named_groups_meta(ant_t *js, pcre2_code *code) {
  uint32_t name_total = 0;
  pcre2_pattern_info(code, PCRE2_INFO_NAMECOUNT, &name_total);
  if (name_total == 0) return js_mkundef();

  uint32_t entry_size = 0;
  PCRE2_SPTR table = NULL;
  pcre2_pattern_info(code, PCRE2_INFO_NAMEENTRYSIZE, &entry_size);
  pcre2_pattern_info(code, PCRE2_INFO_NAMETABLE, (void *)&table);

  ant_value_t pairs = js_mkarr(js);
  if (is_err(pairs)) return pairs;

  PCRE2_SPTR entry = table;
  for (uint32_t seen = 0; seen < name_total; seen++, entry += entry_size) {
    // Each entry starts with the group number (two code units, high part
    // first), followed by the NUL-terminated group name.
    int group_no = (entry[0] << 8) | entry[1];
    const char *group_name = (const char *)(entry + 2);

    ant_value_t name_str = js_mkstr(js, group_name, strlen(group_name));
    if (is_err(name_str)) return name_str;

    js_arr_push(js, pairs, name_str);
    js_arr_push(js, pairs, tov((double)group_no));
  }
  return pairs;
}
static
void
update_regexp_statics
(
ant_t
*
js
,
const
char
*
str_ptr
,
PCRE2_SIZE
*
ovector
,
uint32_t
ovcount
)
{
ant_value_t
regexp_ctor
=
js_get
(
js
,
js_glob
(
js
),
"RegExp"
);
if
(
is_err
(
regexp_ctor
)
||
vtype
(
regexp_ctor
)
==
T_UNDEF
)
return
;
ant_value_t
empty
=
js_mkstr
(
js
,
""
,
0
);
for
(
int
i
=
1
;
i
<=
9
;
i
++
)
{
char
key
[
3
]
=
{
'$'
,
(
char
)(
'0'
+
i
),
'\0'
};
ant_value_t
val
=
empty
;
if
((
uint32_t
)
i
<
ovcount
&&
ovector
[
2
*
i
]
!=
PCRE2_UNSET
)
val
=
js_mkstr
(
js
,
str_ptr
+
ovector
[
2
*
i
],
ovector
[
2
*
i
+
1
]
-
ovector
[
2
*
i
]);
if
(
is_err
(
setprop_cstr
(
js
,
regexp_ctor
,
key
,
2
,
val
)))
return
;
}
ant_value_t
match0
=
empty
;
if
(
ovcount
>
0
&&
ovector
[
0
]
!=
PCRE2_UNSET
)
match0
=
js_mkstr
(
js
,
str_ptr
+
ovector
[
0
],
ovector
[
1
]
-
ovector
[
0
]);
if
(
is_err
(
setprop_cstr
(
js
,
regexp_ctor
,
"lastMatch"
,
9
,
match0
)))
return
;
(
void
)
setprop_cstr
(
js
,
regexp_ctor
,
"$&"
,
2
,
match0
);
}
// Reports whether `\c` can be copied to the PCRE2 pattern unchanged: class
// shorthands, word boundaries, common control escapes, back-references 1-9,
// and escaped punctuation/metacharacters.
static inline bool is_pcre2_passthrough_escape(char c) {
  static const char passthrough[] = "dDwWsSbBnrtf123456789.*+?()[]{}|^$\\/-";
  // strchr would also match the terminating NUL, so exclude it explicitly.
  return c != '\0' && strchr(passthrough, c) != NULL;
}
// True when `c` is the letter of a character-class shorthand escape
// (\w \W \d \D \s \S).
static inline bool is_class_shorthand(char c) {
  switch (c) {
    case 'w': case 'W':
    case 'd': case 'D':
    case 's': case 'S':
      return true;
    default:
      return false;
  }
}
// Finds the ']' that closes the '[' at index `open`, honouring nested brackets
// and skipping backslash-escaped characters. Returns `src_len` when the
// bracket is never closed.
static size_t v_close_bracket(const char *src, size_t src_len, size_t open) {
  int nesting = 0;
  size_t pos = open;

  while (pos < src_len) {
    const char ch = src[pos];

    if (ch == '\\' && pos + 1 < src_len) {
      pos += 2;  // skip the backslash and the character it escapes
      continue;
    }

    if (ch == '[') {
      nesting++;
    } else if (ch == ']' && --nesting == 0) {
      return pos;
    }
    pos++;
  }
  return src_len;
}
// Translates one operand of a `v`-flag set operation into PCRE2 syntax.
// An operand that already starts with '[' is translated as-is; anything else is
// first wrapped in "[...]" using a 1024-byte scratch buffer. Returns the number
// of bytes written to `out`, or 0 when the operand does not fit the scratch
// buffer.
static size_t v_translate_part(const char *p, size_t len, char *out, size_t out_size) {
  const bool already_bracketed = len && p[0] == '[';
  if (already_bracketed) {
    return js_to_pcre2_pattern(p, len, out, out_size, false);
  }

  char wrapped[1024];
  if (len >= sizeof(wrapped) - 2) return 0;

  wrapped[0] = '[';
  memcpy(&wrapped[1], p, len);
  wrapped[len + 1] = ']';

  return js_to_pcre2_pattern(wrapped, len + 2, out, out_size, false);
}
// Scans src[start, end) - the interior of a `v`-flag character class - for a
// top-level set operator. Returns 1 for "&&" (intersection) or 2 for "--"
// (subtraction) and stores the operator's index in *op_pos; returns 0 (leaving
// *op_pos untouched) when none is found. Escapes, braced escapes such as
// \p{...} / \u{...}, and nested bracket groups are skipped.
static int v_set_op(const char *src, size_t start, size_t end, size_t *op_pos) {
  int nesting = 0;
  size_t pos = start;

  while (pos < end) {
    const char cur = src[pos];

    if (cur == '\\' && pos + 1 < end) {
      const char esc = src[pos + 1];
      const bool braced =
        (esc == 'p' || esc == 'P' || esc == 'u' || esc == 'x') &&
        pos + 2 < end && src[pos + 2] == '{';

      if (!braced) {
        pos += 2;
        continue;
      }

      // Jump over the whole "{...}" payload, including the closing brace.
      pos += 3;
      while (pos < end && src[pos] != '}') pos++;
      if (pos < end) pos++;
      continue;
    }

    switch (cur) {
      case '[':
        nesting++;
        pos++;
        continue;
      case ']':
        // An unmatched ']' ends the scan.
        if (nesting == 0) return 0;
        nesting--;
        pos++;
        continue;
      default:
        break;
    }

    if (nesting == 0 && pos + 1 < end && src[pos + 1] == cur) {
      if (cur == '&') {
        *op_pos = pos;
        return 1;
      }
      if (cur == '-') {
        *op_pos = pos;
        return 2;
      }
    }
    pos++;
  }
  return 0;
}
// Translates a JavaScript regular-expression source into PCRE2 syntax.
//
//   src, src_len  JavaScript pattern text (not required to be NUL-terminated)
//   dst, dst_size output buffer; output is silently truncated to dst_size - 1
//                 bytes and always NUL-terminated
//   v_flag        when true, top-level character classes containing a `&&` or
//                 `--` set operator are rewritten as a lookahead plus a class
//
// Returns the number of bytes written, excluding the terminator. Returns 0
// without touching `dst` when `dst` is NULL or `dst_size` is 0 (previously
// `dst_size - 1` wrapped around and the function wrote out of bounds).
//
// Rewrites performed:
//   \v                    -> \x{0b}
//   \u{H..} / \uHHHH      -> \x{H..}
//   \xHH                  -> \xHH; any other \u or \x -> the literal letter
//   \0 and octal escapes  -> \x{..}
//   \cX (X a letter)      -> \cX; any other \c -> an escaped backslash plus 'c'
//   \p{..} / \P{..}       -> long general-category names are shortened, and a
//                            few properties/scripts are expanded to explicit
//                            ranges or alternations from the tables below
//   '-' next to a class shorthand inside [...] is escaped
//   other escapes         -> kept if PCRE2 understands them, else unescaped
size_t js_to_pcre2_pattern(const char *src, size_t src_len, char *dst, size_t dst_size, bool v_flag) {
  if (dst == NULL || dst_size == 0) return 0;

  size_t di = 0;
  int charclass_depth = 0;

  #define OUT(ch) do { if (di < dst_size - 1) dst[di++] = (ch); } while(0)

  for (size_t si = 0; si < src_len && di < dst_size - 1; si++) {
    if (src[si] == '[') {
      if (v_flag && charclass_depth == 0) {
        size_t close = v_close_bracket(src, src_len, si);
        size_t op_pos = 0;
        int op_type = v_set_op(src, si + 1, close, &op_pos);

        if (op_type && close < src_len) {
          char ao[1024], bo[1024];
          size_t aol = v_translate_part(&src[si + 1], op_pos - si - 1, ao, sizeof(ao));
          size_t bol = v_translate_part(&src[op_pos + 2], close - op_pos - 2, bo, sizeof(bo));

          // Intersection A&&B becomes (?=A)B; subtraction A--B becomes (?!B)A.
          const char *la = op_type == 1 ? ao : bo, *ra = op_type == 1 ? bo : ao;
          size_t ll = op_type == 1 ? aol : bol, rl = op_type == 1 ? bol : aol;

          OUT('('); OUT('?'); OUT(op_type == 1 ? '=' : '!');
          for (size_t k = 0; k < ll; k++) OUT(la[k]);
          OUT(')');
          for (size_t k = 0; k < rl; k++) OUT(ra[k]);

          si = close;
          continue;
        }
      }
      charclass_depth++;
      OUT('[');
      continue;
    }

    if (src[si] == ']' && charclass_depth > 0) {
      charclass_depth--;
      OUT(']');
      continue;
    }

    // A '-' inside a class that is neither first nor last.
    if (charclass_depth > 0 && src[si] == '-' && si > 0 && src[si - 1] != '[' && si + 1 < src_len && src[si + 1] != ']') {
      bool prev_is_shorthand = (si >= 2 && src[si - 2] == '\\' && is_class_shorthand(src[si - 1]));
      bool next_is_shorthand = (si + 2 < src_len && src[si + 1] == '\\' && is_class_shorthand(src[si + 2]));
      if (prev_is_shorthand || next_is_shorthand) {
        // Next to \w, \d, ... the dash is emitted escaped.
        OUT('\\'); OUT('-');
        continue;
      }
      OUT('-');
      continue;
    }

    // Ordinary character, or a trailing lone backslash: copy verbatim.
    if (src[si] != '\\' || si + 1 >= src_len) {
      OUT(src[si]);
      continue;
    }

    char next = src[si + 1];

    if (next == 'v') {
      OUT('\\'); OUT('x'); OUT('{'); OUT('0'); OUT('b'); OUT('}');
      si++;
      continue;
    }

    if (next == 'u' && si + 2 < src_len && src[si + 2] == '{') {
      size_t brace_start = si + 3;
      size_t brace_end = brace_start;
      while (brace_end < src_len && src[brace_end] != '}' && is_xdigit(src[brace_end])) brace_end++;
      if (brace_end < src_len && src[brace_end] == '}' && brace_end > brace_start) {
        OUT('\\'); OUT('x'); OUT('{');
        for (size_t k = brace_start; k < brace_end; k++) OUT(src[k]);
        OUT('}');
        si = brace_end;
        continue;
      }
    }

    if (next == 'u' && si + 5 < src_len &&
        is_xdigit(src[si + 2]) && is_xdigit(src[si + 3]) &&
        is_xdigit(src[si + 4]) && is_xdigit(src[si + 5])) {
      OUT('\\'); OUT('x'); OUT('{');
      OUT(src[si + 2]); OUT(src[si + 3]); OUT(src[si + 4]); OUT(src[si + 5]);
      OUT('}');
      si += 5;
      continue;
    }

    // Malformed \u: emit a literal 'u'.
    if (next == 'u') {
      si++;
      OUT('u');
      continue;
    }

    if (next == 'x' && si + 3 < src_len && is_xdigit(src[si + 2]) && is_xdigit(src[si + 3])) {
      OUT('\\'); OUT('x'); OUT(src[si + 2]); OUT(src[si + 3]);
      si += 3;
      continue;
    }

    // Malformed \x: emit a literal 'x'.
    if (next == 'x') {
      si++;
      OUT('x');
      continue;
    }

    // \0 not followed by a decimal digit is the NUL character.
    if (next == '0' && (si + 2 >= src_len || src[si + 2] < '0' || src[si + 2] > '9')) {
      OUT('\\'); OUT('x'); OUT('{'); OUT('0'); OUT('}');
      si++;
      continue;
    }

    if (next >= '0' && next <= '7') {
      unsigned int octal = next - '0';
      size_t advance = 1;
      if (si + 2 < src_len && src[si + 2] >= '0' && src[si + 2] <= '7') {
        octal = octal * 8 + (src[si + 2] - '0');
        advance = 2;
        // A third digit is consumed only while the value stays within one byte.
        if (si + 3 < src_len && src[si + 3] >= '0' && src[si + 3] <= '7' && octal * 8 + (src[si + 3] - '0') <= 255) {
          octal = octal * 8 + (src[si + 3] - '0');
          advance = 3;
        }
      }
      // A single digit 1-7 is not treated as octal here; it falls through to
      // the pass-through check further down.
      if (advance > 1 || next == '0') {
        char hex[8];
        int hlen = snprintf(hex, sizeof(hex), "\\x{%02x}", octal);
        for (int k = 0; k < hlen && di < dst_size - 1; k++) OUT(hex[k]);
        si += advance;
        continue;
      }
    }

    if (next == 'c' && si + 2 < src_len &&
        ((src[si + 2] >= 'A' && src[si + 2] <= 'Z') || (src[si + 2] >= 'a' && src[si + 2] <= 'z'))) {
      OUT('\\'); OUT('c'); OUT(src[si + 2]);
      si += 2;
      continue;
    }

    // \c without a letter: emit an escaped backslash followed by 'c'.
    if (next == 'c') {
      OUT('\\'); OUT('\\'); OUT('c');
      si++;
      continue;
    }

    if ((next == 'p' || next == 'P') && si + 2 < src_len && src[si + 2] == '{') {
      size_t brace_start = si + 3;
      size_t brace_end = brace_start;
      while (brace_end < src_len && src[brace_end] != '}') brace_end++;

      if (brace_end < src_len && src[brace_end] == '}') {
        const char *prop = &src[brace_start];
        size_t prop_len = brace_end - brace_start;

        // Long general-category names and the short codes emitted for them.
        static const struct { const char *name; const char *code; } gc_map[] = {
          {"Letter","L"},{"Cased_Letter","LC"},{"Uppercase_Letter","Lu"},
          {"Lowercase_Letter","Ll"},{"Titlecase_Letter","Lt"},
          {"Modifier_Letter","Lm"},{"Other_Letter","Lo"},
          {"Mark","M"},{"Nonspacing_Mark","Mn"},{"Spacing_Mark","Mc"},
          {"Enclosing_Mark","Me"},
          {"Number","N"},{"Decimal_Number","Nd"},{"Letter_Number","Nl"},
          {"Other_Number","No"},
          {"Punctuation","P"},{"Connector_Punctuation","Pc"},
          {"Dash_Punctuation","Pd"},{"Open_Punctuation","Ps"},
          {"Close_Punctuation","Pe"},{"Initial_Punctuation","Pi"},
          {"Final_Punctuation","Pf"},{"Other_Punctuation","Po"},
          {"Symbol","S"},{"Math_Symbol","Sm"},{"Currency_Symbol","Sc"},
          {"Modifier_Symbol","Sk"},{"Other_Symbol","So"},
          {"Separator","Z"},{"Space_Separator","Zs"},
          {"Line_Separator","Zl"},{"Paragraph_Separator","Zp"},
          {"Other","C"},{"Control","Cc"},{"Format","Cf"},
          {"Surrogate","Cs"},{"Private_Use","Co"},{"Unassigned","Cn"},
        };

        // Script names that are emitted as explicit code point ranges.
        // NOTE(review): judging by the table name these are Unicode 17
        // scripts the linked PCRE2 may not know - confirm.
        static const struct { const char *script; const char *range; } u17_scripts[] = {
          {"Sidetic",       "\\x{10940}-\\x{1095F}"},
          {"Garay",         "\\x{10D40}-\\x{10D8F}"},
          {"Gurung_Khema",  "\\x{16100}-\\x{1613F}"},
          {"Kirat_Rai",     "\\x{16D40}-\\x{16D7F}"},
          {"Ol_Onal",       "\\x{1E5D0}-\\x{1E5FF}"},
          {"Sunuwar",       "\\x{11BC0}-\\x{11BFF}"},
          {"Tulu_Tigalari", "\\x{11380}-\\x{113FF}"},
        };

        bool has_eq = (memchr(prop, '=', prop_len) != NULL);
        bool has_colon = (memchr(prop, ':', prop_len) != NULL);

        // Properties of strings: expanded to alternations, only for \p and
        // only outside a character class.
        if (!has_eq && !has_colon && next == 'p' && charclass_depth == 0) {
          static const struct { const char *name; const char *exp; } sprops[] = {
            {"Emoji_Keycap_Sequence",
             "(?:\\x{23}\\x{fe0f}\\x{20e3}|\\x{2a}\\x{fe0f}\\x{20e3}|[\\x{30}-\\x{39}]\\x{fe0f}\\x{20e3})"},
            {"RGI_Emoji",
             "(?:[\\x{1f1e6}-\\x{1f1ff}]{2}|(?:\\p{Emoji}[\\x{1f3fb}-\\x{1f3ff}]?\\x{200d})+\\p{Emoji}[\\x{1f3fb}-\\x{1f3ff}]?|\\p{Emoji}[\\x{1f3fb}-\\x{1f3ff}]|\\p{Emoji}\\x{fe0f}?)"},
          };
          for (size_t m = 0; m < sizeof(sprops) / sizeof(sprops[0]); m++) {
            if (strlen(sprops[m].name) == prop_len && memcmp(sprops[m].name, prop, prop_len) == 0) {
              for (const char *r = sprops[m].exp; *r && di < dst_size - 1; r++) OUT(*r);
              si = brace_end;
              goto next_char;
            }
          }
        }

        // key=value / key:value form: only the value is matched, against the
        // script table.
        if (has_eq || has_colon) {
          char sep = has_eq ? '=' : ':';
          const char *val = memchr(prop, sep, prop_len);
          if (val) {
            val++;
            size_t val_len = prop_len - (size_t)(val - prop);
            for (size_t m = 0; m < sizeof(u17_scripts) / sizeof(u17_scripts[0]); m++) {
              if (strlen(u17_scripts[m].script) == val_len && memcmp(u17_scripts[m].script, val, val_len) == 0) {
                const char *r = u17_scripts[m].range;
                OUT('[');
                if (next == 'P') OUT('^');
                for (; *r; r++) OUT(*r);
                OUT(']');
                si = brace_end;
                goto next_char;
              }
            }
          }
        }

        // Binary properties written out as plain ranges.
        if (!has_eq && !has_colon) {
          static const struct { const char *name; const char *range; } rangeprops[] = {
            {"ASCII", "\\x{0}-\\x{7f}"},
            {"Any",   "\\x{0}-\\x{10ffff}"},
          };
          for (size_t m = 0; m < sizeof(rangeprops) / sizeof(rangeprops[0]); m++) {
            if (strlen(rangeprops[m].name) == prop_len && memcmp(rangeprops[m].name, prop, prop_len) == 0) {
              if (charclass_depth > 0) {
                // Already inside [...]: emit the bare range (negation is not
                // applied in this case).
                for (const char *r = rangeprops[m].range; *r; r++) OUT(*r);
              } else {
                OUT('[');
                if (next == 'P') OUT('^');
                for (const char *r = rangeprops[m].range; *r; r++) OUT(*r);
                OUT(']');
              }
              si = brace_end;
              goto next_char;
            }
          }
        }

        const char *replacement = NULL;
        if (!has_eq && !has_colon) {
          for (size_t m = 0; m < sizeof(gc_map) / sizeof(gc_map[0]); m++) {
            if (strlen(gc_map[m].name) == prop_len && memcmp(gc_map[m].name, prop, prop_len) == 0) {
              replacement = gc_map[m].code;
              break;
            }
          }
        }

        // Properties that get extra code points added to the PCRE2 property.
        static const struct { const char *prop; const char *extra; } u17_props[] = {
          {"Emoji", "\\x{1FACD}-\\x{1FACE}\\x{1FAE9}\\x{1FAF9}"},
        };

        const char *extra_range = NULL;
        if (!has_eq && !has_colon && !replacement) {
          for (size_t m = 0; m < sizeof(u17_props) / sizeof(u17_props[0]); m++) {
            if (strlen(u17_props[m].prop) == prop_len && memcmp(u17_props[m].prop, prop, prop_len) == 0) {
              extra_range = u17_props[m].extra;
              break;
            }
          }
        }

        if (extra_range && charclass_depth == 0) {
          // (?:\p{Prop}|[extra]) - or the \P / [^extra] counterpart.
          const char *pfx = (next == 'p') ? "(?:\\p{" : "(?:\\P{";
          for (const char *r = pfx; *r; r++) OUT(*r);
          for (size_t k = brace_start; k < brace_end; k++) OUT(src[k]);
          OUT('}'); OUT('|'); OUT('[');
          if (next == 'P') OUT('^');
          for (const char *r = extra_range; *r; r++) OUT(*r);
          OUT(']'); OUT(')');
        } else {
          OUT('\\'); OUT(next); OUT('{');
          if (replacement) {
            for (const char *r = replacement; *r; r++) OUT(*r);
          } else {
            for (size_t k = brace_start; k < brace_end; k++) OUT(src[k]);
          }
          OUT('}');
        }
        si = brace_end;
        continue;
      }

      // Unterminated \p{ : keep the escape as written.
      OUT('\\'); OUT(next);
      si++;
      continue;
    }

    if (is_pcre2_passthrough_escape(next)) {
      OUT('\\'); OUT(next);
      si++;
      continue;
    }

    // Any other escape: drop the backslash and keep the character.
    si++;
    OUT(next);
    next_char:;
  }

  #undef OUT
  dst[di] = '\0';
  return di;
}
// Sets property `key` (of length `klen`) on `obj`: uses js_mkprop_fast when
// `is_new` is true, otherwise js_setprop with a freshly created key string.
#define REGEXP_SET_PROP(js, obj, key, klen, val, is_new) \
((is_new) ? js_mkprop_fast(js, obj, key, klen, val) \
: js_setprop(js, obj, js_mkstr(js, key, klen), val))
// Initialises the flag-related state of a RegExp object from a raw flags string:
// the canonical "flags" string (letters in the order d g i m s u v y), one
// boolean property per flag, lastIndex = 0, and the flag/named-group slots.
// `is_new` selects how REGEXP_SET_PROP writes the properties.
static void regexp_init_flags(ant_t *js, ant_value_t obj, const char *fstr, ant_offset_t flen, bool is_new) {
  uint8_t mask = regexp_parse_flags_mask(fstr, flen);

  // Canonical flags string: one letter per set bit, in fixed order.
  char canonical[10];
  int used = 0;
  if (mask & REGEXP_FLAG_HAS_INDICES) canonical[used++] = 'd';
  if (mask & REGEXP_FLAG_GLOBAL)      canonical[used++] = 'g';
  if (mask & REGEXP_FLAG_IGNORE_CASE) canonical[used++] = 'i';
  if (mask & REGEXP_FLAG_MULTILINE)   canonical[used++] = 'm';
  if (mask & REGEXP_FLAG_DOTALL)      canonical[used++] = 's';
  if (mask & REGEXP_FLAG_UNICODE)     canonical[used++] = 'u';
  if (mask & REGEXP_FLAG_UNICODE_SET) canonical[used++] = 'v';
  if (mask & REGEXP_FLAG_STICKY)      canonical[used++] = 'y';

  ant_value_t flags_value = js_mkstr(js, canonical, used);

  #define REGEXP_FLAG_BOOL(bit) mkval(T_BOOL, (mask & (bit)) ? 1 : 0)

  REGEXP_SET_PROP(js, obj, "flags", 5, flags_value, is_new);
  REGEXP_SET_PROP(js, obj, "hasIndices", 10, REGEXP_FLAG_BOOL(REGEXP_FLAG_HAS_INDICES), is_new);
  REGEXP_SET_PROP(js, obj, "global", 6, REGEXP_FLAG_BOOL(REGEXP_FLAG_GLOBAL), is_new);
  REGEXP_SET_PROP(js, obj, "ignoreCase", 10, REGEXP_FLAG_BOOL(REGEXP_FLAG_IGNORE_CASE), is_new);
  REGEXP_SET_PROP(js, obj, "multiline", 9, REGEXP_FLAG_BOOL(REGEXP_FLAG_MULTILINE), is_new);
  REGEXP_SET_PROP(js, obj, "dotAll", 6, REGEXP_FLAG_BOOL(REGEXP_FLAG_DOTALL), is_new);
  REGEXP_SET_PROP(js, obj, "unicode", 7, REGEXP_FLAG_BOOL(REGEXP_FLAG_UNICODE), is_new);
  REGEXP_SET_PROP(js, obj, "unicodeSets", 11, REGEXP_FLAG_BOOL(REGEXP_FLAG_UNICODE_SET), is_new);
  REGEXP_SET_PROP(js, obj, "sticky", 6, REGEXP_FLAG_BOOL(REGEXP_FLAG_STICKY), is_new);
  REGEXP_SET_PROP(js, obj, "lastIndex", 9, tov(0), is_new);

  #undef REGEXP_FLAG_BOOL

  js_set_slot(obj, SLOT_REGEXP_FLAGS_MASK, tov((double)mask));
  js_set_slot(obj, SLOT_REGEXP_FLAGS_STRING, flags_value);
  js_set_slot(obj, SLOT_REGEXP_NAMED_GROUPS, js_mkundef());
}
// Decides whether `value` should be treated as a RegExp.
// Non-objects are never RegExps. If the match symbol is available and the
// object has a non-undefined value under it, that value's truthiness decides.
// Otherwise the object counts as a RegExp when RegExp.prototype is on its
// prototype chain. Returns a JS boolean, or an error value from a failed lookup.
ant_value_t is_regexp_like(ant_t *js, ant_value_t value) {
  if (!is_object_type(value)) return js_false;

  ant_value_t sym = get_match_sym();
  if (vtype(sym) == T_SYMBOL) {
    ant_value_t marker = js_get_sym(js, value, sym);
    if (is_err(marker)) return marker;
    if (vtype(marker) != T_UNDEF) {
      return js_bool(js_truthy(js, marker));
    }
  }

  ant_value_t ctor = js_get(js, js_glob(js), "RegExp");
  if (is_err(ctor)) return ctor;

  ant_value_t proto = js_get(js, ctor, "prototype");
  if (is_err(proto)) return proto;
  if (!is_object_type(proto)) return js_false;

  return js_bool(proto_chain_contains(js, value, proto));
}
// Tells whether a RegExp(pattern, flags) call may return `pattern` itself.
// That is the case only for a plain call (no new_target) with an object first
// argument that is RegExp-like, no (or undefined) flags, and a "constructor"
// matching the global RegExp. Returns a JS boolean or an error value.
static ant_value_t should_regexp_passthrough(ant_t *js, ant_value_t *args, int nargs) {
  bool called_with_new = vtype(js->new_target) != T_UNDEF;
  if (called_with_new || nargs <= 0) return js_false;

  bool flags_given = nargs >= 2 && vtype(args[1]) != T_UNDEF;
  if (flags_given) return js_false;

  if (!is_object_type(args[0])) return js_false;

  ant_value_t regexp_like = is_regexp_like(js, args[0]);
  if (is_err(regexp_like)) return regexp_like;
  if (!js_truthy(js, regexp_like)) return js_false;

  ant_value_t pattern_ctor = js_getprop_fallback(js, args[0], "constructor");
  if (is_err(pattern_ctor)) return pattern_ctor;

  ant_value_t global_ctor = js_get(js, js_glob(js), "RegExp");
  if (is_err(global_ctor)) return global_ctor;

  return js_bool(same_ctor_identity(js, pattern_ctor, global_ctor));
}
// Guard for methods that must not receive a RegExp as their first argument.
// Returns undefined when `value` is acceptable, a TypeError naming
// `method_name` when it is RegExp-like, or the error from the RegExp check.
ant_value_t reject_regexp_arg(ant_t *js, ant_value_t value, const char *method_name) {
  ant_value_t verdict = is_regexp_like(js, value);
  if (is_err(verdict)) return verdict;

  if (!js_truthy(js, verdict)) return js_mkundef();

  return js_mkerr_typed(js, JS_ERR_TYPE, "First argument to %s must not be a RegExp", method_name);
}
// Invokes `ctor` as a constructor with `ctor_args`, using a fresh object
// (given ctor.prototype as prototype when that is an object) as the receiver.
// js->new_target is set to `ctor` for the duration of the call and restored
// afterwards. Returns the constructed object, the call's error, or a TypeError
// when the constructor did not produce an object. `rx` is not used.
static ant_value_t regexp_species_construct(
  ant_t *js, ant_value_t rx, ant_value_t ctor, ant_value_t *ctor_args, int nargs
) {
  ant_value_t receiver = js_mkobj(js);
  if (is_err(receiver)) return receiver;

  ant_value_t ctor_proto = js_get(js, ctor, "prototype");
  if (is_err(ctor_proto)) return ctor_proto;
  if (is_object_type(ctor_proto)) {
    js_set_proto_init(receiver, ctor_proto);
  }

  ant_value_t prev_new_target = js->new_target;
  js->new_target = ctor;
  ant_value_t constructed = sv_vm_call(js->vm, js, ctor, receiver, ctor_args, nargs, NULL, true);
  js->new_target = prev_new_target;

  if (is_err(constructed)) return constructed;
  if (is_object_type(constructed)) return constructed;

  return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp species constructor returned non-object");
}
// Forward declarations for functions whose definitions are not in this part of
// the file.
static ant_value_t regexp_exec_abstract(ant_t *js, ant_value_t rx, ant_value_t str);
static ant_value_t builtin_regexp_exec(ant_t *js, ant_value_t *args, int nargs);
// Linear search of the regex cache for the entry keyed by `obj`.
// Returns a pointer into the cache array, or NULL when there is no entry.
static regex_cache_entry_t *regex_cache_lookup(ant_object_t *obj) {
  size_t slot = 0;
  while (slot < regex_cache_count) {
    regex_cache_entry_t *candidate = &regex_cache[slot];
    if (candidate->obj == obj) return candidate;
    slot++;
  }
  return NULL;
}
// Appends an entry to the regex cache, doubling the array (starting at 64
// slots) when it is full. Returns the new entry, or NULL if the array could
// not be grown; in that case the cache is left unchanged.
static regex_cache_entry_t *regex_cache_insert(
  ant_object_t *obj, pcre2_code *code, pcre2_match_data *match_data, bool jit_ready
) {
  if (regex_cache_count >= regex_cache_cap) {
    size_t grown_cap = 64;
    if (regex_cache_cap != 0) grown_cap = regex_cache_cap * 2;

    regex_cache_entry_t *grown = realloc(regex_cache, grown_cap * sizeof(regex_cache_entry_t));
    if (grown == NULL) return NULL;

    regex_cache = grown;
    regex_cache_cap = grown_cap;
  }

  regex_cache_entry_t *slot = &regex_cache[regex_cache_count];
  regex_cache_count++;

  slot->obj = obj;
  slot->code = code;
  slot->match_data = match_data;
  slot->jit_ready = jit_ready;
  return slot;
}
// Result of regex_get_or_compile: the PCRE2 artifacts to use for matching.
typedef struct {
  pcre2_code *code;              // compiled pattern
  pcre2_match_data *match_data;  // match data block created from `code`
  bool jit_ready;                // true when pcre2_jit_compile returned 0 for `code`
} compiled_regex_t;
// Fetches the compiled form of `regexp_obj` from the cache, compiling and
// caching it on first use.
//
// On success fills `*out` and returns true. Returns false when the object has
// no string "source" property, the translated pattern does not compile, or any
// allocation (match data, named-group metadata, cache slot) fails; in every
// failure case nothing is leaked and the cache is left unchanged.
//
// The pointers stored in `*out` are the ones stored in the cache entry.
//
// NOTE(review): the translated pattern is limited to the 4096-byte stack
// buffer below; longer patterns are silently truncated by js_to_pcre2_pattern.
static bool regex_get_or_compile(ant_t *js, ant_value_t regexp_obj, compiled_regex_t *out) {
  ant_object_t *obj_ptr = js_obj_ptr(regexp_obj);
  uint8_t flags_mask = regexp_flags_mask(js, regexp_obj);

  regex_cache_entry_t *cached = regex_cache_lookup(obj_ptr);
  if (cached) {
    out->code = cached->code;
    out->match_data = cached->match_data;
    out->jit_ready = cached->jit_ready;
    return true;
  }

  ant_offset_t source_off = lkp(js, regexp_obj, "source", 6);
  if (source_off == 0) return false;
  ant_value_t source_val = js_propref_load(js, source_off);
  if (vtype(source_val) != T_STR) return false;

  ant_offset_t plen, poff = vstr(js, source_val, &plen);
  const char *pattern_ptr = (char *)(uintptr_t)(poff);

  char pcre2_pattern[4096];
  size_t pcre2_len = js_to_pcre2_pattern(
    pattern_ptr, plen, pcre2_pattern, sizeof(pcre2_pattern),
    (flags_mask & REGEXP_FLAG_UNICODE_SET) != 0
  );

  uint32_t options = PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_UNSET_BACKREF | PCRE2_DUPNAMES;
  if (flags_mask & REGEXP_FLAG_IGNORE_CASE) options |= PCRE2_CASELESS;
  if (flags_mask & REGEXP_FLAG_MULTILINE) options |= PCRE2_MULTILINE;
  if (flags_mask & REGEXP_FLAG_DOTALL) options |= PCRE2_DOTALL;

  int errcode;
  PCRE2_SIZE erroffset;
  pcre2_code *re = pcre2_compile((PCRE2_SPTR)pcre2_pattern, pcre2_len, options, &errcode, &erroffset, NULL);
  if (re == NULL) return false;

  pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, NULL);
  if (match_data == NULL) {
    pcre2_code_free(re);
    return false;
  }

  // JIT compilation is optional; on failure the interpreter is used instead.
  bool jit_ready = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE) == 0;

  // Build the metadata before touching the cache so that a failure here never
  // requires removing a cache entry again.
  ant_value_t groups_meta = regexp_build_named_groups_meta(js, re);
  if (is_err(groups_meta)) goto fail;

  // Without a cache slot nobody would own `re`/`match_data`, so treat a failed
  // insert as a compile failure instead of leaking them.
  if (regex_cache_insert(obj_ptr, re, match_data, jit_ready) == NULL) goto fail;

  js_set_slot(regexp_obj, SLOT_REGEXP_NAMED_GROUPS, groups_meta);

  out->code = re;
  out->match_data = match_data;
  out->jit_ready = jit_ready;
  return true;

fail:
  pcre2_match_data_free(match_data);
  pcre2_code_free(re);
  return false;
}
// Implementation of the RegExp constructor / function.
//
//   args[0]  pattern: a RegExp-like object, a string, or any value that is
//            converted to a string; undefined or missing means ""
//   args[1]  flags: when the pattern is RegExp-like, any non-undefined value is
//            converted to a string (otherwise the pattern's own "flags" are
//            used); for every other pattern only a string value is honoured
//
// A plain call (no new_target) with a RegExp-like pattern, no flags, and a
// "constructor" matching the global RegExp returns the pattern itself.
// Otherwise a RegExp object is initialised - `this` when called with `new`,
// else a fresh object - and returned. Error values from property lookups or
// string conversions are returned as-is.
//
// Fix: string flags are now honoured when the pattern argument is undefined
// (e.g. `new RegExp(undefined, "g")`); previously they were silently dropped
// in that one case while being applied for every other non-RegExp pattern.
static ant_value_t builtin_RegExp(ant_t *js, ant_value_t *args, int nargs) {
  bool pattern_is_regexp = false;
  if (nargs > 0) {
    ant_value_t is_re = is_regexp_like(js, args[0]);
    if (is_err(is_re)) return is_re;
    pattern_is_regexp = js_truthy(js, is_re);
  }

  // RegExp(re) without `new` and without flags hands back `re` unchanged.
  if (vtype(js->new_target) == T_UNDEF && nargs > 0 && pattern_is_regexp) {
    if (nargs < 2 || vtype(args[1]) == T_UNDEF) {
      ant_value_t ctor = js_getprop_fallback(js, args[0], "constructor");
      if (is_err(ctor)) return ctor;
      ant_value_t regexp_ctor = js_get(js, js_glob(js), "RegExp");
      if (is_err(regexp_ctor)) return regexp_ctor;
      if (same_ctor_identity(js, ctor, regexp_ctor)) return args[0];
    }
  }

  ant_value_t regexp_obj = js->this_val;
  bool use_this = (vtype(js->new_target) != T_UNDEF && vtype(regexp_obj) == T_OBJ);
  if (!use_this) {
    regexp_obj = mkobj(js, 0);
    if (is_err(regexp_obj)) return regexp_obj;
  }

  ant_value_t regexp_proto = js_get_ctor_proto(js, "RegExp", 6);
  ant_value_t instance_proto = js_instance_proto_from_new_target(js, regexp_proto);
  if (is_object_type(instance_proto)) js_set_proto_init(regexp_obj, instance_proto);

  if (vtype(js->new_target) == T_FUNC || vtype(js->new_target) == T_CFUNC) {
    js_set_slot(regexp_obj, SLOT_CTOR, js->new_target);
  }

  ant_value_t pattern = js_mkstr(js, "", 0);
  ant_value_t flags = js_mkstr(js, "", 0);

  if (nargs > 0) {
    if (pattern_is_regexp) {
      ant_value_t src = js_getprop_fallback(js, args[0], "source");
      if (is_err(src)) return src;
      pattern = js_tostring_val(js, src);
      if (is_err(pattern)) return pattern;

      if (nargs >= 2 && vtype(args[1]) != T_UNDEF) {
        flags = js_tostring_val(js, args[1]);
      } else {
        ant_value_t fl = js_getprop_fallback(js, args[0], "flags");
        if (is_err(fl)) return fl;
        flags = js_tostring_val(js, fl);
      }
      if (is_err(flags)) return flags;
    } else {
      if (vtype(args[0]) == T_STR) {
        pattern = args[0];
      } else if (vtype(args[0]) != T_UNDEF) {
        ant_value_t s = js_tostring_val(js, args[0]);
        if (is_err(s)) return s;
        pattern = s;
      }
      // Applies to every non-RegExp pattern, including undefined.
      if (nargs > 1 && vtype(args[1]) == T_STR) flags = args[1];
    }
  }

  js_mkprop_fast(js, regexp_obj, "source", 6, pattern);

  ant_offset_t flags_len, flags_off = vstr(js, flags, &flags_len);
  regexp_init_flags(js, regexp_obj, (const char *)(uintptr_t)(flags_off), flags_len, true);

  return regexp_obj;
}
// Getter for the `groups` property of a match result (`this`).
// Builds, on first access, a null-prototype object mapping each group name to
// the capture stored at its index on the result, using the [name, index, ...]
// metadata held in the result's slot; the object is then kept in a slot and
// returned on later accesses. Returns undefined when `this` is not an object
// or carries no metadata.
static ant_value_t builtin_regexp_groups_getter(ant_t *js, ant_value_t *args, int nargs) {
  ant_value_t match_arr = js->this_val;
  if (!is_object_type(match_arr)) return js_mkundef();

  ant_value_t memo = js_get_slot(match_arr, SLOT_REGEXP_GROUPS_CACHE);
  if (is_object_type(memo)) return memo;

  ant_value_t meta = js_get_slot(match_arr, SLOT_REGEXP_RESULT_GROUPS);
  if (!is_object_type(meta)) return js_mkundef();

  ant_value_t groups = js_mkobj(js);
  if (is_err(groups)) return groups;
  js_set_proto_init(groups, js_mknull());

  ant_offset_t cursor = 0;
  while (true) {
    // The metadata is a flat list of (name, capture index) pairs; the first
    // undefined name marks its end.
    ant_value_t name = js_arr_get(js, meta, cursor);
    if (vtype(name) == T_UNDEF) break;

    ant_value_t index_val = js_arr_get(js, meta, cursor + 1);
    ant_offset_t index = (vtype(index_val) == T_NUM) ? (ant_offset_t)tod(index_val) : 0;

    char index_key[16];
    (void)uint_to_str(index_key, sizeof(index_key), (uint64_t)index);
    ant_value_t captured = js_getprop_fallback(js, match_arr, index_key);

    ant_offset_t name_len, name_off = vstr(js, name, &name_len);
    ant_value_t status = setprop_cstr(js, groups, (const char *)(uintptr_t)name_off, (size_t)name_len, captured);
    if (is_err(status)) return status;

    cursor += 2;
  }

  js_set_slot(match_arr, SLOT_REGEXP_GROUPS_CACHE, groups);
  return groups;
}
// Creates the [start, end] array for one capture, or undefined when the
// capture did not participate (start is PCRE2_UNSET). Returns an error value
// if the array cannot be created.
static ant_value_t regexp_build_indices_pair(ant_t *js, PCRE2_SIZE start, PCRE2_SIZE end) {
  if (start == PCRE2_UNSET) return js_mkundef();

  ant_value_t bounds = js_mkarr(js);
  if (!is_err(bounds)) {
    js_arr_push(js, bounds, tov((double)start));
    js_arr_push(js, bounds, tov((double)end));
  }
  return bounds;
}
// Builds the null-prototype `groups` object for an indices array: every group
// name in `groups_meta` (a flat [name, index, ...] list) is mapped to the entry
// of `indices_arr` at that capture index. Returns the object or an error value.
static ant_value_t regexp_build_indices_groups(ant_t *js, ant_value_t groups_meta, ant_value_t indices_arr) {
  ant_value_t named = js_mkobj(js);
  if (is_err(named)) return named;
  js_set_proto_init(named, js_mknull());

  ant_offset_t cursor = 0;
  while (true) {
    // The first undefined name marks the end of the metadata list.
    ant_value_t name = js_arr_get(js, groups_meta, cursor);
    if (vtype(name) == T_UNDEF) break;

    ant_value_t index_val = js_arr_get(js, groups_meta, cursor + 1);
    ant_offset_t index = (vtype(index_val) == T_NUM) ? (ant_offset_t)tod(index_val) : 0;

    char index_key[16];
    (void)uint_to_str(index_key, sizeof(index_key), (uint64_t)index);
    ant_value_t bounds = js_getprop_fallback(js, indices_arr, index_key);

    ant_offset_t name_len, name_off = vstr(js, name, &name_len);
    ant_value_t status = setprop_cstr(js, named, (const char *)(uintptr_t)name_off, (size_t)name_len, bounds);
    if (is_err(status)) return status;

    cursor += 2;
  }
  return named;
}
// Builds the `indices` array for a match: one [start, end] pair (or undefined)
// per capture, for at most the first 32 captures, plus a "groups" property that
// is either the named-group index object or undefined. Returns the array or an
// error value.
static ant_value_t regexp_build_indices_result(ant_t *js, ant_value_t regexp, PCRE2_SIZE *ovector, uint32_t ovcount) {
  ant_value_t indices_arr = js_mkarr(js);
  if (is_err(indices_arr)) return indices_arr;

  uint32_t limit = ovcount < 32 ? ovcount : 32;
  for (uint32_t group = 0; group < limit; group++) {
    ant_value_t bounds = regexp_build_indices_pair(js, ovector[2 * group], ovector[2 * group + 1]);
    if (is_err(bounds)) return bounds;
    js_arr_push(js, indices_arr, bounds);
  }

  ant_value_t groups_meta = js_get_slot(regexp, SLOT_REGEXP_NAMED_GROUPS);
  ant_value_t groups_val;
  if (is_object_type(groups_meta)) {
    groups_val = regexp_build_indices_groups(js, groups_meta, indices_arr);
    if (is_err(groups_val)) return groups_val;
  } else {
    groups_val = js_mkundef();
  }

  if (is_err(setprop_cstr(js, indices_arr, "groups", 6, groups_val))) {
    return js_mkerr(js, "oom");
  }
  return indices_arr;
}
// Shared implementation behind RegExp exec/test.
//
// Runs the compiled pattern for `regexp` against the string value `str_arg`.
// For regexps with the global or sticky flag the search starts at the
// object's numeric "lastIndex" and "lastIndex" is rewritten afterwards
// (0 on failure, end of match on success).
//
// Parameters:
//   regexp      - regexp object; flags are read through regexp_flags_mask.
//   str_arg     - subject string value (read with vstr).
//   truthy_only - when true, a successful match returns js_true and no
//                 result array is built.
//
// Returns:
//   - null when nothing matches, when lastIndex is out of range, or when
//     regex_get_or_compile fails;
//   - js_true on a match if truthy_only is set;
//   - otherwise the match array (at most 32 captures) with "index", "input",
//     "groups" and, with the `d` flag, "indices";
//   - an error value if an allocation or property store fails.
static ant_value_t regexp_exec_internal(ant_t *js, ant_value_t regexp, ant_value_t str_arg, bool truthy_only) {
  ant_offset_t str_len, str_off = vstr(js, str_arg, &str_len);
  const char *str_ptr = (char *)(uintptr_t)(str_off);

  uint8_t flags_mask = regexp_flags_mask(js, regexp);
  bool global_flag = (flags_mask & REGEXP_FLAG_GLOBAL) != 0;
  bool has_indices = (flags_mask & REGEXP_FLAG_HAS_INDICES) != 0;
  bool sticky_flag = (flags_mask & REGEXP_FLAG_STICKY) != 0;
  bool tracks_last_index = global_flag || sticky_flag;

  // Only global/sticky regexps honour lastIndex; everything else starts at 0.
  PCRE2_SIZE start_offset = 0;
  ant_offset_t lastindex_off = tracks_last_index ? lkp(js, regexp, "lastIndex", 9) : 0;
  if (lastindex_off != 0) {
    ant_value_t li_val = js_propref_load(js, lastindex_off);
    if (vtype(li_val) == T_NUM) {
      double li = tod(li_val);
      // Written as a negated range test so NaN also takes the reset path.
      if (!(li >= 0 && li <= (double)str_len)) {
        if (is_err(setprop_cstr(js, regexp, "lastIndex", 9, tov(0)))) return js_mkerr(js, "oom");
        return js_mknull();
      }
      start_offset = (PCRE2_SIZE)li;
    }
  }

  compiled_regex_t compiled;
  if (!regex_get_or_compile(js, regexp, &compiled)) return js_mknull();

  uint32_t match_options = 0;
  if (sticky_flag) match_options |= PCRE2_ANCHORED;

  // The JIT entry point is only used for non-sticky matches.
  int rc;
  if (compiled.jit_ready && !sticky_flag) {
    rc = pcre2_jit_match(
      compiled.code, (PCRE2_SPTR)str_ptr, str_len,
      start_offset, match_options, compiled.match_data, NULL
    );
  } else {
    rc = pcre2_match(
      compiled.code, (PCRE2_SPTR)str_ptr, str_len,
      start_offset, match_options, compiled.match_data, NULL
    );
  }

  if (rc < 0) {
    if (tracks_last_index && is_err(setprop_cstr(js, regexp, "lastIndex", 9, tov(0)))) {
      return js_mkerr(js, "oom");
    }
    return js_mknull();
  }

  PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(compiled.match_data);
  uint32_t ovcount = pcre2_get_ovector_count(compiled.match_data);
  update_regexp_statics(js, str_ptr, ovector, ovcount);

  if (tracks_last_index) {
    ant_value_t next_idx = tov((double)ovector[1]);
    if (is_err(setprop_cstr(js, regexp, "lastIndex", 9, next_idx))) return js_mkerr(js, "oom");
  }

  if (truthy_only) return js_true;

  ant_value_t result_arr = js_mkarr(js);
  if (is_err(result_arr)) return result_arr;

  for (uint32_t i = 0; i < ovcount && i < 32; i++) {
    PCRE2_SIZE start = ovector[2 * i];
    PCRE2_SIZE end = ovector[2 * i + 1];
    if (start == PCRE2_UNSET) {
      js_arr_push(js, result_arr, js_mkundef());
      continue;
    }
    ant_value_t match_str = js_mkstr(js, str_ptr + start, end - start);
    // Propagate a failed string allocation instead of storing the error
    // value as a capture (same handling as do_regex_match_pcre2).
    if (is_err(match_str)) return match_str;
    js_arr_push(js, result_arr, match_str);
  }

  if (is_err(setprop_cstr(js, result_arr, "index", 5, tov((double)ovector[0])))) return js_mkerr(js, "oom");
  if (is_err(setprop_cstr(js, result_arr, "input", 5, str_arg))) return js_mkerr(js, "oom");

  ant_value_t groups_meta = js_get_slot(regexp, SLOT_REGEXP_NAMED_GROUPS);
  if (is_object_type(groups_meta)) {
    // Named groups are exposed through a getter; the result only records the
    // metadata and an empty cache slot here.
    js_set_slot(result_arr, SLOT_REGEXP_RESULT_GROUPS, groups_meta);
    js_set_slot(result_arr, SLOT_REGEXP_GROUPS_CACHE, js_mkundef());
    js_set_getter_desc(
      js, js_as_obj(result_arr), "groups", 6,
      js_mkfun(builtin_regexp_groups_getter), JS_DESC_E | JS_DESC_C
    );
  } else if (is_err(setprop_cstr(js, result_arr, "groups", 6, js_mkundef()))) {
    return js_mkerr(js, "oom");
  }

  if (has_indices) {
    ant_value_t indices = regexp_build_indices_result(js, regexp, ovector, ovcount);
    if (is_err(indices)) return indices;
    if (is_err(setprop_cstr(js, result_arr, "indices", 7, indices))) return js_mkerr(js, "oom");
  }

  return result_arr;
}
// RegExp.prototype.exec(string).
//
// `this` must be a T_OBJ value, otherwise a TypeError is returned. The
// argument is converted to a string with js_tostring_val, and a missing
// argument is treated as the string "undefined" - the same coercion
// builtin_regexp_test applies - so numbers and other non-string values are
// matched instead of silently yielding null.
//
// Returns whatever regexp_exec_internal returns (match array, null, or an
// error value), or the error produced by the string conversion.
static ant_value_t builtin_regexp_exec(ant_t *js, ant_value_t *args, int nargs) {
  ant_value_t regexp = js->this_val;
  if (vtype(regexp) != T_OBJ) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "exec called on non-regexp");
  }

  ant_value_t str_arg = nargs > 0
    ? js_tostring_val(js, args[0])
    : js_mkstr(js, "undefined", 9);
  if (is_err(str_arg)) return str_arg;

  return regexp_exec_internal(js, regexp, str_arg, false);
}
// RegExp.prototype.toString.
//
// Reads "source" and "flags" from `this` via js_getprop_fallback, converts
// both with js_tostring_val and returns the string "/<source>/<flags>".
// A TypeError is returned when `this` is not an object; errors from the
// property reads or conversions are returned unchanged; "oom" is returned
// when the scratch buffer cannot be allocated.
static ant_value_t builtin_regexp_toString(ant_t *js, ant_value_t *args, int nargs) {
  ant_value_t regexp = js->this_val;
  if (!is_object_type(regexp)) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "toString called on non-object");
  }

  ant_value_t source_val = js_getprop_fallback(js, regexp, "source");
  if (is_err(source_val)) return source_val;
  ant_value_t source_str = js_tostring_val(js, source_val);
  if (is_err(source_str)) return source_str;

  ant_value_t flags_val = js_getprop_fallback(js, regexp, "flags");
  if (is_err(flags_val)) return flags_val;
  ant_value_t flags_str = js_tostring_val(js, flags_val);
  if (is_err(flags_str)) return flags_str;

  ant_offset_t src_len, src_off = vstr(js, source_str, &src_len);
  ant_offset_t fl_len, fl_off = vstr(js, flags_str, &fl_len);

  // Two slashes plus both parts; one spare byte is allocated on top.
  size_t total = 1 + src_len + 1 + fl_len;
  char *buf = ant_calloc(total + 1);
  if (!buf) return js_mkerr(js, "oom");

  char *cursor = buf;
  *cursor++ = '/';
  memcpy(cursor, (const void *)(uintptr_t)src_off, src_len);
  cursor += src_len;
  *cursor++ = '/';
  memcpy(cursor, (const void *)(uintptr_t)fl_off, fl_len);
  cursor += fl_len;

  ant_value_t result = js_mkstr(js, buf, (size_t)(cursor - buf));
  free(buf);
  return result;
}
// RegExp.prototype.compile(pattern, flags).
//
// Re-initialises `this` in place:
//   - pattern and flags both default to the empty string;
//   - if args[0] is regexp-like (per is_regexp_like) its "source" and
//     "flags" are stringified and used, otherwise args[0] itself is
//     stringified as the pattern;
//   - a defined args[1] overrides the flags.
// The "source" property and the flag state (regexp_init_flags) are then
// rewritten, and any entry cached for this object in regex_cache is freed
// and removed by moving the last entry into its place.
//
// Returns `this`, a TypeError when `this` is not an object, or the first
// error produced by a property read / string conversion.
static ant_value_t builtin_regexp_compile(ant_t *js, ant_value_t *args, int nargs) {
  ant_value_t rx = js->this_val;
  if (!is_object_type(rx)) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "compile called on non-object");
  }

  ant_value_t pattern = js_mkstr(js, "", 0);
  ant_value_t flags = js_mkstr(js, "", 0);

  if (nargs > 0 && vtype(args[0]) != T_UNDEF) {
    ant_value_t is_re = is_regexp_like(js, args[0]);
    if (is_err(is_re)) return is_re;

    if (!js_truthy(js, is_re)) {
      pattern = js_tostring_val(js, args[0]);
      if (is_err(pattern)) return pattern;
    } else {
      ant_value_t src = js_getprop_fallback(js, args[0], "source");
      if (is_err(src)) return src;
      pattern = js_tostring_val(js, src);
      if (is_err(pattern)) return pattern;

      ant_value_t fl = js_getprop_fallback(js, args[0], "flags");
      if (is_err(fl)) return fl;
      flags = js_tostring_val(js, fl);
      if (is_err(flags)) return flags;
    }
  }

  if (nargs > 1 && vtype(args[1]) != T_UNDEF) {
    flags = js_tostring_val(js, args[1]);
    if (is_err(flags)) return flags;
  }

  js_setprop(js, rx, js_mkstr(js, "source", 6), pattern);

  ant_offset_t flen, foff = vstr(js, flags, &flen);
  regexp_init_flags(js, rx, (const char *)(uintptr_t)(foff), flen, false);

  // Drop the stale compiled pattern for this object, if one is cached.
  ant_object_t *rx_ptr = js_obj_ptr(rx);
  size_t slot = 0;
  while (slot < regex_cache_count && regex_cache[slot].obj != rx_ptr) slot++;
  if (slot < regex_cache_count) {
    pcre2_match_data_free(regex_cache[slot].match_data);
    pcre2_code_free(regex_cache[slot].code);
    regex_cache_count--;
    regex_cache[slot] = regex_cache[regex_cache_count];
  }

  return rx;
}
// True when `c` is one of the characters RegExp.escape prefixes with a
// backslash: ^ $ \ . * + ? ( ) [ ] { } | and /.
static inline bool is_syntax_char(char c) {
  switch (c) {
    case '^': case '$': case '\\': case '.': case '*':
    case '+': case '?': case '(':  case ')': case '[':
    case ']': case '{': case '}':  case '|': case '/':
      return true;
    default:
      return false;
  }
}
// True when `c` is one of the ASCII punctuators that builtin_regexp_escape
// writes as a \xHH escape: , - : ; < = > @ ! " # % & ' ` ~
static inline bool is_other_punctuator(char c) {
  static const char punctuators[] = ",-:;<=>@!\"#%&'`~";
  // The explicit length excludes the terminating NUL, so '\0' never matches.
  return memchr(punctuators, c, sizeof(punctuators) - 1) != NULL;
}
// RegExp.escape(string).
//
// Requires a string argument (TypeError otherwise) and returns a copy in
// which:
//   - a leading ASCII digit or letter becomes \xHH;
//   - syntax characters (see is_syntax_char) are prefixed with a backslash;
//   - tab, newline, vertical tab, form feed and carriage return become
//     \t \n \v \f \r (the ControlEscape forms);
//   - the punctuators in is_other_punctuator and the space character
//     become \xHH;
//   - bytes >= 0x80 are copied through one UTF-8 sequence at a time;
//   - everything else is copied unchanged.
//
// The output buffer is sized at 6 bytes per input byte plus one; the longest
// escape written here is 4 bytes. Returns "oom" if that buffer cannot be
// allocated.
static ant_value_t builtin_regexp_escape(ant_t *js, ant_value_t *args, int nargs) {
  if (nargs < 1 || vtype(args[0]) != T_STR) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.escape requires a string argument");
  }

  ant_offset_t slen, soff = vstr(js, args[0], &slen);
  const char *src = (const char *)(uintptr_t)(soff);

  size_t buf_cap = slen * 6 + 1;
  char *buf = ant_calloc(buf_cap);
  if (!buf) return js_mkerr(js, "oom");

  size_t di = 0;
  bool first = true;

  for (size_t si = 0; si < slen;) {
    unsigned char c = (unsigned char)src[si];

    if (c >= 0x80) {
      utf8proc_int32_t cp;
      int bytes = (int)utf8_next(
        (const utf8proc_uint8_t *)&src[si],
        (utf8proc_ssize_t)(slen - si), &cp
      );
      // A zero/negative length would leave `si` untouched and spin forever;
      // always consume at least the current byte.
      if (bytes < 1) bytes = 1;
      for (int b = 0; b < bytes && si < slen; b++) buf[di++] = src[si++];
      first = false;
      continue;
    }

    bool is_alnum = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    if (first && is_alnum) {
      di += snprintf(buf + di, buf_cap - di, "\\x%02x", c);
      si++;
      first = false;
      continue;
    }

    if (is_syntax_char(c)) {
      buf[di++] = '\\';
      buf[di++] = c;
      si++;
      first = false;
      continue;
    }

    // Control characters that have a single-letter escape use it.
    char control_letter = '\0';
    switch (c) {
      case '\t': control_letter = 't'; break;
      case '\n': control_letter = 'n'; break;
      case '\v': control_letter = 'v'; break;
      case '\f': control_letter = 'f'; break;
      case '\r': control_letter = 'r'; break;
      default: break;
    }
    if (control_letter != '\0') {
      buf[di++] = '\\';
      buf[di++] = control_letter;
      si++;
      first = false;
      continue;
    }

    if (is_other_punctuator(c) || c == ' ') {
      di += snprintf(buf + di, buf_cap - di, "\\x%02x", c);
      si++;
      first = false;
      continue;
    }

    buf[di++] = c;
    si++;
    first = false;
  }

  ant_value_t result = js_mkstr(js, buf, di);
  free(buf);
  return result;
}
// Runs a regexp's "exec" against `str`, given the already-fetched `exec_fn`.
//
// When `exec_fn` is callable (T_FUNC or T_CFUNC) it is invoked through
// sv_vm_call with `rx` as the receiver and `str` as its only argument; the
// result must be an object or null, otherwise a TypeError is returned.
// When it is not callable, builtin_regexp_exec is called directly with
// js->this_val temporarily set to `rx` and restored afterwards.
static ant_value_t regexp_exec_with_exec_fn(ant_t *js, ant_value_t rx, ant_value_t str, ant_value_t exec_fn) {
  ant_value_t call_args[1] = { str };
  bool callable = vtype(exec_fn) == T_FUNC || vtype(exec_fn) == T_CFUNC;

  if (!callable) {
    ant_value_t saved = js->this_val;
    js->this_val = rx;
    ant_value_t builtin_result = builtin_regexp_exec(js, call_args, 1);
    js->this_val = saved;
    return builtin_result;
  }

  ant_value_t result = sv_vm_call(js->vm, js, exec_fn, rx, call_args, 1, NULL, false);
  if (is_err(result)) return result;
  if (is_object_type(result) || vtype(result) == T_NULL) return result;
  return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp exec returned non-object");
}
// Looks up "exec" on `rx` with js_get and hands it to
// regexp_exec_with_exec_fn together with `str`. A failed lookup is returned
// as-is.
static ant_value_t regexp_exec_abstract(ant_t *js, ant_value_t rx, ant_value_t str) {
  ant_value_t exec_fn = js_get(js, rx, "exec");
  return is_err(exec_fn) ? exec_fn : regexp_exec_with_exec_fn(js, rx, str, exec_fn);
}
// Fast path for callers that only need to know whether exec() matched.
//
// Applies only when `out_result` is non-NULL, `call_func` is a T_CFUNC whose
// entry point is builtin_regexp_exec, `regexp` is an object and `arg` is a
// T_STR value; in any other case it returns false and leaves *out_result
// untouched.
//
// When it applies, regexp_exec_internal is run in truthy-only mode and
// *out_result receives either the error value it produced or a T_BOOL that
// is true when the result was not null. Returns true in that case.
bool regexp_exec_truthy_try_fast(
  ant_t *js, ant_value_t call_func, ant_value_t regexp,
  ant_value_t arg, ant_value_t *out_result
) {
  if (!out_result || vtype(call_func) != T_CFUNC) return false;
  if (!js_cfunc_same_entrypoint(call_func, builtin_regexp_exec)) return false;
  if (!is_object_type(regexp) || vtype(arg) != T_STR) return false;

  ant_value_t outcome = regexp_exec_internal(js, regexp, arg, true);
  if (!is_err(outcome)) {
    outcome = mkval(T_BOOL, vtype(outcome) != T_NULL ? 1 : 0);
  }
  *out_result = outcome;
  return true;
}
// RegExp.prototype.test(string).
//
// Stringifies the argument (a missing argument becomes "undefined"), fetches
// "exec" from `this`, and reports whether running it produced a non-null
// result. If "exec" is the built-in builtin_regexp_exec, the match is run
// through regexp_exec_internal in truthy-only mode; any other value goes
// through regexp_exec_with_exec_fn.
//
// Returns a T_BOOL, a TypeError when `this` is not an object, or the first
// error produced along the way.
static ant_value_t builtin_regexp_test(ant_t *js, ant_value_t *args, int nargs) {
  ant_value_t regexp = js->this_val;
  if (!is_object_type(regexp)) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "test called on non-object");
  }

  ant_value_t subject;
  if (nargs > 0) subject = js_tostring_val(js, args[0]);
  else subject = js_mkstr(js, "undefined", 9);
  if (is_err(subject)) return subject;

  ant_value_t exec_fn = js_get(js, regexp, "exec");
  if (is_err(exec_fn)) return exec_fn;

  bool is_builtin_exec =
    vtype(exec_fn) == T_CFUNC && js_cfunc_same_entrypoint(exec_fn, builtin_regexp_exec);

  ant_value_t outcome = is_builtin_exec
    ? regexp_exec_internal(js, regexp, subject, true)
    : regexp_exec_with_exec_fn(js, regexp, subject, exec_fn);
  if (is_err(outcome)) return outcome;

  return mkval(T_BOOL, vtype(outcome) != T_NULL ? 1 : 0);
}
// Getter for RegExp.prototype.flags.
//
// Reads the boolean-ish properties hasIndices, global, ignoreCase,
// multiline, dotAll, unicode, unicodeSets and sticky from `this` (in that
// order, via js_getprop_fallback) and appends the matching letter
// d, g, i, m, s, u, v, y for each truthy one.
//
// Returns the resulting string, a TypeError when `this` is not an object,
// or the first error produced by a property read.
static ant_value_t builtin_regexp_flags_getter(ant_t *js, ant_value_t *args, int nargs) {
  static const struct {
    const char *prop;
    char letter;
  } flag_props[] = {
    { "hasIndices",  'd' },
    { "global",      'g' },
    { "ignoreCase",  'i' },
    { "multiline",   'm' },
    { "dotAll",      's' },
    { "unicode",     'u' },
    { "unicodeSets", 'v' },
    { "sticky",      'y' },
  };

  ant_value_t rx = js->this_val;
  if (!is_object_type(rx)) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype.flags called on non-object");
  }

  char buf[16];
  int n = 0;
  for (size_t k = 0; k < sizeof(flag_props) / sizeof(flag_props[0]); k++) {
    ant_value_t v = js_getprop_fallback(js, rx, flag_props[k].prop);
    if (is_err(v)) return v;
    if (js_truthy(js, v)) buf[n++] = flag_props[k].letter;
  }

  return js_mkstr(js, buf, n);
}
// RegExp.prototype[@@match](string).
//
// Non-global regexps: returns the result of a single regexp_exec_abstract
// call. Global regexps: resets "lastIndex" to 0, then repeatedly executes
// and collects the stringified element 0 of each result until exec returns
// null. After an empty match "lastIndex" is bumped by 1, or - when the
// "unicode" property is truthy and lastIndex is inside the string - by
// the value utf8_char_len_at reports for that position.
//
// Returns null when a global search found nothing, the array of matches
// otherwise, a TypeError when `this` is not an object, or the first error
// encountered.
static ant_value_t builtin_regexp_symbol_match(ant_t *js, ant_value_t *args, int nargs) {
  ant_value_t rx = js->this_val;
  if (!is_object_type(rx)) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype[@@match] called on non-object");
  }

  ant_value_t str = nargs > 0 ? js_tostring_val(js, args[0]) : js_mkstr(js, "undefined", 9);
  if (is_err(str)) return str;

  ant_value_t global_val = js_getprop_fallback(js, rx, "global");
  if (is_err(global_val)) return global_val;
  if (!js_truthy(js, global_val)) return regexp_exec_abstract(js, rx, str);

  ant_value_t unicode_val = js_getprop_fallback(js, rx, "unicode");
  if (is_err(unicode_val)) return unicode_val;
  bool full_unicode = js_truthy(js, unicode_val);

  js_setprop(js, rx, js_mkstr(js, "lastIndex", 9), tov(0));

  ant_value_t matches = js_mkarr(js);
  if (is_err(matches)) return matches;
  ant_offset_t match_count = 0;

  for (;;) {
    ant_value_t result = regexp_exec_abstract(js, rx, str);
    if (is_err(result)) return result;
    if (vtype(result) == T_NULL) {
      if (match_count == 0) return js_mknull();
      return mkval(T_ARR, vdata(matches));
    }

    ant_value_t match_str = js_tostring_val(js, js_arr_get(js, result, 0));
    if (is_err(match_str)) return match_str;
    js_arr_push(js, matches, match_str);
    match_count++;

    ant_offset_t mlen;
    vstr(js, match_str, &mlen);
    if (mlen != 0) continue;

    // Empty match: move lastIndex forward so the next exec makes progress.
    ant_value_t li_val = js_getprop_fallback(js, rx, "lastIndex");
    if (is_err(li_val)) return li_val;
    double li = vtype(li_val) == T_NUM ? tod(li_val) : 0;

    ant_offset_t str_len, str_off = vstr(js, str, &str_len);
    double advance = 1;
    if (full_unicode && li < (double)str_len) {
      advance = (double)utf8_char_len_at(
        (const char *)(uintptr_t)(str_off), str_len, (ant_offset_t)li
      );
    }
    js_setprop(js, rx, js_mkstr(js, "lastIndex", 9), tov(li + advance));
  }
}
// next() for the iterator object created by builtin_regexp_symbol_matchAll.
//
// Reads the regexp, subject string and "done" marker from the iterator's
// SLOT_MATCHALL_* slots. If already done, or exec returns null, the done
// slot is set and js_iter_result(js, false, undefined) is returned.
// Otherwise the exec result is returned through js_iter_result(js, true,
// result); for global regexps an empty match bumps "lastIndex" by one, and
// for non-global regexps the iterator is marked done after this result.
//
// Errors from exec, from reading "global"/"lastIndex", or from stringifying
// the match are returned to the caller instead of being treated as values.
static ant_value_t regexp_matchall_next(ant_t *js, ant_value_t *args, int nargs) {
  ant_value_t iter = js->this_val;
  ant_value_t rx = js_get_slot(iter, SLOT_MATCHALL_RX);
  ant_value_t str = js_get_slot(iter, SLOT_MATCHALL_STR);
  ant_value_t done_val = js_get_slot(iter, SLOT_MATCHALL_DONE);

  if (js_truthy(js, done_val)) return js_iter_result(js, false, js_mkundef());

  ant_value_t result = regexp_exec_abstract(js, rx, str);
  if (is_err(result)) return result;

  if (vtype(result) == T_NULL) {
    js_set_slot(iter, SLOT_MATCHALL_DONE, js_true);
    return js_iter_result(js, false, js_mkundef());
  }

  ant_value_t global_val = js_getprop_fallback(js, rx, "global");
  if (is_err(global_val)) return global_val;

  if (!js_truthy(js, global_val)) {
    // A non-global regexp yields exactly one result.
    js_set_slot(iter, SLOT_MATCHALL_DONE, js_true);
    return js_iter_result(js, true, result);
  }

  ant_value_t match_str = js_tostring_val(js, js_arr_get(js, result, 0));
  if (is_err(match_str)) return match_str;

  ant_offset_t mlen;
  vstr(js, match_str, &mlen);
  if (mlen == 0) {
    // Empty match: advance lastIndex so the next call makes progress.
    ant_value_t li_val = js_getprop_fallback(js, rx, "lastIndex");
    if (is_err(li_val)) return li_val;
    double li = vtype(li_val) == T_NUM ? tod(li_val) : 0;
    js_setprop(js, rx, js_mkstr(js, "lastIndex", 9), tov(li + 1));
  }

  return js_iter_result(js, true, result);
}
// RegExp.prototype[@@matchAll](string).
//
// Stringifies the argument (missing => "undefined"), builds a new regexp by
// calling the global "RegExp" constructor with this regexp's "source" and
// stringified "flags", copies "lastIndex" onto it, and returns an iterator
// object holding the new regexp, the string and a false "done" marker in its
// SLOT_MATCHALL_* slots, with regexp_matchall_iter_proto_val as prototype.
//
// Returns a TypeError when `this` is not an object. Errors from any
// property read, from looking up the constructor, or from the constructor
// call are returned unchanged rather than being used as values.
static ant_value_t builtin_regexp_symbol_matchAll(ant_t *js, ant_value_t *args, int nargs) {
  ant_value_t rx = js->this_val;
  if (!is_object_type(rx)) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype[@@matchAll] called on non-object");
  }

  ant_value_t str = nargs > 0 ? js_tostring_val(js, args[0]) : js_mkstr(js, "undefined", 9);
  if (is_err(str)) return str;

  ant_value_t flags_val = js_getprop_fallback(js, rx, "flags");
  if (is_err(flags_val)) return flags_val;
  ant_value_t flags_str = js_tostring_val(js, flags_val);
  if (is_err(flags_str)) return flags_str;

  ant_value_t source_val = js_getprop_fallback(js, rx, "source");
  if (is_err(source_val)) return source_val;

  ant_value_t ctor_args[2] = { source_val, flags_str };
  ant_value_t regexp_ctor = js_get(js, js_glob(js), "RegExp");
  if (is_err(regexp_ctor)) return regexp_ctor;

  ant_value_t new_rx = sv_vm_call(js->vm, js, regexp_ctor, js_mkundef(), ctor_args, 2, NULL, true);
  if (is_err(new_rx)) return new_rx;

  // A failing lastIndex read must not end up stored as the new lastIndex.
  ant_value_t li_val = js_getprop_fallback(js, rx, "lastIndex");
  if (is_err(li_val)) return li_val;
  js_setprop(js, new_rx, js_mkstr(js, "lastIndex", 9), li_val);

  ant_value_t iter = js_mkobj(js);
  js_set_slot(iter, SLOT_MATCHALL_RX, new_rx);
  js_set_slot(iter, SLOT_MATCHALL_STR, str);
  js_set_slot(iter, SLOT_MATCHALL_DONE, js_false);
  js_set_proto_init(iter, regexp_matchall_iter_proto_val);
  return iter;
}
// String.prototype.matchAll(regexp).
//
// Stringifies `this` (after unwrap_primitive). With an object argument:
//   - if it is regexp-like, its stringified "flags" must contain 'g',
//     otherwise a TypeError is returned;
//   - its matchAll symbol method, when present, is invoked through
//     maybe_call_symbol_method and that result is returned.
// Otherwise the argument is stringified, a global regexp is constructed
// from it via the global "RegExp" constructor, and
// builtin_regexp_symbol_matchAll is run with that regexp as `this`.
//
// Errors from is_regexp_like, from stringifying the flags and from the
// constructor lookup are returned unchanged, and js->this_val is restored
// after the internal call (as regexp_exec_with_exec_fn does).
static ant_value_t builtin_string_matchAll(ant_t *js, ant_value_t *args, int nargs) {
  ant_value_t this_unwrapped = unwrap_primitive(js, js->this_val);
  ant_value_t str = js_tostring_val(js, this_unwrapped);
  if (is_err(str)) return str;

  if (nargs < 1) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "matchAll requires at least 1 argument");
  }

  if (is_object_type(args[0])) {
    ant_value_t is_re = is_regexp_like(js, args[0]);
    if (is_err(is_re)) return is_re;

    if (js_truthy(js, is_re)) {
      ant_value_t flags_val = js_getprop_fallback(js, args[0], "flags");
      if (is_err(flags_val)) return flags_val;
      ant_value_t flags_str = js_tostring_val(js, flags_val);
      // Must be checked before vstr reads it as a string.
      if (is_err(flags_str)) return flags_str;

      ant_offset_t flen, foff = vstr(js, flags_str, &flen);
      const char *fp = (const char *)(uintptr_t)(foff);
      bool has_g = false;
      for (ant_offset_t i = 0; i < flen; i++) {
        if (fp[i] == 'g') has_g = true;
      }
      if (!has_g) {
        return js_mkerr_typed(
          js, JS_ERR_TYPE,
          "String.prototype.matchAll called with a non-global RegExp"
        );
      }
    }

    bool called = false;
    ant_value_t call_args[1] = { str };
    ant_value_t dispatched = maybe_call_symbol_method(
      js, args[0], get_matchAll_sym(), args[0], call_args, 1, &called
    );
    if (is_err(dispatched)) return dispatched;
    if (called) return dispatched;
  }

  ant_value_t pattern_str = js_tostring_val(js, args[0]);
  if (is_err(pattern_str)) return pattern_str;

  ant_value_t ctor_args[2] = { pattern_str, js_mkstr(js, "g", 1) };
  ant_value_t regexp_ctor = js_get(js, js_glob(js), "RegExp");
  if (is_err(regexp_ctor)) return regexp_ctor;

  ant_value_t rx = sv_vm_call(js->vm, js, regexp_ctor, js_mkundef(), ctor_args, 2, NULL, true);
  if (is_err(rx)) return rx;

  ant_value_t ma_args[1] = { str };
  ant_value_t saved_this = js->this_val;
  js->this_val = rx;
  ant_value_t iter = builtin_regexp_symbol_matchAll(js, ma_args, 1);
  js->this_val = saved_this;
  return iter;
}
// RegExp.prototype[@@replace](string, replaceValue).
//
// Phase 1 collects exec results: one for a non-global regexp, or all of them
// for a global one (lastIndex is reset to 0 first and bumped after each
// empty match, by utf8_char_len_at's result when "unicode" is truthy).
//
// Phase 2 walks the collected results and builds the output in a growable
// heap buffer: the untouched text before each match, then the replacement.
// A callable replaceValue is invoked with up to 30 captures followed by the
// match position and the subject string; a string replaceValue is expanded
// by repl_template with the captures of that result. Results whose position
// lies before the end of the previous match are skipped.
//
// Returns the new string, a TypeError when `this` is not an object, the
// first error produced by a callback / conversion / exec, or "oom".
static ant_value_t builtin_regexp_symbol_replace(ant_t *js, ant_value_t *args, int nargs) {
  ant_value_t rx = js->this_val;
  if (!is_object_type(rx)) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype[@@replace] called on non-object");
  }

  ant_value_t str = nargs > 0 ? js_tostring_val(js, args[0]) : js_mkstr(js, "undefined", 9);
  if (is_err(str)) return str;

  ant_value_t replace_value = nargs > 1 ? args[1] : js_mkundef();
  bool func_replace = (vtype(replace_value) == T_FUNC || vtype(replace_value) == T_CFUNC);

  ant_value_t replace_str = js_mkundef();
  if (!func_replace) {
    replace_str = js_tostring_val(js, replace_value);
    if (is_err(replace_str)) return replace_str;
  }

  ant_value_t global_val = js_getprop_fallback(js, rx, "global");
  if (is_err(global_val)) return global_val;
  bool global = js_truthy(js, global_val);

  bool full_unicode = false;
  if (global) {
    ant_value_t unicode_val = js_getprop_fallback(js, rx, "unicode");
    if (is_err(unicode_val)) return unicode_val;
    full_unicode = js_truthy(js, unicode_val);
    js_setprop(js, rx, js_mkstr(js, "lastIndex", 9), tov(0));
  }

  // ---- Phase 1: gather exec results -------------------------------------
  ant_value_t results = js_mkarr(js);
  if (is_err(results)) return results;
  ant_offset_t nresults = 0;

  for (;;) {
    ant_value_t result = regexp_exec_abstract(js, rx, str);
    if (is_err(result)) return result;
    if (vtype(result) == T_NULL) break;

    js_arr_push(js, results, result);
    nresults++;
    if (!global) break;

    ant_value_t match_str = js_tostring_val(js, js_arr_get(js, result, 0));
    if (is_err(match_str)) return match_str;

    ant_offset_t mlen;
    vstr(js, match_str, &mlen);
    if (mlen != 0) continue;

    // Empty match: push lastIndex forward so the scan makes progress.
    ant_value_t li_val = js_getprop_fallback(js, rx, "lastIndex");
    if (is_err(li_val)) return li_val;
    double li = vtype(li_val) == T_NUM ? tod(li_val) : 0;

    ant_offset_t sl, so = vstr(js, str, &sl);
    double advance = 1;
    if (full_unicode && li < (double)sl) {
      advance = (double)utf8_char_len_at((const char *)(uintptr_t)(so), sl, (ant_offset_t)li);
    }
    js_setprop(js, rx, js_mkstr(js, "lastIndex", 9), tov(li + advance));
  }

  // ---- Phase 2: assemble the output -------------------------------------
  ant_offset_t str_len, str_off = vstr(js, str, &str_len);
  size_t buf_cap = str_len + 256;
  char *buf = ant_calloc(buf_cap);
  if (!buf) return js_mkerr(js, "oom");
  size_t buf_len = 0;
  ant_offset_t next_src_pos = 0;

  // Appends dlen bytes to buf, growing it as needed; on allocation failure
  // frees buf and returns "oom" from the enclosing function.
#define SB_APPEND(data, dlen) do { \
if (buf_len + (dlen) >= buf_cap) { \
buf_cap = (buf_len + (dlen) + 1) * 2; \
char *nb = ant_realloc(buf, buf_cap); \
if (!nb) { free(buf); return js_mkerr(js, "oom"); } \
buf = nb; \
} \
memcpy(buf + buf_len, data, dlen); buf_len += (dlen); \
} while(0)

  for (ant_offset_t i = 0; i < nresults; i++) {
    ant_value_t result = js_arr_get(js, results, i);

    ant_value_t matched = js_tostring_val(js, js_arr_get(js, result, 0));
    if (is_err(matched)) {
      free(buf);
      return matched;
    }
    ant_offset_t matched_len;
    vstr(js, matched, &matched_len);

    // Match position: "index" when numeric, clamped into [0, str_len].
    ant_value_t pos_val = js_getprop_fallback(js, result, "index");
    ant_offset_t position = 0;
    if (!is_err(pos_val) && vtype(pos_val) == T_NUM) {
      double d = tod(pos_val);
      position = d < 0 ? 0 : (ant_offset_t)d;
    }
    if (position > str_len) position = str_len;

    ant_value_t replacement;
    if (func_replace) {
      ant_offset_t ncaptures = js_arr_len(js, result);
      ant_value_t call_args[32];
      int ca = 0;
      // At most 30 captures so position and subject always fit.
      for (ant_offset_t c = 0; c < ncaptures && ca < 30; c++) {
        call_args[ca++] = js_arr_get(js, result, c);
      }
      call_args[ca++] = tov((double)position);
      call_args[ca++] = str;
      replacement = sv_vm_call(js->vm, js, replace_value, js_mkundef(), call_args, ca, NULL, false);
    } else {
      replacement = replace_str;
    }
    if (is_err(replacement)) {
      free(buf);
      return replacement;
    }

    ant_value_t rep_str = js_tostring_val(js, replacement);
    if (is_err(rep_str)) {
      free(buf);
      return rep_str;
    }

    // Results that start before the previous match ended are ignored.
    if (position < next_src_pos) continue;

    str_off = vstr(js, str, &str_len);
    if (position > next_src_pos) {
      SB_APPEND((const char *)(uintptr_t)(str_off + next_src_pos), position - next_src_pos);
    }

    ant_offset_t rep_len, rep_off = vstr(js, rep_str, &rep_len);
    if (func_replace) {
      SB_APPEND((const char *)(uintptr_t)(rep_off), rep_len);
    } else {
      ant_offset_t ncap = js_arr_len(js, result);
      int num_caps = ncap > 1 ? (int)(ncap - 1) : 0;

      // Small capture sets live on the stack, larger ones on the heap.
      repl_capture_t caps_buf[16];
      repl_capture_t *caps = num_caps <= 16
        ? caps_buf
        : ant_calloc(sizeof(repl_capture_t) * (size_t)num_caps);
      if (num_caps > 16 && !caps) {
        free(buf);
        return js_mkerr(js, "oom");
      }

      for (int ci = 0; ci < num_caps; ci++) {
        ant_value_t cap = js_arr_get(js, result, (ant_offset_t)(ci + 1));
        if (vtype(cap) == T_STR) {
          ant_offset_t cl, co = vstr(js, cap, &cl);
          caps[ci] = (repl_capture_t){ (const char *)(uintptr_t)(co), cl };
        } else {
          caps[ci] = (repl_capture_t){ NULL, 0 };
        }
      }

      ant_offset_t mlen, moff = vstr(js, matched, &mlen);
      str_off = vstr(js, str, &str_len);
      bool ok = repl_template(
        (const char *)(uintptr_t)(rep_off), rep_len,
        (const char *)(uintptr_t)(moff), mlen,
        (const char *)(uintptr_t)(str_off), str_len,
        position, caps, num_caps,
        &buf, &buf_len, &buf_cap
      );
      if (caps != caps_buf) free(caps);
      if (!ok) {
        free(buf);
        return js_mkerr(js, "oom");
      }
    }

    next_src_pos = position + matched_len;
  }

  // Tail of the subject after the last replaced match.
  str_off = vstr(js, str, &str_len);
  if (next_src_pos < str_len) {
    SB_APPEND((const char *)(uintptr_t)(str_off + next_src_pos), str_len - next_src_pos);
  }
#undef SB_APPEND

  ant_value_t ret = js_mkstr(js, buf, buf_len);
  free(buf);
  return ret;
}
// RegExp.prototype[@@search](string).
//
// Saves the regexp's "lastIndex", sets it to 0, runs one exec, reads
// "lastIndex" again (only to surface an error from that read) and writes the
// saved value back. Returns the result's "index" when it is a number, -1
// when exec returned null or "index" is not a number, a TypeError when
// `this` is not an object, or the first error encountered. Note that an
// error from exec returns before "lastIndex" is written back.
static ant_value_t builtin_regexp_symbol_search(ant_t *js, ant_value_t *args, int nargs) {
  ant_value_t rx = js->this_val;
  if (!is_object_type(rx)) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype[@@search] called on non-object");
  }

  ant_value_t str;
  if (nargs > 0) str = js_tostring_val(js, args[0]);
  else str = js_mkstr(js, "undefined", 9);
  if (is_err(str)) return str;

  ant_value_t saved_li = js_getprop_fallback(js, rx, "lastIndex");
  if (is_err(saved_li)) return saved_li;
  js_setprop(js, rx, js_mkstr(js, "lastIndex", 9), tov(0));

  ant_value_t result = regexp_exec_abstract(js, rx, str);
  if (is_err(result)) return result;

  ant_value_t current_li = js_getprop_fallback(js, rx, "lastIndex");
  if (is_err(current_li)) return current_li;
  js_setprop(js, rx, js_mkstr(js, "lastIndex", 9), saved_li);

  if (vtype(result) == T_NULL) return tov(-1);

  ant_value_t idx = js_getprop_fallback(js, result, "index");
  if (is_err(idx)) return idx;
  if (vtype(idx) != T_NUM) return tov(-1);
  return idx;
}
// RegExp.prototype[@@split](string, limit).
//
// Steps, as implemented here:
//   1. Pick the constructor C: the global "RegExp" when `this.constructor`
//      is undefined or its species value is undefined/null, otherwise the
//      species value. C must be a function.
//   2. Build the splitter via regexp_species_construct from `this` and its
//      flags with 'y' appended when not already present.
//   3. Walk the subject: at each position q set the splitter's lastIndex,
//      exec, and on a match whose end differs from the current segment
//      start p, push the segment p..q and the captures, then continue after
//      the match. Positions advance by 1, or by utf8_char_len_at's result
//      when the flags contain 'u' or 'v'.
//   4. Push the remaining tail.
// `limit` (args[1], when defined and within 0..UINT32_MAX) caps the number
// of pushed elements; 0 yields an empty array. An empty subject yields
// [subject] when the splitter does not match it, [] otherwise.
//
// Returns the array, a TypeError for a non-object receiver / bad
// constructor, the first error encountered, or "oom".
static ant_value_t builtin_regexp_symbol_split(ant_t *js, ant_value_t *args, int nargs) {
  ant_value_t rx = js_getthis(js);
  if (!is_object_type(rx)) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype[@@split] called on non-object");
  }

  ant_value_t str = nargs > 0 ? js_tostring_val(js, args[0]) : js_mkstr(js, "", 0);
  if (is_err(str)) return str;

  ant_value_t ctor = js_get(js, rx, "constructor");
  if (is_err(ctor)) return ctor;

  ant_value_t C;
  if (vtype(ctor) == T_UNDEF) {
    C = js_get(js, js_glob(js), "RegExp");
  } else if (!is_object_type(ctor)) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp.prototype[@@split]: constructor is not an object");
  } else {
    ant_value_t species = get_ctor_species_value(js, ctor);
    if (is_err(species)) return species;
    if (vtype(species) == T_UNDEF || vtype(species) == T_NULL) {
      C = js_get(js, js_glob(js), "RegExp");
    } else {
      C = species;
    }
  }
  if (is_err(C)) return C;
  if (vtype(C) != T_FUNC && vtype(C) != T_CFUNC) {
    return js_mkerr_typed(js, JS_ERR_TYPE, "RegExp species is not a constructor");
  }

  ant_value_t flags_val = js_get(js, rx, "flags");
  if (is_err(flags_val)) return flags_val;
  ant_value_t flags_str = js_tostring_val(js, flags_val);
  if (is_err(flags_str)) return flags_str;

  ant_offset_t flen, foff = vstr(js, flags_str, &flen);
  const char *fptr = (const char *)(uintptr_t)(foff);
  bool unicode_matching = false, has_sticky = false;
  for (ant_offset_t i = 0; i < flen; i++) {
    if (fptr[i] == 'u' || fptr[i] == 'v') unicode_matching = true;
    if (fptr[i] == 'y') has_sticky = true;
  }

  ant_value_t new_flags;
  if (has_sticky) {
    new_flags = flags_str;
  } else {
    // "flags" is read from an arbitrary object, so its length is not bounded
    // by the number of real flag letters. Size the scratch buffer from the
    // actual length: stack for the common short case, heap otherwise.
    foff = vstr(js, flags_str, &flen);
    fptr = (const char *)(uintptr_t)(foff);

    char fbuf[16];
    size_t needed = (size_t)flen + 1;
    char *scratch = needed <= sizeof(fbuf) ? fbuf : ant_calloc(needed);
    if (!scratch) return js_mkerr(js, "oom");

    memcpy(scratch, fptr, flen);
    scratch[flen] = 'y';
    new_flags = js_mkstr(js, scratch, flen + 1);
    if (scratch != fbuf) free(scratch);
  }

  ant_value_t ctor_args[2] = { rx, new_flags };
  ant_value_t splitter = regexp_species_construct(js, rx, C, ctor_args, 2);
  if (is_err(splitter)) return splitter;

  ant_value_t A = js_mkarr(js);
  if (is_err(A)) return A;
  ant_offset_t lengthA = 0;

  uint32_t lim = UINT32_MAX;
  if (nargs >= 2 && vtype(args[1]) != T_UNDEF) {
    double d = tod(args[1]);
    if (d >= 0 && d <= UINT32_MAX) lim = (uint32_t)d;
  }
  if (lim == 0) return mkval(T_ARR, vdata(A));

  ant_offset_t str_len, str_off = vstr(js, str, &str_len);
  ant_offset_t size = str_len;

  if (size == 0) {
    ant_value_t z = regexp_exec_abstract(js, splitter, str);
    if (is_err(z)) return z;
    if (vtype(z) == T_NULL) js_arr_push(js, A, str);
    return mkval(T_ARR, vdata(A));
  }

  // p: start of the segment being accumulated; q: position being probed.
  ant_offset_t p = 0, q = p;
  ant_value_t lastIndex_key = js_mkstr(js, "lastIndex", 9);

  while (q < size) {
    js_setprop(js, splitter, lastIndex_key, tov((double)q));
    ant_value_t z = regexp_exec_abstract(js, splitter, str);
    if (is_err(z)) return z;

    if (vtype(z) == T_NULL) {
      if (unicode_matching) {
        str_off = vstr(js, str, &str_len);
        q += utf8_char_len_at((const char *)(uintptr_t)(str_off), str_len, q);
      } else {
        q++;
      }
      continue;
    }

    ant_value_t li_val = js_get(js, splitter, "lastIndex");
    if (is_err(li_val)) return li_val;
    double e_raw = vtype(li_val) == T_NUM ? tod(li_val) : 0;
    // End of match, clamped into [0, size].
    ant_offset_t e = (ant_offset_t)(e_raw < 0 ? 0 : (e_raw > (double)size ? (double)size : e_raw));

    if (e == p) {
      if (unicode_matching) {
        str_off = vstr(js, str, &str_len);
        q += utf8_char_len_at((const char *)(uintptr_t)(str_off), str_len, q);
      } else {
        q++;
      }
      continue;
    }

    str_off = vstr(js, str, NULL);
    ant_value_t T_val = js_mkstr(js, (char *)(uintptr_t)(str_off + p), q - p);
    js_arr_push(js, A, T_val);
    lengthA++;
    if (lengthA == lim) return mkval(T_ARR, vdata(A));

    ant_offset_t num_caps = js_arr_len(js, z);
    for (ant_offset_t i = 1; i < num_caps; i++) {
      ant_value_t cap = js_arr_get(js, z, i);
      js_arr_push(js, A, cap);
      lengthA++;
      if (lengthA == lim) return mkval(T_ARR, vdata(A));
    }

    p = e;
    q = p;
  }

  str_off = vstr(js, str, &str_len);
  ant_value_t trailing = js_mkstr(js, (char *)(uintptr_t)(str_off + p), str_len - p);
  js_arr_push(js, A, trailing);
  return mkval(T_ARR, vdata(A));
}
// One-shot match helper: compiles `args.pattern_ptr` (after translation by
// js_to_pcre2_pattern into a 4096-byte buffer) with UTF/UCP enabled, runs
// it over `args.str_ptr`, and frees the compiled pattern before returning.
//
// - args.global set: returns an array of every matched substring.
// - otherwise: returns an array with the whole match followed by each
//   capture group (undefined for unset groups) and an "index" property
//   holding the match start.
//
// Returns null when the pattern does not compile or nothing matches, and an
// error value when array or string creation fails.
//
// After an empty match the scan position is moved past one complete UTF-8
// character. The pattern is compiled with PCRE2_UTF, and a start offset
// that lands inside a multi-byte sequence makes pcre2_match fail, which
// would otherwise end a global scan early.
ant_value_t do_regex_match_pcre2(ant_t *js, regex_match_args_t args) {
  char pcre2_pattern[4096];
  size_t pcre2_len = js_to_pcre2_pattern(
    args.pattern_ptr, args.pattern_len,
    pcre2_pattern, sizeof(pcre2_pattern), false
  );

  uint32_t options = PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_UNSET_BACKREF | PCRE2_DUPNAMES;
  if (args.ignore_case) options |= PCRE2_CASELESS;
  if (args.multiline) options |= PCRE2_MULTILINE;

  int errcode;
  PCRE2_SIZE erroffset;
  pcre2_code *re = pcre2_compile((PCRE2_SPTR)pcre2_pattern, pcre2_len, options, &errcode, &erroffset, NULL);
  if (re == NULL) return js_mknull();

  pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, NULL);
  uint32_t capture_count = 0;
  pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);

  ant_value_t result_arr = js_mkarr(js);
  if (is_err(result_arr)) {
    pcre2_match_data_free(match_data);
    pcre2_code_free(re);
    return result_arr;
  }

  PCRE2_SIZE subject_len = (PCRE2_SIZE)args.str_len;
  PCRE2_SIZE pos = 0;
  int match_count = 0;

  while (pos <= subject_len) {
    int rc = pcre2_match(re, (PCRE2_SPTR)args.str_ptr, args.str_len, pos, 0, match_data, NULL);
    if (rc < 0) break;

    PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data);
    PCRE2_SIZE match_start = ovector[0];
    PCRE2_SIZE match_end = ovector[1];

    if (args.global) {
      ant_value_t match_str = js_mkstr(js, args.str_ptr + match_start, match_end - match_start);
      if (is_err(match_str)) {
        pcre2_match_data_free(match_data);
        pcre2_code_free(re);
        return match_str;
      }
      js_arr_push(js, result_arr, match_str);
    } else {
      for (uint32_t i = 0; i <= capture_count; i++) {
        PCRE2_SIZE start = ovector[2 * i];
        PCRE2_SIZE end = ovector[2 * i + 1];
        if (start == PCRE2_UNSET) {
          js_arr_push(js, result_arr, js_mkundef());
          continue;
        }
        ant_value_t match_str = js_mkstr(js, args.str_ptr + start, end - start);
        if (is_err(match_str)) {
          pcre2_match_data_free(match_data);
          pcre2_code_free(re);
          return match_str;
        }
        js_arr_push(js, result_arr, match_str);
      }
      js_setprop(js, result_arr, js_mkstr(js, "index", 5), tov((double)match_start));
    }

    match_count++;
    if (!args.global) break;

    if (match_start == match_end) {
      // Empty match: step over one whole character, not one byte.
      pos = match_end + 1;
      while (pos < subject_len && ((unsigned char)args.str_ptr[pos] & 0xC0) == 0x80) pos++;
    } else {
      pos = match_end;
    }
  }

  pcre2_match_data_free(match_data);
  pcre2_code_free(re);

  if (match_count == 0) return js_mknull();
  return result_arr;
}
/*
 * Append `n` bytes from `data` to the growable byte buffer `*buf`.
 *
 * `*len` is the number of bytes in use and `*cap` the allocated size; both
 * are updated in place. When the append would reach or exceed the capacity,
 * the buffer is regrown through ant_realloc() to twice the required size
 * (required = used + n + 1). No terminator is written.
 *
 * Returns true on success (an append of zero bytes is a successful no-op).
 * Returns false when the requested size cannot be represented in size_t or
 * when ant_realloc() fails; in both cases `*buf`, `*len` and `*cap` are not
 * modified by this function.
 */
static bool str_buf_append(char **buf, size_t *len, size_t *cap, const char *data, size_t n) {
  if (n == 0) return true;

  /*
   * Guard the arithmetic below: `*len + n + 1` and its doubling must not
   * wrap around, otherwise the capacity check would pass (or a tiny buffer
   * would be allocated) and the memcpy would write out of bounds.
   */
  const size_t half_max = ((size_t)-1) / 2;
  if (*len >= half_max || n >= half_max - *len) return false;

  if (*len + n >= *cap) {
    size_t new_cap = (*len + n + 1) * 2;
    char *nb = (char *)ant_realloc(*buf, new_cap);
    if (!nb) return false;
    *buf = nb;
    *cap = new_cap;
  }

  memcpy(*buf + *len, data, n);
  *len += n;
  return true;
}
/*
 * Append the replacement text for one match to the output buffer.
 *
 * When `is_func` is false, the `repl_len` bytes at `repl_ptr` are appended
 * verbatim. When `is_func` is true, `replacement` is called with three
 * arguments -- the matched substring (`match_len` bytes at `str_ptr + pos`),
 * `pos` as a number, and the whole string `str` -- and the call result,
 * converted with js_tostring_val(), is appended instead.
 *
 * `buf`, `buf_len` and `buf_cap` describe the growable output buffer and are
 * forwarded to str_buf_append().
 *
 * Returns js_mkundef() on success. On failure returns an error value: the
 * error from creating the match string, from the callback, from the string
 * conversion, or an "oom" error when the buffer cannot grow. The caller
 * keeps ownership of `*buf` in every case.
 */
static inline ant_value_t emit_str_replacement(
  ant_t *js, ant_value_t replacement, bool is_func,
  const char *repl_ptr, ant_offset_t repl_len,
  const char *str_ptr, ant_value_t str,
  ant_offset_t pos, ant_offset_t match_len,
  char **buf, size_t *buf_len, size_t *buf_cap
) {
  if (!is_func) {
    if (!str_buf_append(buf, buf_len, buf_cap, repl_ptr, repl_len)) return js_mkerr(js, "oom");
    return js_mkundef();
  }

  /* Do not hand an error value to user code as the "match" argument. */
  ant_value_t matched = js_mkstr(js, str_ptr + pos, match_len);
  if (is_err(matched)) return matched;

  ant_value_t cb_args[3] = { matched, tov((double)pos), str };
  ant_value_t r = sv_vm_call(js->vm, js, replacement, js_mkundef(), cb_args, 3, NULL, false);
  if (vtype(r) == T_ERR) return r;

  ant_value_t r_str = js_tostring_val(js, r);
  if (is_err(r_str)) return r_str;

  ant_offset_t rlen, roff = vstr(js, r_str, &rlen);
  if (!str_buf_append(buf, buf_len, buf_cap, (const char *)(uintptr_t)roff, rlen))
    return js_mkerr(js, "oom");

  return js_mkundef();
}
/*
 * Shared implementation of String.prototype.replace (replace_all == false)
 * and String.prototype.replaceAll (replace_all == true).
 *
 * The receiver (js->this_val) is unwrapped and converted to a string. Then:
 *   - with no arguments the string is returned as is;
 *   - if args[0] is an object, replaceAll first requires its `global`
 *     property to be truthy (TypeError otherwise), and the call is offered
 *     to the object's Symbol.replace method; if that method was invoked its
 *     result (or error) is returned;
 *   - otherwise a plain byte-wise substring search is performed. The string
 *     is returned unchanged when fewer than two arguments were given, when
 *     the search value is not a string, when the replacement is neither a
 *     function nor a string, or when nothing matches.
 *
 * String replacements are inserted literally. Function replacements are
 * invoked through emit_str_replacement().
 *
 * NOTE(review): the position handed to a replacement callback is a byte
 * offset into the string data -- confirm whether callers expect a UTF-16
 * index for non-ASCII strings.
 *
 * Returns the resulting string value, or an error value (callback error,
 * TypeError, or "oom").
 */
static ant_value_t string_replace_impl(ant_t *js, ant_value_t *args, int nargs, bool replace_all) {
  ant_value_t this_unwrapped = unwrap_primitive(js, js->this_val);
  ant_value_t str = js_tostring_val(js, this_unwrapped);
  if (is_err(str)) return str;
  if (nargs < 1) return str;

  if (is_object_type(args[0])) {
    if (replace_all) {
      ant_value_t global_val = js_getprop_fallback(js, args[0], "global");
      if (!js_truthy(js, global_val)) {
        return js_mkerr_typed(js, JS_ERR_TYPE, "String.prototype.replaceAll called with a non-global RegExp");
      }
    }

    bool called = false;
    ant_value_t replacement_arg = nargs > 1 ? args[1] : js_mkundef();
    ant_value_t call_args[2] = { str, replacement_arg };
    ant_value_t result = maybe_call_symbol_method(
      js, args[0], get_replace_sym(), args[0], call_args, 2, &called
    );
    if (is_err(result)) return result;
    if (called) return result;
  }

  if (nargs < 2) return str;
  ant_value_t search = args[0];
  ant_value_t replacement = args[1];
  if (vtype(search) != T_STR) return str;

  ant_offset_t str_len, str_off = vstr(js, str, &str_len);
  const char *str_ptr = (char *)(uintptr_t)(str_off);
  ant_offset_t search_len, search_off = vstr(js, search, &search_len);
  const char *search_ptr = (char *)(uintptr_t)(search_off);

  bool is_func = (vtype(replacement) == T_FUNC);
  ant_offset_t repl_len = 0;
  const char *repl_ptr = NULL;
  if (!is_func) {
    if (vtype(replacement) != T_STR) return str;
    ant_offset_t repl_off = vstr(js, replacement, &repl_len);
    repl_ptr = (char *)(uintptr_t)(repl_off);
  }

  /* Single replacement: locate the first occurrence before allocating. */
  ant_offset_t match_pos = 0;
  if (!replace_all) {
    if (search_len > str_len) return str;
    bool found = false;
    for (ant_offset_t i = 0; i <= str_len - search_len; i++) {
      if (memcmp(str_ptr + i, search_ptr, search_len) == 0) {
        match_pos = i;
        found = true;
        break;
      }
    }
    if (!found) return str;
  }

  size_t cap = str_len + repl_len + 256, len = 0;
  char *buf = (char *)ant_calloc(cap);
  if (!buf) return js_mkerr(js, "oom");
  ant_value_t ret;

  if (!replace_all) {
    /* prefix + replacement + suffix */
    if (!str_buf_append(&buf, &len, &cap, str_ptr, match_pos)) goto fail_oom;

    ant_value_t err = emit_str_replacement(
      js, replacement, is_func, repl_ptr, repl_len,
      str_ptr, str, match_pos, search_len, &buf, &len, &cap
    );
    if (vtype(err) == T_ERR) {
      free(buf);
      return err;
    }

    if (!str_buf_append(
          &buf, &len, &cap,
          str_ptr + match_pos + search_len,
          str_len - match_pos - search_len)) goto fail_oom;
  } else {
    ant_offset_t pos = 0;
    bool replaced = false;

    while (pos + search_len <= str_len) {
      bool hit = search_len == 0 || memcmp(str_ptr + pos, search_ptr, search_len) == 0;

      if (!hit) {
        if (!str_buf_append(&buf, &len, &cap, str_ptr + pos, 1)) goto fail_oom;
        pos++;
        continue;
      }

      replaced = true;
      ant_value_t err = emit_str_replacement(
        js, replacement, is_func, repl_ptr, repl_len,
        str_ptr, str, pos, search_len, &buf, &len, &cap
      );
      if (vtype(err) == T_ERR) {
        free(buf);
        return err;
      }

      if (search_len != 0) {
        pos += search_len;
        continue;
      }

      /*
       * Empty search string: a match exists before every character and once
       * more at the end of the string. After the final match stop with
       * pos == str_len; stepping past the end would make the tail length
       * below (str_len - pos) wrap around.
       */
      if (pos >= str_len) break;

      /*
       * Copy one whole UTF-8 sequence (lead byte plus its 10xxxxxx
       * continuation bytes) so the replacement is never inserted inside a
       * multi-byte character.
       */
      ant_offset_t step = 1;
      while (pos + step < str_len && ((unsigned char)str_ptr[pos + step] & 0xC0) == 0x80) step++;
      if (!str_buf_append(&buf, &len, &cap, str_ptr + pos, step)) goto fail_oom;
      pos += step;
    }

    /* Here pos <= str_len always holds, so the tail length cannot underflow. */
    if (!str_buf_append(&buf, &len, &cap, str_ptr + pos, str_len - pos)) goto fail_oom;

    if (!replaced) {
      free(buf);
      return str;
    }
  }

  ret = js_mkstr(js, buf, len);
  free(buf);
  return ret;

fail_oom:
  free(buf);
  return js_mkerr(js, "oom");
}
/* String.prototype.replace: shared implementation in single-replacement mode. */
static ant_value_t builtin_string_replace(ant_t *js, ant_value_t *args, int nargs) {
  const bool every_occurrence = false;
  return string_replace_impl(js, args, nargs, every_occurrence);
}
/* String.prototype.replaceAll: shared implementation in replace-every-match mode. */
static ant_value_t builtin_string_replaceAll(ant_t *js, ant_value_t *args, int nargs) {
  const bool every_occurrence = true;
  return string_replace_impl(js, args, nargs, every_occurrence);
}
/*
 * String.prototype.search.
 *
 * Converts the receiver to a string. If args[0] is an object, the call is
 * first offered to its Symbol.search method; when that method was invoked
 * (or produced an error) its result is returned directly.
 *
 * Otherwise the pattern text is taken from a string argument, or from the
 * `source` property of an object argument (whose `flags` string is scanned
 * for 'i' and 'm'). The pattern is translated with js_to_pcre2_pattern(),
 * compiled with PCRE2 and matched once from offset 0.
 *
 * Returns the match start offset reported by PCRE2 (ovector[0]) as a number,
 * or -1 when there is no argument, the pattern cannot be obtained or
 * compiled, or nothing matches.
 */
static ant_value_t builtin_string_search(ant_t *js, ant_value_t *args, int nargs) {
  ant_value_t receiver = unwrap_primitive(js, js->this_val);
  ant_value_t subject = js_tostring_val(js, receiver);
  if (is_err(subject)) return subject;
  if (nargs < 1) return tov(-1);

  /* Give an object argument the chance to handle the search itself. */
  if (is_object_type(args[0])) {
    bool handled = false;
    ant_value_t sym_args[1] = { subject };
    ant_value_t sym_result = maybe_call_symbol_method(
      js, args[0], get_search_sym(), args[0], sym_args, 1, &handled
    );
    if (is_err(sym_result) || handled) return sym_result;
  }

  ant_value_t pattern = args[0];
  const char *src_ptr = NULL;
  ant_offset_t src_len = 0;
  bool caseless = false;
  bool multi_line = false;

  if (vtype(pattern) == T_OBJ) {
    /* Regex-like object: read its `source` and, if present, its `flags`. */
    ant_offset_t source_ref = lkp(js, pattern, "source", 6);
    if (source_ref == 0) return tov(-1);

    ant_value_t source = js_propref_load(js, source_ref);
    if (vtype(source) != T_STR) return tov(-1);
    src_ptr = (char *)(uintptr_t)(vstr(js, source, &src_len));

    ant_offset_t flags_ref = lkp(js, pattern, "flags", 5);
    if (flags_ref != 0) {
      ant_value_t flags = js_propref_load(js, flags_ref);
      if (vtype(flags) == T_STR) {
        ant_offset_t nflags, flags_data = vstr(js, flags, &nflags);
        const char *flag_chars = (char *)(uintptr_t)(flags_data);
        for (ant_offset_t k = 0; k < nflags; k++) {
          switch (flag_chars[k]) {
            case 'i': caseless = true; break;
            case 'm': multi_line = true; break;
            default: break;
          }
        }
      }
    }
  } else if (vtype(pattern) == T_STR) {
    src_ptr = (char *)(uintptr_t)(vstr(js, pattern, &src_len));
  } else {
    return tov(-1);
  }

  ant_offset_t subject_len, subject_data = vstr(js, subject, &subject_len);
  const char *subject_ptr = (char *)(uintptr_t)(subject_data);

  /* Translate the JS pattern into PCRE2 syntax using a fixed stack buffer. */
  char translated[4096];
  size_t translated_len = js_to_pcre2_pattern(
    src_ptr, src_len, translated, sizeof(translated), false
  );

  uint32_t compile_opts = PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_UNSET_BACKREF | PCRE2_DUPNAMES;
  if (caseless) compile_opts |= PCRE2_CASELESS;
  if (multi_line) compile_opts |= PCRE2_MULTILINE;

  int compile_err;
  PCRE2_SIZE compile_err_at;
  pcre2_code *compiled = pcre2_compile(
    (PCRE2_SPTR)translated, translated_len, compile_opts,
    &compile_err, &compile_err_at, NULL
  );
  if (compiled == NULL) return tov(-1);

  /* Single match attempt from the start; both PCRE2 objects are freed on every path. */
  double found_at = -1;
  pcre2_match_data *md = pcre2_match_data_create_from_pattern(compiled, NULL);
  int status = pcre2_match(compiled, (PCRE2_SPTR)subject_ptr, subject_len, 0, 0, md, NULL);
  if (status >= 0) {
    PCRE2_SIZE *offsets = pcre2_get_ovector_pointer(md);
    found_at = (double)offsets[0];
  }
  pcre2_match_data_free(md);
  pcre2_code_free(compiled);

  return tov(found_at);
}
/*
 * String.prototype.match.
 *
 * Converts the receiver to a string. If args[0] is an object, the call is
 * first offered to its Symbol.match method; when that method was invoked (or
 * produced an error) its result is returned directly.
 *
 * Otherwise the pattern text is taken from a string argument, or from the
 * `source` property of an object argument (whose `flags` string is scanned
 * for 'g', 'i' and 'm'), and matching is delegated to
 * do_regex_match_pcre2(). For a non-global match that yields an array, an
 * `input` property holding the subject string is added to the result.
 *
 * Returns null when there is no argument or the pattern cannot be obtained;
 * otherwise returns whatever do_regex_match_pcre2() produced.
 */
static ant_value_t builtin_string_match(ant_t *js, ant_value_t *args, int nargs) {
  ant_value_t receiver = unwrap_primitive(js, js->this_val);
  ant_value_t subject = js_tostring_val(js, receiver);
  if (is_err(subject)) return subject;
  if (nargs < 1) return js_mknull();

  /* Give an object argument the chance to handle the match itself. */
  if (is_object_type(args[0])) {
    bool handled = false;
    ant_value_t sym_args[1] = { subject };
    ant_value_t sym_result = maybe_call_symbol_method(
      js, args[0], get_match_sym(), args[0], sym_args, 1, &handled
    );
    if (is_err(sym_result) || handled) return sym_result;
  }

  ant_value_t pattern = args[0];
  const char *src_ptr = NULL;
  ant_offset_t src_len = 0;
  bool want_global = false;
  bool caseless = false;
  bool multi_line = false;

  if (vtype(pattern) == T_OBJ) {
    /* Regex-like object: read its `source` and, if present, its `flags`. */
    ant_offset_t source_ref = lkp(js, pattern, "source", 6);
    if (source_ref == 0) return js_mknull();

    ant_value_t source = js_propref_load(js, source_ref);
    if (vtype(source) != T_STR) return js_mknull();
    src_ptr = (char *)(uintptr_t)(vstr(js, source, &src_len));

    ant_offset_t flags_ref = lkp(js, pattern, "flags", 5);
    if (flags_ref != 0) {
      ant_value_t flags = js_propref_load(js, flags_ref);
      if (vtype(flags) == T_STR) {
        ant_offset_t nflags, flags_data = vstr(js, flags, &nflags);
        const char *flag_chars = (char *)(uintptr_t)(flags_data);
        for (ant_offset_t k = 0; k < nflags; k++) {
          switch (flag_chars[k]) {
            case 'g': want_global = true; break;
            case 'i': caseless = true; break;
            case 'm': multi_line = true; break;
            default: break;
          }
        }
      }
    }
  } else if (vtype(pattern) == T_STR) {
    src_ptr = (char *)(uintptr_t)(vstr(js, pattern, &src_len));
  } else {
    return js_mknull();
  }

  ant_offset_t subject_len, subject_data = vstr(js, subject, &subject_len);
  const char *subject_ptr = (char *)(uintptr_t)(subject_data);

  regex_match_args_t request = {
    .pattern_ptr = src_ptr,
    .pattern_len = src_len,
    .str_ptr = subject_ptr,
    .str_len = subject_len,
    .global = want_global,
    .ignore_case = caseless,
    .multiline = multi_line,
  };
  ant_value_t matches = do_regex_match_pcre2(js, request);

  /* Only a non-global match array carries the original input string. */
  bool attach_input = !want_global && vtype(matches) == T_ARR;
  if (attach_input) {
    js_setprop(js, matches, js_mkstr(js, "input", 5), subject);
  }

  return matches;
}
/*
 * Register the RegExp built-in on the runtime's global object and install
 * the regex-backed String.prototype methods.
 *
 * Builds RegExp.prototype (methods, flag properties defaulting to false,
 * well-known symbol methods, the `flags` getter), the prototype used by
 * matchAll iterators, and the RegExp constructor with its static
 * properties, then publishes the constructor as `RegExp`. Finally adds
 * search/match/matchAll/replace/replaceAll to String.prototype.
 */
void init_regex_module(void) {
  ant_t *js = rt->js;
  ant_value_t global_obj = js->global;
  ant_value_t base_proto = js->sym.object_proto;

  /* --- RegExp.prototype ------------------------------------------------ */
  ant_value_t proto = js_mkobj(js);
  js_set_proto_init(proto, base_proto);

  defmethod(js, proto, "test", 4, js_mkfun(builtin_regexp_test));
  defmethod(js, proto, "exec", 4, js_mkfun(builtin_regexp_exec));
  defmethod(js, proto, "toString", 8, js_mkfun(builtin_regexp_toString));

  /* Flag properties on the prototype all start out false. */
  js_mkprop_fast(js, proto, "global", 6, js_false);
  js_mkprop_fast(js, proto, "ignoreCase", 10, js_false);
  js_mkprop_fast(js, proto, "multiline", 9, js_false);
  js_mkprop_fast(js, proto, "dotAll", 6, js_false);
  js_mkprop_fast(js, proto, "unicode", 7, js_false);
  js_mkprop_fast(js, proto, "sticky", 6, js_false);
  js_mkprop_fast(js, proto, "hasIndices", 10, js_false);
  js_mkprop_fast(js, proto, "unicodeSets", 11, js_false);

  js_set_sym(js, proto, get_split_sym(), js_mkfun(builtin_regexp_symbol_split));
  js_set_sym(js, proto, get_match_sym(), js_mkfun(builtin_regexp_symbol_match));
  js_set_sym(js, proto, get_matchAll_sym(), js_mkfun(builtin_regexp_symbol_matchAll));

  /* --- prototype shared by matchAll iterators -------------------------- */
  regexp_matchall_iter_proto_val = js_mkobj(js);
  js_set_proto_init(regexp_matchall_iter_proto_val, js->sym.iterator_proto);
  defmethod(js, regexp_matchall_iter_proto_val, "next", 4, js_mkfun(regexp_matchall_next));
  js_set_sym(js, regexp_matchall_iter_proto_val, get_iterator_sym(), js_mkfun(sym_this_cb));

  js_set_sym(js, proto, get_replace_sym(), js_mkfun(builtin_regexp_symbol_replace));
  js_set_sym(js, proto, get_search_sym(), js_mkfun(builtin_regexp_symbol_search));
  js_set_sym(js, proto, get_toStringTag_sym(), js_mkstr(js, "RegExp", 6));

  js_set_getter_desc(js, proto, "flags", 5, js_mkfun(builtin_regexp_flags_getter), JS_DESC_C);
  defmethod(js, proto, "compile", 7, js_mkfun(builtin_regexp_compile));

  /* --- RegExp constructor ---------------------------------------------- */
  ant_value_t ctor_obj = js_mkobj(js);
  js_set_slot(ctor_obj, SLOT_CFUNC, js_mkfun(builtin_RegExp));
  js_mkprop_fast(js, ctor_obj, "prototype", 9, proto);
  js_mkprop_fast(js, ctor_obj, "name", 4, js_mkstr(js, "RegExp", 6));
  js_set_descriptor(js, ctor_obj, "name", 4, 0);
  js_define_species_getter(js, ctor_obj);

  ant_value_t ctor_fn = js_obj_to_func(ctor_obj);
  js_setprop(js, proto, js_mkstr(js, "constructor", 11), ctor_fn);
  js_set_descriptor(js, proto, "constructor", 11, JS_DESC_W | JS_DESC_C);

  js_set(js, ctor_obj, "escape", js_mkfun(builtin_regexp_escape));

  /* Static match properties ($1..$9, lastMatch, $&) start as empty strings. */
  ant_value_t empty_str = js_mkstr(js, "", 0);
  for (int digit = 1; digit <= 9; digit++) {
    char key[3];
    key[0] = '$';
    key[1] = (char)('0' + digit);
    key[2] = '\0';
    js_set(js, ctor_obj, key, empty_str);
  }
  js_set(js, ctor_obj, "lastMatch", empty_str);
  js_set(js, ctor_obj, "$&", empty_str);

  js_set(js, global_obj, "RegExp", ctor_fn);

  /* --- regex-backed String.prototype methods --------------------------- */
  ant_value_t str_ctor = js_get(js, global_obj, "String");
  ant_value_t str_proto = js_get(js, str_ctor, "prototype");

  defmethod(js, str_proto, "search", 6, js_mkfun(builtin_string_search));
  defmethod(js, str_proto, "match", 5, js_mkfun(builtin_string_match));
  defmethod(js, str_proto, "matchAll", 8, js_mkfun(builtin_string_matchAll));
  defmethod(js, str_proto, "replace", 7, js_mkfun(builtin_string_replace));
  defmethod(js, str_proto, "replaceAll", 10, js_mkfun(builtin_string_replaceAll));
}
void
gc_sweep_regex_cache
(
void
)
{
size_t
write
=
0
;
for
(
size_t
i
=
0
;
i
<
regex_cache_count
;
i
++
)
{
if
(
!
gc_obj_is_marked
(
regex_cache
[
i
].
obj
))
{
pcre2_match_data_free
(
regex_cache
[
i
].
match_data
);
pcre2_code_free
(
regex_cache
[
i
].
code
);
}
else
{
if
(
write
!=
i
)
regex_cache
[
write
]
=
regex_cache
[
i
];
write
++
;
}
}
regex_cache_count
=
write
;
}
void
cleanup_regex_module
(
void
)
{
for
(
size_t
i
=
0
;
i
<
regex_cache_count
;
i
++
)
{
pcre2_match_data_free
(
regex_cache
[
i
].
match_data
);
pcre2_code_free
(
regex_cache
[
i
].
code
);
}
free
(
regex_cache
);
regex_cache
=
NULL
;
regex_cache_count
=
0
;
regex_cache_cap
=
0
;
}
File Metadata
Details
Attached
Mime Type
text/x-c
Expires
Sat, May 2, 6:15 AM (1 d, 23 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
541769
Default Alt Text
regex.c (76 KB)
Attached To
Mode
rANT Ant
Attached
Detach File
Event Timeline
Log In to Comment