Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F984938
PhutilSearchQueryCompiler.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
10 KB
Referenced Files
None
Subscribers
None
PhutilSearchQueryCompiler.php
View Options
<?php
/**
 * Tokenizes user-entered search queries and compiles the resulting tokens
 * into a query string built from a configurable set of operator characters.
 *
 * Typical use is to call @{method:newTokens} on a raw query and then pass the
 * tokens to one of the `compile...Query()` methods.
 */
final class PhutilSearchQueryCompiler
  extends Phobject {

  /**
   * Operator characters used when rendering compiled queries. This class
   * reads offset 0 as the "and" prefix, offset 2 as the "not" prefix, and
   * offsets 10 and 11 as the opening and closing quote characters.
   */
  private $operators = '+ -><()~*:""&|';

  // NOTE: Not referenced by any method of this class.
  private $query;

  private $stemmer;
  private $enableFunctions = false;

  const OPERATOR_NOT = 'not';
  const OPERATOR_AND = 'and';
  const OPERATOR_SUBSTRING = 'sub';
  const OPERATOR_EXACT = 'exact';
  const OPERATOR_ABSENT = 'absent';
  const OPERATOR_PRESENT = 'present';

  /**
   * Replace the operator characters used when rendering compiled queries.
   *
   * @param string $operators Operator characters, indexed by offset.
   * @return $this
   */
  public function setOperators($operators) {
    $this->operators = $operators;
    return $this;
  }

  /**
   * @return string The operator characters currently in use.
   */
  public function getOperators() {
    return $this->operators;
  }

  /**
   * Set the stemmer applied by @{method:compileStemmedQuery}.
   *
   * @param PhutilSearchStemmer $stemmer Stemmer to apply to token values.
   * @return $this
   */
  public function setStemmer(PhutilSearchStemmer $stemmer) {
    $this->stemmer = $stemmer;
    return $this;
  }

  /**
   * @return PhutilSearchStemmer|null The configured stemmer, if any.
   */
  public function getStemmer() {
    return $this->stemmer;
  }

  /**
   * Enable or disable function syntax ("name:term") and the additional
   * "~" and "=" operators when tokenizing queries.
   *
   * @param bool $enable_functions True to enable function syntax.
   * @return $this
   */
  public function setEnableFunctions($enable_functions) {
    $this->enableFunctions = $enable_functions;
    return $this;
  }

  /**
   * @return bool True if function syntax is enabled.
   */
  public function getEnableFunctions() {
    return $this->enableFunctions;
  }

  /**
   * Compile every token into a single query string.
   *
   * @param list<PhutilSearchQueryToken> $tokens Tokens to compile.
   * @return string|null Compiled query, or null if there are no tokens.
   */
  public function compileQuery(array $tokens) {
    assert_instances_of($tokens, 'PhutilSearchQueryToken');

    $result = array();
    foreach ($tokens as $token) {
      $result[] = $this->renderToken($token);
    }

    return $this->compileRenderedTokens($result);
  }

  /**
   * Compile only the quoted tokens into a query string. Unquoted tokens are
   * skipped.
   *
   * @param list<PhutilSearchQueryToken> $tokens Tokens to compile.
   * @return string|null Compiled query, or null if no token is quoted.
   */
  public function compileLiteralQuery(array $tokens) {
    assert_instances_of($tokens, 'PhutilSearchQueryToken');

    $result = array();
    foreach ($tokens as $token) {
      if (!$token->isQuoted()) {
        continue;
      }
      $result[] = $this->renderToken($token);
    }

    return $this->compileRenderedTokens($result);
  }

  /**
   * Compile only the unquoted tokens into a query string, passing the
   * configured stemmer (which may be null) to the renderer. Quoted tokens
   * are skipped.
   *
   * @param list<PhutilSearchQueryToken> $tokens Tokens to compile.
   * @return string|null Compiled query, or null if every token is quoted.
   */
  public function compileStemmedQuery(array $tokens) {
    assert_instances_of($tokens, 'PhutilSearchQueryToken');

    $result = array();
    foreach ($tokens as $token) {
      if ($token->isQuoted()) {
        continue;
      }
      $result[] = $this->renderToken($token, $this->getStemmer());
    }

    return $this->compileRenderedTokens($result);
  }

  /**
   * Join rendered tokens with single spaces, dropping duplicates.
   *
   * @param list<string> $list Rendered tokens.
   * @return string|null Joined query, or null if the list is empty.
   */
  private function compileRenderedTokens(array $list) {
    if (!$list) {
      return null;
    }

    $list = array_unique($list);
    return implode(' ', $list);
  }

  /**
   * Tokenize a raw query string into token objects.
   *
   * @param string $query Raw query text.
   * @return list<PhutilSearchQueryToken> One token per query term.
   * @throws PhutilSearchQueryCompilerSyntaxException If the query is too
   *   long or is not syntactically valid.
   */
  public function newTokens($query) {
    $results = $this->tokenizeQuery($query);

    $tokens = array();
    foreach ($results as $result) {
      $tokens[] = PhutilSearchQueryToken::newFromDictionary($result);
    }

    return $tokens;
  }

  /**
   * Split a raw query into token dictionaries.
   *
   * The query is scanned one UTF-8 character at a time by a small state
   * machine. For each term the modes advance in the order "scan" (skip
   * leading whitespace), "function" (optional "name:" prefix), "operator"
   * (operator characters), "quote" (optional opening quote) and "token"
   * (the term itself). The mode checks are consecutive `if` blocks, so a
   * single character can fall through several modes in one iteration.
   *
   * @param string $query Raw query text.
   * @return list<map<string, wild>> Dictionaries with keys "operator" (one
   *   of the OPERATOR_* constants), "quoted" (bool) and "value" (string, or
   *   null for absent/present operators). When functions are enabled, each
   *   dictionary also has a "function" key (string or null).
   * @throws PhutilSearchQueryCompilerSyntaxException If the query exceeds
   *   the maximum length, has unmatched quotes, has an invalid operator
   *   sequence, or has a token with no search term.
   */
  private function tokenizeQuery($query) {
    $maximum_bytes = 1024;

    $query_bytes = strlen($query);
    if ($query_bytes > $maximum_bytes) {
      throw new PhutilSearchQueryCompilerSyntaxException(
        pht(
          'Query is too long (%s bytes, maximum is %s bytes).',
          new PhutilNumber($query_bytes),
          new PhutilNumber($maximum_bytes)));
    }

    $query = phutil_utf8v($query);
    $length = count($query);

    $enable_functions = $this->getEnableFunctions();

    $mode = 'scan';
    $current_operator = array();
    $current_token = array();
    $current_function = null;
    $is_quoted = false;
    $tokens = array();

    if ($enable_functions) {
      $operator_characters = '[~=+-]';
    } else {
      $operator_characters = '[+-]';
    }

    // These patterns do not change while scanning, so build them once. Each
    // one must be a single-line literal: a line break inside the pattern
    // would become part of the expression and prevent it from matching.
    $space_pattern = '/^\s\z/u';
    $letter_pattern = '/^[a-zA-Z]\z/u';
    $operator_pattern = '/^'.$operator_characters.'\z/';

    for ($ii = 0; $ii < $length; $ii++) {
      $character = $query[$ii];

      if ($mode === 'scan') {
        if (preg_match($space_pattern, $character)) {
          continue;
        }

        $mode = 'function';
      }

      if ($mode === 'function') {
        $mode = 'operator';

        if ($enable_functions) {
          // Look ahead over a run of ASCII letters. The run only names a
          // function if it is immediately followed by a colon.
          $found = false;
          for ($jj = $ii; $jj < $length; $jj++) {
            if (preg_match($letter_pattern, $query[$jj])) {
              continue;
            }
            if ($query[$jj] === ':') {
              $found = $jj;
            }
            break;
          }

          if ($found !== false) {
            $function = array_slice($query, $ii, ($jj - $ii));
            $current_function = implode('', $function);

            // A bare leading colon yields an empty name, which is treated
            // as no function at all.
            if ($current_function === '') {
              $current_function = null;
            }

            // Resume scanning after the colon.
            $ii = $jj;
            continue;
          }
        }
      }

      if ($mode === 'operator') {
        if (preg_match($space_pattern, $character)) {
          continue;
        }

        if (preg_match($operator_pattern, $character)) {
          $current_operator[] = $character;
          continue;
        }

        $mode = 'quote';
      }

      if ($mode === 'quote') {
        $mode = 'token';

        if ($character === '"') {
          $is_quoted = true;
          continue;
        }
      }

      if ($mode === 'token') {
        $capture = false;
        $was_quoted = $is_quoted;
        if ($is_quoted) {
          // Only a closing quote ends a quoted token.
          if ($character === '"') {
            $capture = true;
            $mode = 'scan';
            $is_quoted = false;
          }
        } else {
          // Whitespace ends an unquoted token.
          if (preg_match($space_pattern, $character)) {
            $capture = true;
            $mode = 'scan';
          }

          // A quote also ends an unquoted token, and immediately begins a
          // new quoted token.
          if ($character === '"') {
            $capture = true;
            $mode = 'token';
            $is_quoted = true;
          }
        }

        if ($capture) {
          $token = array(
            'operator' => $current_operator,
            'quoted' => $was_quoted,
            'value' => $current_token,
          );

          if ($enable_functions) {
            $token['function'] = $current_function;
          }

          $tokens[] = $token;

          $current_operator = array();
          $current_token = array();
          $current_function = null;
          continue;
        } else {
          $current_token[] = $character;
        }
      }
    }

    if ($is_quoted) {
      throw new PhutilSearchQueryCompilerSyntaxException(
        pht(
          'Query contains unmatched double quotes.'));
    }

    // The loop only emits a token when it sees a terminator, so a query
    // which ends in the middle of a term still has pending state here. If
    // the input query has trailing space, like "a b ", there is nothing
    // pending and no final token is added.
    if ($current_function !== null || $current_operator || $current_token) {
      $token = array(
        'operator' => $current_operator,
        'quoted' => false,
        'value' => $current_token,
      );

      if ($enable_functions) {
        $token['function'] = $current_function;
      }

      $tokens[] = $token;
    }

    $results = array();
    foreach ($tokens as $token) {
      $value = implode('', $token['value']);
      $operator_string = implode('', $token['operator']);
      $is_quoted = $token['quoted'];

      switch ($operator_string) {
        case '-':
          $operator = self::OPERATOR_NOT;
          break;
        case '~':
          $operator = self::OPERATOR_SUBSTRING;
          break;
        case '=':
          $operator = self::OPERATOR_EXACT;
          break;
        case '+':
          $operator = self::OPERATOR_AND;
          break;
        case '':
          // See T12995. If this query term contains Chinese, Japanese or
          // Korean characters, treat the term as a substring term by default.
          // These languages do not separate words with spaces, so the term
          // search mode is normally useless.
          if ($enable_functions && !$is_quoted && phutil_utf8_is_cjk($value)) {
            $operator = self::OPERATOR_SUBSTRING;
          } else {
            $operator = self::OPERATOR_AND;
          }
          break;
        default:
          throw new PhutilSearchQueryCompilerSyntaxException(
            pht(
              'Query has an invalid sequence of operators ("%s").',
              $operator_string));
      }

      if ($value === '') {
        // An empty quoted term is always an error. An empty unquoted term
        // is only allowed as "function:-" (absent) or "function:~"
        // (present).
        $require_value = $is_quoted;

        switch ($operator) {
          case self::OPERATOR_NOT:
            if ($enable_functions && ($token['function'] !== null)) {
              $operator = self::OPERATOR_ABSENT;
              $value = null;
            } else {
              $require_value = true;
            }
            break;
          case self::OPERATOR_SUBSTRING:
            if ($enable_functions && ($token['function'] !== null)) {
              $operator = self::OPERATOR_PRESENT;
              $value = null;
            } else {
              $require_value = true;
            }
            break;
          default:
            $require_value = true;
            break;
        }

        if ($require_value) {
          throw new PhutilSearchQueryCompilerSyntaxException(
            pht(
              'Query contains a token ("%s") with no search term. Query '.
              'tokens specify text to search for.',
              $this->getDisplayToken($token)));
        }
      }

      $result = array(
        'operator' => $operator,
        'quoted' => $is_quoted,
        'value' => $value,
      );

      if ($enable_functions) {
        $result['function'] = $token['function'];
      }

      $results[] = $result;
    }

    return $results;
  }

  /**
   * Render one token as an operator prefix followed by its quoted value.
   *
   * @param PhutilSearchQueryToken $token Token to render.
   * @param PhutilSearchStemmer|null $stemmer Optional stemmer applied to the
   *   token value before it is quoted.
   * @return string Rendered token.
   * @throws PhutilSearchQueryCompilerSyntaxException If the token operator
   *   has no prefix.
   */
  private function renderToken(
    PhutilSearchQueryToken $token,
    PhutilSearchStemmer $stemmer = null) {
    $value = $token->getValue();

    if ($stemmer) {
      $value = $stemmer->stemToken($value);
    }

    $value = $this->quoteToken($value);
    $operator = $token->getOperator();
    $prefix = $this->getOperatorPrefix($operator);

    return $prefix.$value;
  }

  /**
   * Look up the prefix character for an operator.
   *
   * @param string $operator One of the OPERATOR_* constants.
   * @return string|null Prefix character, or null if the configured prefix
   *   is a space (meaning the operator needs no prefix).
   * @throws PhutilSearchQueryCompilerSyntaxException If the operator is
   *   anything other than "and" or "not".
   */
  private function getOperatorPrefix($operator) {
    $operators = $this->operators;

    switch ($operator) {
      case self::OPERATOR_AND:
        $prefix = $operators[0];
        break;
      case self::OPERATOR_NOT:
        $prefix = $operators[2];
        break;
      default:
        throw new PhutilSearchQueryCompilerSyntaxException(
          pht(
            'Unsupported operator prefix "%s".',
            $operator));
    }

    if ($prefix === ' ') {
      $prefix = null;
    }

    return $prefix;
  }

  /**
   * Wrap a value in the configured opening and closing quote characters.
   *
   * @param string|null $value Value to quote.
   * @return string Quoted value.
   */
  private function quoteToken($value) {
    $operators = $this->operators;

    $open_quote = $operators[10];
    $close_quote = $operators[11];

    return $open_quote.$value.$close_quote;
  }

  /**
   * Rebuild a human-readable form of a raw token for use in error messages.
   *
   * @param map<string, wild> $token Raw token with "operator" and "value"
   *   character lists, a "quoted" flag, and optionally a "function" name.
   * @return string Function prefix, operator characters and value, in that
   *   order.
   */
  private function getDisplayToken(array $token) {
    if (isset($token['function'])) {
      $function = $token['function'].':';
    } else {
      $function = '';
    }

    $operator_string = implode('', $token['operator']);

    $value = implode('', $token['value']);

    $is_quoted = $token['quoted'];
    if ($is_quoted) {
      $value = $this->quoteToken($value);
    }

    return sprintf('%s%s%s', $function, $operator_string, $value);
  }

}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Wed, Jun 18, 3:45 PM (2 d)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
238074
Default Alt Text
PhutilSearchQueryCompiler.php (10 KB)
Attached To
Mode
rP Phorge
Attached
Detach File
Event Timeline
Log In to Comment