Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F436995
PhabricatorFerretFulltextEngineExtension.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
11 KB
Referenced Files
None
Subscribers
None
PhabricatorFerretFulltextEngineExtension.php
View Options
<?php
/**
 * Fulltext engine extension which writes a search document into the tables
 * exposed by an object's Ferret engine: one document row, one row per field
 * key, and one row per ngram.
 */
final class PhabricatorFerretFulltextEngineExtension
  extends PhabricatorFulltextEngineExtension {

  const EXTENSIONKEY = 'ferret';

  /**
   * Get the human-readable name of this extension.
   *
   * @return string Translated extension name.
   */
  public function getExtensionName() {
    return pht('Ferret Fulltext Engine');
  }

  /**
   * Test whether this extension applies to an object.
   *
   * @param wild $object Object being considered for indexing.
   * @return bool True if the object implements PhabricatorFerretInterface.
   */
  public function shouldIndexFulltextObject($object) {
    return ($object instanceof PhabricatorFerretInterface);
  }

  /**
   * Write (or refresh) the Ferret index rows for an object.
   *
   * Reads relationship and field data from the abstract document, derives
   * the raw, term and stemmed ("normal") corpus for each field key, derives
   * the document's ngrams, and then inserts or updates the document, field
   * and ngram rows inside a single transaction opened on the object.
   *
   * @param wild $object Object to index. It is used here through
   *   newFerretEngine(), establishConnection(), getPHID() and the
   *   open/save/killTransaction() methods.
   * @param PhabricatorSearchAbstractDocument $document Abstract document
   *   providing relationship data, field data and creation/modification
   *   epochs.
   * @return void
   */
  public function indexFulltextObject(
    $object,
    PhabricatorSearchAbstractDocument $document) {

    $engine = $object->newFerretEngine();

    $is_closed = 0;
    $author_phid = null;
    $owner_phid = null;

    // Relationships are applied in order, so a later relationship of the
    // same kind overwrites the value set by an earlier one.
    foreach ($document->getRelationshipData() as $relationship) {
      list($related_type, $related_phid) = $relationship;
      switch ($related_type) {
        case PhabricatorSearchRelationship::RELATIONSHIP_OPEN:
          $is_closed = 0;
          break;
        case PhabricatorSearchRelationship::RELATIONSHIP_CLOSED:
          $is_closed = 1;
          break;
        case PhabricatorSearchRelationship::RELATIONSHIP_OWNER:
          $owner_phid = $related_phid;
          break;
        case PhabricatorSearchRelationship::RELATIONSHIP_UNOWNED:
          $owner_phid = null;
          break;
        case PhabricatorSearchRelationship::RELATIONSHIP_AUTHOR:
          $author_phid = $related_phid;
          break;
      }
    }

    $stemmer = $engine->newStemmer();

    // Copy all of the "title" and "body" fields to create new "core" fields.
    // This allows users to search "in title or body" with the "core:" prefix.
    // Every field is additionally copied into the "all" field.
    $document_fields = $document->getFieldData();
    $virtual_fields = array();
    foreach ($document_fields as $field) {
      $virtual_fields[] = $field;

      list($key, $raw_corpus) = $field;
      switch ($key) {
        case PhabricatorSearchDocumentFieldType::FIELD_TITLE:
        case PhabricatorSearchDocumentFieldType::FIELD_BODY:
          $virtual_fields[] = array(
            PhabricatorSearchDocumentFieldType::FIELD_CORE,
            $raw_corpus,
          );
          break;
      }

      $virtual_fields[] = array(
        PhabricatorSearchDocumentFieldType::FIELD_ALL,
        $raw_corpus,
      );
    }

    $empty_template = array(
      'raw' => array(),
      'term' => array(),
      'normal' => array(),
    );

    // Group the corpus variants by field key. A key may occur several times
    // (for example, "all" receives one entry per source field).
    $ferret_corpus_map = array();
    foreach ($virtual_fields as $field) {
      list($key, $raw_corpus) = $field;

      // Skip fields with no content. The null test is explicit because
      // passing null to strlen() is deprecated as of PHP 8.1; on older
      // runtimes a null corpus was already skipped by the strlen() test.
      if ($raw_corpus === null || !strlen($raw_corpus)) {
        continue;
      }

      $term_corpus = $engine->newTermsCorpus($raw_corpus);

      $normal_corpus = $stemmer->stemCorpus($raw_corpus);
      $normal_corpus = $engine->newTermsCorpus($normal_corpus);

      if (!isset($ferret_corpus_map[$key])) {
        $ferret_corpus_map[$key] = $empty_template;
      }

      $ferret_corpus_map[$key]['raw'][] = $raw_corpus;
      $ferret_corpus_map[$key]['term'][] = $term_corpus;
      $ferret_corpus_map[$key]['normal'][] = $normal_corpus;
    }

    // Collapse each key's corpus lists into single newline-joined strings,
    // and collect every non-empty string as input for ngram extraction.
    $ferret_fields = array();
    $ngrams_source = array();
    foreach ($ferret_corpus_map as $key => $fields) {
      $raw_corpus = $fields['raw'];
      $raw_corpus = implode("\n", $raw_corpus);
      if (strlen($raw_corpus)) {
        $ngrams_source[] = $raw_corpus;
      }

      $normal_corpus = $fields['normal'];
      $normal_corpus = implode("\n", $normal_corpus);
      if (strlen($normal_corpus)) {
        $ngrams_source[] = $normal_corpus;
      }

      $term_corpus = $fields['term'];
      $term_corpus = implode("\n", $term_corpus);
      if (strlen($term_corpus)) {
        $ngrams_source[] = $term_corpus;
      }

      $ferret_fields[] = array(
        'fieldKey' => $key,
        'rawCorpus' => $raw_corpus,
        'termCorpus' => $term_corpus,
        'normalCorpus' => $normal_corpus,
      );
    }
    $ngrams_source = implode("\n", $ngrams_source);

    $ngram_engine = new PhabricatorSearchNgramEngine();
    $ngrams = $ngram_engine->getTermNgramsFromString($ngrams_source);

    $conn = $object->establishConnection('w');

    // Drop any ngram which is listed in the engine's common ngrams table.
    if ($ngrams) {
      $common = queryfx_all(
        $conn,
        'SELECT ngram FROM %T WHERE ngram IN (%Ls)',
        $engine->getCommonNgramsTableName(),
        $ngrams);
      $common = ipull($common, 'ngram', 'ngram');

      foreach ($ngrams as $key => $ngram) {
        if (isset($common[$ngram])) {
          unset($ngrams[$key]);
          continue;
        }

        // NOTE: MySQL discards trailing whitespace in CHAR(X) columns.
        $trimmed_ngram = rtrim($ngram, ' ');
        if (isset($common[$trimmed_ngram])) {
          unset($ngrams[$key]);
          continue;
        }
      }
    }

    $object->openTransaction();

    try {
      // See T13587. If this document already exists in the index, we try to
      // update the existing rows to avoid leaving the ngrams table heavily
      // fragmented.

      $old_document = queryfx_one(
        $conn,
        'SELECT id FROM %T WHERE objectPHID = %s',
        $engine->getDocumentTableName(),
        $object->getPHID());
      if ($old_document) {
        $old_document_id = (int)$old_document['id'];
      } else {
        $old_document_id = null;
      }

      if ($old_document_id === null) {
        queryfx(
          $conn,
          'INSERT INTO %T (objectPHID, isClosed, epochCreated, epochModified,
            authorPHID, ownerPHID) VALUES (%s, %d, %d, %d, %ns, %ns)',
          $engine->getDocumentTableName(),
          $object->getPHID(),
          $is_closed,
          $document->getDocumentCreated(),
          $document->getDocumentModified(),
          $author_phid,
          $owner_phid);

        $document_id = $conn->getInsertID();
        $is_new = true;
      } else {
        $document_id = $old_document_id;

        queryfx(
          $conn,
          'UPDATE %T
            SET
              isClosed = %d,
              epochCreated = %d,
              epochModified = %d,
              authorPHID = %ns,
              ownerPHID = %ns
            WHERE id = %d',
          $engine->getDocumentTableName(),
          $is_closed,
          $document->getDocumentCreated(),
          $document->getDocumentModified(),
          $author_phid,
          $owner_phid,
          $document_id);

        $is_new = false;
      }

      $this->updateStoredFields(
        $conn,
        $is_new,
        $document_id,
        $engine,
        $ferret_fields);

      $this->updateStoredNgrams(
        $conn,
        $is_new,
        $document_id,
        $engine,
        $ngrams);

    } catch (Exception $ex) {
      // Roll back and rethrow. NOTE(review): Exception is presumably caught
      // separately from Throwable to support runtimes which predate
      // Throwable -- confirm before merging the two handlers.
      $object->killTransaction();
      throw $ex;
    } catch (Throwable $ex) {
      $object->killTransaction();
      throw $ex;
    }

    $object->saveTransaction();
  }

  /**
   * Synchronize the stored field rows of a document with a new set of
   * fields.
   *
   * Rows whose field key is no longer present are deleted, rows whose corpus
   * values differ are updated in place, rows for new field keys are
   * inserted, and rows which are already identical are left untouched.
   *
   * @param AphrontDatabaseConnection $conn Connection to run queries on.
   * @param bool $is_new True if the document row was just inserted; in that
   *   case no existing field rows are loaded.
   * @param int $document_id ID of the document row.
   * @param PhabricatorFerretEngine $engine Engine providing the field table
   *   name.
   * @param list<map<string, string>> $new_fields Field dictionaries with the
   *   keys "fieldKey", "rawCorpus", "termCorpus" and "normalCorpus".
   * @return void
   */
  private function updateStoredFields(
    AphrontDatabaseConnection $conn,
    $is_new,
    $document_id,
    PhabricatorFerretEngine $engine,
    $new_fields) {

    if (!$is_new) {
      $old_fields = queryfx_all(
        $conn,
        'SELECT * FROM %T WHERE documentID = %d',
        $engine->getFieldTableName(),
        $document_id);
    } else {
      $old_fields = array();
    }

    // Index both sides by field key so they can be compared directly.
    $old_fields = ipull($old_fields, null, 'fieldKey');
    $new_fields = ipull($new_fields, null, 'fieldKey');

    $delete_rows = array();
    $insert_rows = array();
    $update_rows = array();

    foreach ($old_fields as $field_key => $old_field) {
      if (!isset($new_fields[$field_key])) {
        $delete_rows[] = $old_field;
      }
    }

    $compare_keys = array(
      'rawCorpus',
      'termCorpus',
      'normalCorpus',
    );

    foreach ($new_fields as $field_key => $new_field) {
      if (!isset($old_fields[$field_key])) {
        $insert_rows[] = $new_field;
        continue;
      }

      $old_field = $old_fields[$field_key];

      $same_row = true;
      foreach ($compare_keys as $compare_key) {
        if ($old_field[$compare_key] !== $new_field[$compare_key]) {
          $same_row = false;
          break;
        }
      }

      if ($same_row) {
        continue;
      }

      // Carry the existing row ID over so the UPDATE can target it.
      $new_field['id'] = $old_field['id'];
      $update_rows[] = $new_field;
    }

    if ($delete_rows) {
      queryfx(
        $conn,
        'DELETE FROM %T WHERE id IN (%Ld)',
        $engine->getFieldTableName(),
        ipull($delete_rows, 'id'));
    }

    foreach ($update_rows as $update_row) {
      queryfx(
        $conn,
        'UPDATE %T
          SET
            rawCorpus = %s,
            termCorpus = %s,
            normalCorpus = %s
          WHERE id = %d',
        $engine->getFieldTableName(),
        $update_row['rawCorpus'],
        $update_row['termCorpus'],
        $update_row['normalCorpus'],
        $update_row['id']);
    }

    foreach ($insert_rows as $insert_row) {
      queryfx(
        $conn,
        'INSERT INTO %T (documentID, fieldKey, rawCorpus, termCorpus,
          normalCorpus) VALUES (%d, %s, %s, %s, %s)',
        $engine->getFieldTableName(),
        $document_id,
        $insert_row['fieldKey'],
        $insert_row['rawCorpus'],
        $insert_row['termCorpus'],
        $insert_row['normalCorpus']);
    }
  }

  /**
   * Synchronize the stored ngram rows of a document with a new set of
   * ngrams.
   *
   * Stored ngrams which are absent from the new set are deleted and new
   * ngrams which are not yet stored are inserted. Both statements are issued
   * in chunks produced by PhabricatorLiskDAO::chunkSQL().
   *
   * @param AphrontDatabaseConnection $conn Connection to run queries on.
   * @param bool $is_new True if the document row was just inserted; in that
   *   case no existing ngram rows are loaded.
   * @param int $document_id ID of the document row.
   * @param PhabricatorFerretEngine $engine Engine providing the ngrams table
   *   name.
   * @param list<string> $new_ngrams Ngrams the document should have.
   * @return void
   */
  private function updateStoredNgrams(
    AphrontDatabaseConnection $conn,
    $is_new,
    $document_id,
    PhabricatorFerretEngine $engine,
    $new_ngrams) {

    if ($is_new) {
      $old_ngrams = array();
    } else {
      $old_ngrams = queryfx_all(
        $conn,
        'SELECT id, ngram FROM %T WHERE documentID = %d',
        $engine->getNgramsTableName(),
        $document_id);
    }

    // Map of stored ngram => row ID, and map of new ngram => new ngram.
    $old_ngrams = ipull($old_ngrams, 'id', 'ngram');
    $new_ngrams = array_fuse($new_ngrams);

    $delete_ids = array();
    $insert_ngrams = array();

    // NOTE: MySQL discards trailing whitespace in CHAR(X) columns.
    // A stored ngram is therefore also compared with one trailing space
    // appended, and a new ngram is also compared with trailing spaces
    // removed, before they are treated as different.

    foreach ($old_ngrams as $ngram => $id) {
      if (isset($new_ngrams[$ngram])) {
        continue;
      }

      $untrimmed_ngram = $ngram.' ';
      if (isset($new_ngrams[$untrimmed_ngram])) {
        continue;
      }

      $delete_ids[] = $id;
    }

    foreach ($new_ngrams as $ngram) {
      if (isset($old_ngrams[$ngram])) {
        continue;
      }

      $trimmed_ngram = rtrim($ngram, ' ');
      if (isset($old_ngrams[$trimmed_ngram])) {
        continue;
      }

      $insert_ngrams[] = $ngram;
    }

    if ($delete_ids) {
      $sql = array();
      foreach ($delete_ids as $id) {
        $sql[] = qsprintf(
          $conn,
          '%d',
          $id);
      }

      foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) {
        queryfx(
          $conn,
          'DELETE FROM %T WHERE id IN (%LQ)',
          $engine->getNgramsTableName(),
          $chunk);
      }
    }

    if ($insert_ngrams) {
      $sql = array();
      foreach ($insert_ngrams as $ngram) {
        $sql[] = qsprintf(
          $conn,
          '(%d, %s)',
          $document_id,
          $ngram);
      }

      foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) {
        queryfx(
          $conn,
          'INSERT INTO %T (documentID, ngram) VALUES %LQ',
          $engine->getNgramsTableName(),
          $chunk);
      }
    }
  }

  /**
   * Build the search functions provided by this extension.
   *
   * Each function binds a function name ("all", "title", "body", "core",
   * "comment") to the matching document field key.
   *
   * @return list<FerretConfigurableSearchFunction> Search functions.
   */
  public function newFerretSearchFunctions() {
    return array(
      id(new FerretConfigurableSearchFunction())
        ->setFerretFunctionName('all')
        ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_ALL),
      id(new FerretConfigurableSearchFunction())
        ->setFerretFunctionName('title')
        ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_TITLE),
      id(new FerretConfigurableSearchFunction())
        ->setFerretFunctionName('body')
        ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_BODY),
      id(new FerretConfigurableSearchFunction())
        ->setFerretFunctionName('core')
        ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_CORE),
      id(new FerretConfigurableSearchFunction())
        ->setFerretFunctionName('comment')
        ->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_COMMENT),
    );
  }

}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Sun, May 4, 12:31 AM (1 d, 13 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
62952
Default Alt Text
PhabricatorFerretFulltextEngineExtension.php (11 KB)
Attached To
Mode
rP Phorge
Attached
Detach File
Event Timeline
Log In to Comment