select * from getWords( 'this is only a
one one figurative test test ', 0, 1 );
You will get this:
The function's code:
create FUNCTION [dbo].[getWords]
(
@text nvarchar(4000)
, @ReturnOnlyMeaningful
bit
, @SuppressDuplicates
bit
)
RETURNS
@words
TABLE (
WordID smallint --primary key
--, Position smallint
, Word varchar(255)
, OrderOfAppearance tinyint
, AlphabeticRank tinyint
, [Length] tinyint
, LengthRank tinyint
, Frequency tinyint
, WordCount tinyint
, DistinctWordCount tinyint
, Meaningful tinyint
, MeaningfulWordCount
tinyint
, MeaningfulDistinctWordCount
tinyint
, OriginalText varchar(4000)
)
AS
BEGIN
-- Parameter validation and cleanup
-- Do not process empty strings
if ltrim(rtrim(@text)) = ''
begin
return
end
-- Double quote characters if found
/*
select
REPLACE( 'this is" something', '"', '""' );
select
REPLACE( 'this is'' something', '''', '''''' );
*/
set @text = REPLACE( @text, '"', '""' );
set @text = REPLACE( @text, '''', '''''' );
-- If it is ok to display dupicates
if @SuppressDuplicates
= 0
begin
insert into
@words
select
occurrence
, display_term
, dense_rank()
over(
order by occurrence asc
)
as OrderOfAppearance
, DENSE_RANK()
over(
order by display_term asc
) as AlphabeticRank
, LEN( display_term ) as [Length]
, dense_rank()
over(
/*
partition
by
case
when
special_term = 'Exact Match'
then
1
else
0
end
*/
order by LEN( display_term ) desc
) as isMeaningfulLengthRank
, CAST( 1 as tinyint ) as Frequency
, null as WordCount
, null as DistinctWordCount
, cast(
case
when special_term = 'Exact Match'
then 1
else 0
end
as tinyint
) as isMeaningful
, null as MeaningfulWordCount
, null as MeaningfulDistinctWordCount
, @text as OriginalText
from
sys.dm_fts_parser ( N'"' + @text + '"', 1033, 0, 0)
where
-- Return only meaningful, according to the parameter
(
@ReturnOnlyMeaningful =
0
or (
@ReturnOnlyMeaningful =
1
and special_term = 'Exact Match'
)
)
-- Do not return end of file word.
and display_term not in
(
'END OF FILE' -- Seems to be
produced by hyphens
)
;
end
-- If we are to suppress duplicates
else
begin
insert into
@words
select
min( occurrence ) as occurrenceMin
, display_term
, dense_rank()
over(
order by min( occurrence ) asc
)
as OrderOfAppearance
, DENSE_RANK()
over(
order by display_term asc
) as AlphabeticRank
, LEN( display_term ) as [Length]
, dense_rank()
over(
/*
partition
by
case
when
special_term = 'Exact Match'
then
1
else
0
end
*/
order by LEN( display_term ) desc
) as isMeaningfulLengthRank
, COUNT(*) as Frequency
, null as WordCount
, null as DistinctWordCount
, cast(
case
when special_term = 'Exact Match'
then 1
else 0
end
as tinyint
) as isMeaningful
, null as MeaningfulWordCount
, null as MeaningfulDistinctWordCount
, @text as OriginalText
from
sys.dm_fts_parser ( N'"' + @text + '"', 1033, 0, 0 )
where
-- Return only meaningful, according to the parameter
(
@ReturnOnlyMeaningful =
0
or (
@ReturnOnlyMeaningful =
1
and special_term = 'Exact Match'
)
)
-- Do not return end of file word.
and display_term not in
(
'END OF FILE'
)
group by
display_term
, case
when special_term = 'Exact Match'
then 1
else 0
end
;
end
-- Word count
update
@words
set
WordCount = ( select SUM(Frequency) from @words )
, DistinctWordCount = ( select COUNT(*) from @words )
, MeaningfulWordCount
= ( select SUM( Meaningful * Frequency ) from @words )
, MeaningfulDistinctWordCount
= ( select SUM( Meaningful ) from @words )
;
-- Return
return;
end
No comments:
Post a Comment