Some days ago, in the Q group There was a question: How to use SQL to implement the following calculations in SQL Server
It is known from the graph that the problem is how to calculate the Jaccard coefficients. Jaccard coefficients, also known as Jaccard similarity coefficients (jaccard similarity coefficient), are used to compare similarities and differences between finite sample sets. The higher the value of Jaccard system, the higher the similarity of samples.
SQL Server implements the intersection through intersect, and union implements unions, as follows:
Intersect intersection
The calculated intersection code snippet is as follows: (1 intersect 0 = null,1 intersect 1 = 1,0 intersect 0 = 0)
Union and set
Compute the union code snippet as follows: (1 Unions 0 = 1 0,1 union 1 = 1,0 Union 0 = 0)
Principle Introduction:
Get the field name and field ID of the table through the Sys.columns table, loop the field name and fetch the corresponding value from the table according to the field name: If the field name is a, take the value of ID 1: Select a from Test where id=1, take the value of ID 2: Select a From Test where id=2,
The 2 values are then intersected and set, and after the end of the field cycle, the @str_intersect is obtained, @str_union, the calculation ratio: Len (@str_intersect) *1.0/len (@str_union)
The final result is as follows:
All the code is as follows: (Build the table, see the Notes section)
--create table test (Id int,a int,b int,c int,d int,e int,f int)-- insert into test select 1,1,1,1,0,0,0--insert into test select 2,0,1,0,1,0,1--insert into test select 3,1,1,1,1,0,1--insert into test select 4,1,1,1,0,1,0--insert into test select 5,0,1,0,1,1,1--insert into Test select 6,0,0,1,0,1,1--drop table test_result--create table test_result (ID int,_1_ numeric (10,4), _2_ numeric (10,4), _3_ numeric (10,4), _4_ numeric (10,4), _5_ numeric (10,4), _6_ numeric (10,4))--insert into test_result select 1,null,null, Null,null,null,null--insert into test_result select 2,null,null,null,null,null, Null--insert into test_result select 3,null,null,null,null,null,null--insert into test_result select 4,null,null,Null,null,null,null--insert into test_result select 5,null,null,null,null,null, Null--insert into test_result select 6,null,null,null,null,null,nullselect name, column_id into #test from sys. columns where object_id =object_id (' dbo.test ') and column_id>1declare @id_1 Int=0, @id_2 int=0, @str_union varchar (max), @a_union int, @sql_union varchar (max), @str_ Intersect varchar (max), @a_intersect int, @sql_intersect varchar (max) declare @name varchar, @column_id intcreate table #a_union (num int) create table #a_ Intersect (num int) declare @min_id int=0, @max_id int=0, @global_min_id int=0, @global_ max_id int=0select @min_id =min (id), @max_id =max (ID), @global_min_id =min (ID), @global_max_id =max (ID) from testwhile (@min_id <[email protected]_id) beginselect @[email protected]_id,@[email protected]_min_idwhile @id_2 <[email protected]_max_idbeginselect @str_ Union= ', @str_intersect = ' while (Select count (1) from #test) >0 beginselect top 1 @name =name, @column_id =column_id from #test order by column_idselect @sql_union = ' select case ' + ' when (select count (1) from (select ' [email protected]+ ' from test where id= ' +convert (varchar, @id_1) + ' union select ' [ email protected]+ ' from test where id= ' +convert (varchar, @id_2) + ') as a) > 1 then 1 ' + ' when isnull (select ' [email protected]+ ' FROM TEST Where id= ' +convert (varchar, @id_1) + ' UNION SELECT ' [email protected]+ ' from test where id= ' +convert (varchar, @id_2) + '), 0) =0 then 0 else 1 END ' insert into #a_unioNexec (@sql_union) select @a_union =num from #a_uniondelete from #a_unionif (@a_union =1) select @[email protected]select @sql_intersect = ' Select case when isnull ((select ' [email protected]+ ' from test where id= ' +convert (varchar, @id_1) + ' intersect select ' [email protected]+ ' from test where id= ' +convert ( varchar, @id_2) + '), 0) =0 then 0 else 1 end ' insert into #a_intersectexec (@sql_ intersect) select @a_intersect =num from #a_intersectdelete from #a_intersectif (@a_ intersect=1) select @[email protected]delete from #test where @name =name and @column_id =column_idendinsert into #test select name,column_id from sys. columns where object_id =object_id (' dbo.test ') and column_id>1--select @str_union, @ Str_intersect, @column_id, @iD_1, @id_2if (@id_2 =1) update test_result set _1_= convert (Numeric ( 10,4), Len (@str_intersect) *1.0/len (@str_union)) where [email protected]_1if (@id_2 =2) update test_result set _2_= convert (Numeric (10,4), Len (@str_intersect) *1.0/len ( @str_union)) where [email protected]_1if (@id_2 =3) update test_ Result set _3_= convert (Numeric (10,4), Len (@str_intersect) *1.0/len (@str_union)) where [Email protected]_1if (@id_2 =4) update Test_result set _4_= Convert (numeric (10,4), Len (@str_intersect) *1.0/len (@str_union)) where [email protected]_1if (@id_ 2=5) update test_result set _5_= convert (numeric (10,4), Len (@str_ intersect) *1.0/len (@str_union)) where [email protected]_1if (@id_2 =6) Update test_result&nbSp;set _6_= convert (Numeric (10,4), Len (@str_intersect) *1.0/len (@str_union)) where [email protected]_1 set @[email protected]_2+1endset @[email protected]_id +1enddrop table #testdrop table #a_uniondrop table #a_ Intersect-------------Create table test_str_column (Id int,str_columns varchar (max))-- insert into test_str_column select 1,null--insert into test_str_column Select 2,null--insert into test_str_column select 3,null--insert into test_ Str_column select 4,null--insert into test_str_column select 5,null--insert into test_str_column select 6,nullselect * into #temp_test_table from testselect name,column_id into #tmp_test_columns from sys.columns where object_id=object_id (' dbo.test ') and column_id>1declare @id int, @col_name varchar, @col_id int, @string_columns varchar ( Max), @sql_rs varchar (max), @num_1 intcreate table #tmp_rs (num int) while (select count (1) from #temp_test_table) >0beginselect top 1 @id =id from # temp_test_table order by idset @string_columns = "while (Select count (1) from #tmp_test_columns) >0beginselect top 1 @col_name =name, @col_id =column_id from #tmp_test_columns order by column_idselect @sql_rs = ' select ' [email protected]_ Name+ ' from test where id= ' +convert (varchar, @id) insert into #tmp_rsexec (@sql_rs) select @num_1 =num from #tmp_rsif (@num_1 =1) set @[email protected][email protected ]_namedelete from #tmp_test_columns where @col_name =name and @col_id =column_ iddelete from #tmp_rsendinsert into #tmp_test_columns select name,column_id from sys.columns where object_id=object_id (' dbo.test ') and column_id>1update test_str_ column set [email protected]_columns where id [email protected]delete from #temp_test_table where @id =idenddrop table #temp_test_tabledrop table #tmp_test_columnsdrop table #tmp_rsselect * from testselect * from test_str_columnselect * from test_result
SQL Server calculates Jaccard factor-sim (I,J)