SQL Server calculates Jaccard factor-sim (I,J)

Source: Internet
Author: User
Tags how to use sql

Some days ago, in the Q group There was a question: How to use SQL to implement the following calculations in SQL Server

It is known from the graph that the problem is how to calculate the Jaccard coefficients. Jaccard coefficients, also known as Jaccard similarity coefficients (jaccard similarity coefficient), are used to compare similarities and differences between finite sample sets. The higher the value of Jaccard system, the higher the similarity of samples.


SQL Server implements the intersection through intersect, and union implements unions, as follows:


Intersect intersection

The calculated intersection code snippet is as follows: (1 intersect 0 = null,1 intersect 1 = 1,0 intersect 0 = 0)


Union and set

Compute the union code snippet as follows: (1 Unions 0 = 1 0,1 union 1 = 1,0 Union 0 = 0)


Principle Introduction:

Get the field name and field ID of the table through the Sys.columns table, loop the field name and fetch the corresponding value from the table according to the field name: If the field name is a, take the value of ID 1: Select a from Test where id=1, take the value of ID 2: Select a From Test where id=2,

The 2 values are then intersected and set, and after the end of the field cycle, the @str_intersect is obtained, @str_union, the calculation ratio: Len (@str_intersect) *1.0/len (@str_union)


The final result is as follows:


All the code is as follows: (Build the table, see the Notes section)

--create table test (Id int,a int,b int,c int,d int,e int,f int)-- insert into test select 1,1,1,1,0,0,0--insert into test select  2,0,1,0,1,0,1--insert into test select 3,1,1,1,1,0,1--insert into test  select 4,1,1,1,0,1,0--insert into test select 5,0,1,0,1,1,1--insert into  Test select 6,0,0,1,0,1,1--drop table test_result--create table test_result (ID  int,_1_ numeric (10,4), _2_ numeric (10,4), _3_ numeric (10,4), _4_ numeric (10,4), _5_  numeric (10,4), _6_ numeric (10,4))--insert into test_result select 1,null,null, Null,null,null,null--insert into test_result select 2,null,null,null,null,null, Null--insert into test_result select 3,null,null,null,null,null,null--insert into  test_result select 4,null,null,Null,null,null,null--insert into test_result select 5,null,null,null,null,null, Null--insert into test_result select 6,null,null,null,null,null,nullselect name, column_id  into  #test   from sys. columns where object_id =object_id (' dbo.test ') and column_id>1declare  @id_1   Int=0, @id_2  int=0, @str_union  varchar (max), @a_union  int, @sql_union  varchar (max), @str_ Intersect varchar (max), @a_intersect  int, @sql_intersect  varchar (max) declare  @name   varchar, @column_id  intcreate table  #a_union (num int) create table  #a_ Intersect (num int) declare  @min_id  int=0, @max_id  int=0, @global_min_id  int=0, @global_ max_id int=0select  @min_id =min (id), @max_id =max (ID), @global_min_id =min (ID), @global_max_id =max (ID)  from testwhile (@min_id <[email protected]_id) beginselect @[email protected]_id,@[email protected]_min_idwhile  @id_2 <[email protected]_max_idbeginselect  @str_ Union= ', @str_intersect = ' while (Select count (1)  from  #test) >0 beginselect top  1  @name =name, @column_id =column_id from  #test  order by column_idselect   @sql_union = ' select case  ' + ' when  (select count (1)  from  (select  ' [email  protected]+ '  from test where id= ' +convert (varchar, @id_1) + ' union select  ' [ email protected]+ '  from test where id= ' +convert (varchar, @id_2) + ')  as a) > 1 then 1 ' + '  when isnull (select  ' [email protected]+ '  FROM TEST  Where id= ' +convert (varchar, @id_1) + '  UNION  SELECT  ' [email protected]+ '  from  test where id= ' +convert (varchar, @id_2) + '), 0) =0 then 0 else 1  END ' insert into  #a_unioNexec (@sql_union) select  @a_union =num from  #a_uniondelete  from  #a_unionif (@a_union =1) select @[email protected]select  @sql_intersect = ' Select case when isnull ((select   ' [email protected]+ '  from test where id= ' +convert (varchar, @id_1) + '   intersect  select  ' [email protected]+ '  from test where id= ' +convert ( varchar, @id_2) + '), 0) =0 then 0 else 1 end ' insert into  #a_intersectexec (@sql_ intersect) select  @a_intersect =num from  #a_intersectdelete  from  #a_intersectif (@a_ intersect=1) select @[email protected]delete from  #test  where  @name =name and   @column_id =column_idendinsert into  #test  select name,column_id from sys. columns where object_id =object_id (' dbo.test ') and column_id>1--select  @str_union, @ Str_intersect, @column_id, @iD_1, @id_2if (@id_2 =1)     update test_result set _1_= convert (Numeric ( 10,4), Len (@str_intersect) *1.0/len (@str_union))  where [email protected]_1if (@id_2 =2)      update test_result set _2_= convert (Numeric (10,4), Len (@str_intersect) *1.0/len ( @str_union))  where [email protected]_1if (@id_2 =3)     update test_ Result set _3_= convert (Numeric (10,4), Len (@str_intersect) *1.0/len (@str_union))  where  [Email protected]_1if (@id_2 =4)     update Test_result set _4_=  Convert (numeric (10,4), Len (@str_intersect) *1.0/len (@str_union))  where [email protected]_1if (@id_ 2=5)     update test_result set _5_= convert (numeric (10,4), Len (@str_ intersect) *1.0/len (@str_union))  where [email protected]_1if (@id_2 =6)      Update test_result&nbSp;set _6_= convert (Numeric (10,4), Len (@str_intersect) *1.0/len (@str_union))  where [email  protected]_1   set @[email protected]_2+1endset @[email protected]_id +1enddrop table  #testdrop  table  #a_uniondrop  table  #a_ Intersect-------------Create table test_str_column (Id int,str_columns varchar (max))-- insert into test_str_column select 1,null--insert into test_str_column  Select 2,null--insert into test_str_column select 3,null--insert into test_ Str_column select 4,null--insert into test_str_column select 5,null--insert  into test_str_column select 6,nullselect * into  #temp_test_table  from  testselect name,column_id into  #tmp_test_columns  from sys.columns where  object_id=object_id (' dbo.test ')  and column_id>1declare  @id  int, @col_name  varchar, @col_id  int, @string_columns  varchar ( Max), @sql_rs  varchar (max), @num_1  intcreate table  #tmp_rs (num int) while  (select  count (1)  from  #temp_test_table) >0beginselect top 1  @id =id from # temp_test_table order by idset  @string_columns = "while (Select count (1)  from   #tmp_test_columns) >0beginselect top 1  @col_name =name, @col_id =column_id from  #tmp_test_columns  order by column_idselect  @sql_rs = ' select  ' [email protected]_ Name+ '  from test where id= ' +convert (varchar, @id) insert into  #tmp_rsexec (@sql_rs) select  @num_1 =num from  #tmp_rsif (@num_1 =1) set @[email protected][email protected ]_namedelete from  #tmp_test_columns  where  @col_name =name and  @col_id =column_ iddelete from  #tmp_rsendinsert into  #tmp_test_columns  select name,column_id  from  sys.columns where object_id=object_id (' dbo.test ')  and column_id>1update test_str_ column set [email protected]_columns where id [email protected]delete  from  #temp_test_table  where  @id =idenddrop table   #temp_test_tabledrop  table   #tmp_test_columnsdrop  table  #tmp_rsselect  * from testselect * from  test_str_columnselect * from test_result


SQL Server calculates Jaccard factor-sim (I,J)

Related Article

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.