(1) Cosine similarity
Cosine similarity measures how similar two vectors are by taking the cosine of the angle between them. The cosine of a 0-degree angle is 1; the cosine of any other angle is less than 1, with a minimum value of -1. The cosine of the angle between two vectors therefore indicates whether they point in roughly the same direction, which is why it is commonly used for document comparison.
(2) In this implementation, no weighting (IDF, inverse document frequency) is applied; the number of times each term occurs is used directly as its component in the vector space.
[Java]
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
/**
 * Cosine similarity between two whitespace-separated strings, using raw
 * term counts as vector components (no IDF or other weighting).
 */
public class SimilarDegreeByCos
{
    /** Slot of the first string's count inside each per-term counter pair. */
    private static final int FIRST = 0;

    /** Slot of the second string's count inside each per-term counter pair. */
    private static final int SECOND = 1;

    /**
     * Calculates the cosine similarity of two strings.
     *
     * <p>Each string is split on runs of whitespace; every distinct term
     * becomes one dimension of the vector space and its occurrence count is
     * the component value. Terms are compared case-sensitively.
     *
     * @param str1 the first text
     * @param str2 the second text
     * @return the cosine of the angle between the two term-count vectors, in
     *         the range {@code [0.0, 1.0]}; {@code 0.0} when either string
     *         contains no terms (the cosine is undefined for a zero vector,
     *         so "no similarity" is reported instead of {@code NaN})
     * @throws NullPointerException if either argument is {@code null}
     */
    public static double getSimilarDegree(String str1, String str2)
    {
        // Vector space model: term -> {count in str1, count in str2}.
        Map<String, int[]> vectorSpace = new HashMap<String, int[]>();
        countTerms(str1, FIRST, vectorSpace);
        countTerms(str2, SECOND, vectorSpace);

        double squaredNorm1 = 0.0;
        double squaredNorm2 = 0.0;
        double dotProduct = 0.0;
        for (int[] counts : vectorSpace.values())
        {
            // Widen to double before multiplying so large counts cannot
            // overflow int arithmetic.
            double first = counts[FIRST];
            double second = counts[SECOND];
            squaredNorm1 += first * first;
            squaredNorm2 += second * second;
            dotProduct += first * second;
        }

        if (squaredNorm1 == 0.0 || squaredNorm2 == 0.0)
        {
            return 0.0;
        }

        // A single square root of the product avoids the rounding error of
        // sqrt(a) * sqrt(b), which can push identical inputs slightly above 1.
        double cosine = dotProduct / Math.sqrt(squaredNorm1 * squaredNorm2);
        return Math.min(1.0, cosine);
    }

    /**
     * Adds the term counts of {@code text} to the given slot of the shared
     * vector space. Empty tokens (from leading whitespace or an empty
     * string) are ignored.
     */
    private static void countTerms(String text, int slot, Map<String, int[]> vectorSpace)
    {
        String[] terms = text.split("\\s+");
        for (String term : terms)
        {
            if (term.length() == 0)
            {
                continue;
            }
            int[] counts = vectorSpace.get(term);
            if (counts == null)
            {
                counts = new int[2];
                vectorSpace.put(term, counts);
            }
            ++counts[slot];
        }
    }

    /**
     * Prints the similarity of a short query against four sample sentences.
     *
     * @param args ignored
     */
    public static void main(String args[])
    {
        String str1 = "gold silver truck ";
        String str2 = "Shipment of gold damaged in a fire ";
        String str3 = "Delivery of silver arrived in a silver truck ";
        String str4 = "Shipment of gold arrived in a truck ";
        String str5 = "gold ";
        System.out.println(SimilarDegreeByCos.getSimilarDegree(str1, str2));
        System.out.println(SimilarDegreeByCos.getSimilarDegree(str1, str3));
        System.out.println(SimilarDegreeByCos.getSimilarDegree(str1, str4));
        System.out.println(SimilarDegreeByCos.getSimilarDegree(str1, str5));
    }
}