直接上代码(返回0-1的小数,数值越大匹配度越高)
import java.util.HashMap;
import java.util.Map;
/**
 * Character-level cosine similarity between two strings.
 *
 * <p>Each string is turned into a vector of per-character occurrence counts, and the
 * similarity is the cosine of the angle between the two vectors. Because every count is
 * non-negative the result lies in {@code [0, 1]}; a larger value means a closer match.
 */
public class CosineSimilarityUtil {

    /**
     * Builds the character-frequency vector of a string.
     *
     * <p>Characters are counted per Unicode code point rather than per UTF-16 {@code char},
     * so a supplementary character (emoji, rare CJK ideograph) counts as one character
     * instead of two unrelated surrogate halves.
     *
     * @param text the text to analyse; {@code null} is treated like the empty string
     * @return a mutable map from code point to its number of occurrences (empty for
     *         {@code null} or empty input)
     */
    private static Map<Integer, Integer> getCharacterFrequency(String text) {
        Map<Integer, Integer> frequencyMap = new HashMap<>();
        if (text == null) {
            return frequencyMap;
        }
        text.codePoints().forEach(codePoint -> frequencyMap.merge(codePoint, 1, Integer::sum));
        return frequencyMap;
    }

    /**
     * Computes the Euclidean length of a frequency vector.
     *
     * @param frequencyMap code point to occurrence count
     * @return the square root of the sum of squared counts; {@code 0.0} for an empty map
     */
    private static double calculateMagnitude(Map<Integer, Integer> frequencyMap) {
        double sumOfSquares = 0;
        for (int freq : frequencyMap.values()) {
            // Widen before multiplying so large counts cannot overflow int.
            sumOfSquares += (double) freq * freq;
        }
        return Math.sqrt(sumOfSquares);
    }

    /**
     * Calculates the cosine similarity of two strings based on their character frequencies.
     *
     * @param text1 the first text; {@code null} is treated like the empty string
     * @param text2 the second text; {@code null} is treated like the empty string
     * @return a value in {@code [0, 1]}; {@code 0.0} when either text is {@code null} or
     *         empty, or when the texts share no character
     */
    public static double calculateCosineSimilarity(String text1, String text2) {
        Map<Integer, Integer> freqMap1 = getCharacterFrequency(text1);
        Map<Integer, Integer> freqMap2 = getCharacterFrequency(text2);

        // Dot product: only characters present in both texts contribute.
        double dotProduct = 0;
        for (Map.Entry<Integer, Integer> entry : freqMap1.entrySet()) {
            Integer otherFreq = freqMap2.get(entry.getKey());
            if (otherFreq != null) {
                // Multiply as double: an int product overflows once both counts exceed ~46k.
                dotProduct += (double) entry.getValue() * otherFreq;
            }
        }

        // Vector lengths.
        double magnitude1 = calculateMagnitude(freqMap1);
        double magnitude2 = calculateMagnitude(freqMap2);

        // An empty (or null) text has a zero-length vector; avoid dividing by zero.
        if (magnitude1 == 0 || magnitude2 == 0) {
            return 0.0;
        }

        // Floating-point rounding can push the quotient a hair above 1; clamp it.
        return Math.min(1.0, dotProduct / (magnitude1 * magnitude2));
    }

    /** Small demo that prints the similarity of a target text against three candidates. */
    public static void main(String[] args) {
        String targetText = "相卡整盒";
        String matchingText1 = "相卡随机一发";
        String matchingText2 = "吧唧随机一发";
        String matchingText3 = "路飞相卡";
        System.out.println("相卡随机一发: " + calculateCosineSimilarity(targetText, matchingText1));
        System.out.println("吧唧随机一发: " + calculateCosineSimilarity(targetText, matchingText2));
        System.out.println("路飞相卡: " + calculateCosineSimilarity(targetText, matchingText3));
    }
}
微信扫码查看本文
发表评论