当保存字符串到数据库时出现类似 \xF0\x9F\x92\x94 的错误,排查后发现是因为输入了 Emoji 表情。我的 MySQL 数据库使用的是 utf8 字符集(每个字符最多占三个字节),而 Emoji 表情等特殊符号要占四个字节,所以无法正常存入。找了很多方法,都没有从根源上解决问题,很是头疼。最后发现 GitHub 上有个很好用的轻量级开源工具 emoji-java,通过它基本上解决了我大部分的问题,同时配合前端限制 Emoji 表情输入,才算把问题解决。GitHub 地址:https://github.com/vdurmont/emoji-java 。下面附上完整的 Java 处理 Emoji 表情的工具类:
package com.im.app.api.util; import com.github.binarywang.java.emoji.EmojiConverter; import com.vdurmont.emoji.EmojiParser; import org.apache.commons.lang3.StringUtils; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * @Author: guo * @Description: ${description} * @Date: 2019/4/18 18:48 * @Version: 1.0 */ public class EmojiFilterUtil { private static EmojiConverter emojiConverter = EmojiConverter.getInstance(); /** * 判断字符串中是否含有表情 * @param source * @return */ public static boolean containsEmoji(String source) { int len = source.length(); boolean isEmoji = false; for (int i = 0; i < len; i++) { char hs = source.charAt(i); if (0xd800 <= hs && hs <= 0xdbff) { if (source.length() > 1) { char ls = source.charAt(i + 1); int uc = ((hs - 0xd800) * 0x400) + (ls - 0xdc00) + 0x10000; if (0x1d000 <= uc && uc <= 0x1f77f) { return true; } } } else { // non surrogate if (0x2100 <= hs && hs <= 0x27ff && hs != 0x263b) { return true; } else if (0x2B05 <= hs && hs <= 0x2b07) { return true; } else if (0x2934 <= hs && hs <= 0x2935) { return true; } else if (0x3297 <= hs && hs <= 0x3299) { return true; } else if (hs == 0xa9 || hs == 0xae || hs == 0x303d || hs == 0x3030 || hs == 0x2b55 || hs == 0x2b1c || hs == 0x2b1b || hs == 0x2b50 || hs == 0x231a) { return true; } if (!isEmoji && source.length() > 1 && i < source.length() - 1) { char ls = source.charAt(i + 1); if (ls == 0x20e3) { return true; } } } } return isEmoji; } /** * 判断某个字符是不是表情 * @param codePoint * @return */ private static boolean isEmojiCharacter(char codePoint) { return (codePoint == 0x0) || (codePoint == 0x9) || (codePoint == 0xA) || (codePoint == 0xD) || ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF)); } /** * 过滤emoji 或者 其他非文字类型的字符 * * @param source * @return */ public static String filterEmoji(String source) { if (StringUtils.isBlank(source)) { return source; } StringBuilder buf = null; int len = 
source.length(); for (int i = 0; i < len; i++) { char codePoint = source.charAt(i); if (isEmojiCharacter(codePoint)) { if (buf == null) { buf = new StringBuilder(source.length()); } buf.append(codePoint); } } if (buf == null) { return source; } else { if (buf.length() == len) { buf = null; return source; } else { return buf.toString(); } } } /** * 判断字符串中是否含有表情 * @param source * @return */ public static boolean hasEmoji(String source){ if (containsEmoji(source)){ return true; } source = StringUtils.isEmpty(source) ? "" : source.trim(); String noEmojiStr = EmojiParser.removeAllEmojis(source);//移除所有的表情之后的字符串8 if (noEmojiStr.length()<source.length()){ return true; } return false; } /** * 将emojiStr转为 带有表情的字符 * @param emojiStr * @return */ public static String emojiConverterUnicodeStr(String emojiStr){ String result = emojiConverter.toUnicode(emojiStr); return result; } /** * 带有表情的字符串转换为编码 * @param str * @return */ public static String emojiConverterToAlias(String str){ String result=emojiConverter.toAlias(str); return result; } public static void main(String[] args) { String string = "✌"; System.out.println(containsEmoji(string)); System.out.println(filterEmoji(string)); System.out.println(hasEmoji(string)); System.out.println(emojiConverterToAlias(string)); System.out.println(emojiConverterUnicodeStr(string)); } }
其中 pom.xml 需要添加的依赖如下:
<!-- Added later for emoji filtering. The original note says it can be removed, but EmojiFilterUtil imports com.github.binarywang.java.emoji.EmojiConverter from this artifact, so keep it while that class is in use. --> <dependency> <groupId>com.github.binarywang</groupId> <artifactId>java-emoji-converter</artifactId> <version>0.1.1</version> </dependency>
本文参与腾讯云自媒体分享计划,欢迎正在阅读的你也加入,一起分享。
我来说两句