function tokenizeZH(text) {
  // Word-granularity segmentation for Chinese ('zh').
  const segmenter = new Intl.Segmenter('zh', { granularity: 'word' });
  const segments = segmenter.segment(text);
  const words = [];
  // Each yielded item also carries index and isWordLike, not just segment.
  for (const { segment /* , index, isWordLike */ } of segments) {
    words.push(segment);
  }
  return words;
}
console.log(tokenizeZH('我不是太清楚'));
Live: https://jsfiddle.net/rgqen1zc/
Output:
["我不是", "太", "清楚"]
The first token is wrong: 我不是 should be two tokens, 我 ("I") and 不是 ("am not").
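For debugging, the segment data objects that Intl.Segmenter yields also expose index and isWordLike (the fields commented out above), so you can dump exactly what the engine produced. A minimal sketch (debugSegmentsZH is just an illustrative name; the exact split depends on the engine's underlying segmentation data, so output may differ across browsers and versions):

function debugSegmentsZH(text) {
  const segmenter = new Intl.Segmenter('zh', { granularity: 'word' });
  // Log offset, word-like flag, and the segment text for each piece.
  for (const { segment, index, isWordLike } of segmenter.segment(text)) {
    console.log(`${index}\t${isWordLike}\t${segment}`);
  }
}

debugSegmentsZH('我不是太清楚');
// Matching the output above, this would print something like:
// 0  true  我不是
// 3  true  太
// 4  true  清楚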