// BreakUpSentence.java
package com.example.demo.util;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utility for breaking a sentence into punctuation-terminated fragments and
 * then re-joining adjacent short fragments so that a joined pair never exceeds
 * {@link #SIZE} characters.
 */
public class BreakUpSentence {

    /**
     * Character class of the punctuation marks that terminate a fragment:
     * ASCII {@code , . ? !} plus the full-width comma (U+FF0C), the
     * ideographic full stop (U+3002), the full-width question mark (U+FF1F)
     * and the full-width exclamation mark (U+FF01).
     */
    private static final String SEPARATOR_REGEX = "[,.?!\uFF0C\u3002\uFF1F\uFF01]";

    /** Compiled once; {@link Pattern} instances are immutable and reusable. */
    private static final Pattern SEPARATOR_PATTERN = Pattern.compile(SEPARATOR_REGEX);

    /** Maximum length (in {@code char}s) of a merged pair of fragments. */
    private static final int SIZE = 10;

    /**
     * Demo entry point: splits a sample sentence, prints the fragments, then
     * prints the fragments after short neighbours have been merged.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String sentence = "很抱歉打扰到您了,祝您生活愉快,再见。";
        System.out.println("原句子:" + sentence);
        String[] words = splitSentence(sentence);
        System.out.println("\n拆分后的句子:");
        Arrays.stream(words).forEach(System.out::println);
        System.out.println("\n正在处理每个句子允许的字数是10以内中.....");
        List<String> wordList = limitNumber(words);
        System.out.println("\n最终处理完后的数据是: ");
        wordList.forEach(word ->
                System.out.println("word = " + word + "----------->length = " + word.length()));
    }

    /**
     * Splits a sentence into fragments, each ending with the separator that
     * terminated it. Text after the last separator (if any) becomes the final
     * fragment without a separator. Concatenating the returned fragments
     * always reproduces the input, so repeated or trailing punctuation is
     * never dropped.
     *
     * @param sentence the text to split; may be {@code null} or empty
     * @return the fragments in order; an empty array for {@code null} or empty input
     */
    public static String[] splitSentence(String sentence) {
        if (sentence == null || sentence.isEmpty()) {
            return new String[0];
        }
        List<String> fragments = new ArrayList<>();
        Matcher matcher = SEPARATOR_PATTERN.matcher(sentence);
        int fragmentStart = 0;
        // Walk the separators directly instead of pairing Pattern.split with
        // Matcher.find: split discards trailing empty strings, which made the
        // old pairing lose separators at the end of the input.
        while (matcher.find()) {
            fragments.add(sentence.substring(fragmentStart, matcher.end()));
            fragmentStart = matcher.end();
        }
        if (fragmentStart < sentence.length()) {
            fragments.add(sentence.substring(fragmentStart));
        }
        return fragments.toArray(new String[0]);
    }

    /**
     * Merges short fragments with their immediate successor. A fragment
     * shorter than {@link #SIZE} is joined with the next fragment when their
     * combined length does not exceed {@link #SIZE}; the successor is then
     * consumed and not examined again. At most two fragments are ever joined.
     * Every other fragment is copied through unchanged.
     *
     * <p>Each fragment that starts an iteration is also printed to standard
     * output together with its length.
     *
     * @param words the fragments to process; may be {@code null}
     * @return a new mutable list with the processed fragments; empty for
     *         {@code null} or empty input
     */
    public static List<String> limitNumber(String[] words) {
        List<String> wordList = new ArrayList<>();
        if (words == null) {
            return wordList;
        }
        for (int i = 0; i < words.length; i++) {
            String word = words[i];
            int length = word.length();
            System.out.println("word = " + word + "-------------->length = " + length);
            boolean hasNext = i + 1 < words.length;
            if (length < SIZE && hasNext && length + words[i + 1].length() <= SIZE) {
                wordList.add(word + words[i + 1]);
                i++; // the successor has been merged; skip it
            } else {
                wordList.add(word);
            }
        }
        return wordList;
    }
}
/*
输出
原句子:很抱歉打扰到您了,祝您生活愉快,再见。
拆分后的句子:
很抱歉打扰到您了,
祝您生活愉快,
再见。
正在处理每个句子允许的字数是10以内中.....
word = 很抱歉打扰到您了,-------------->length = 9
word = 祝您生活愉快,-------------->length = 7
最终处理完后的数据是:
word = 很抱歉打扰到您了,----------->length = 9
word = 祝您生活愉快,再见。----------->length = 10
转载请注明原文地址:https://ipadbbs.8miu.com/read-44634.html
*/