这是一个没有任何库的快速而肮脏的解决方案,即只有 JRE 板载方式。它没有针对效率进行优化,并且不检查消息是否确实是 US-ASCII,它只是假设它。这只是一个概念证明:
package de.scrum_master.stackoverflow;
import java.util.BitSet;
public class ASCIIConverter {
public byte[] compress(String message) {
BitSet bits = new BitSet(message.length() * 7);
int currentBit = 0;
for (char character : message.toCharArray()) {
for (int bitInCharacter = 0; bitInCharacter < 7; bitInCharacter++) {
if ((character & 1 << bitInCharacter) > 0)
bits.set(currentBit);
currentBit++;
}
}
return bits.toByteArray();
}
public String decompress(byte[] compressedMessage) {
BitSet bits = BitSet.valueOf(compressedMessage);
int numBits = 8 * compressedMessage.length - compressedMessage.length % 7;
StringBuilder decompressedMessage = new StringBuilder(numBits / 7);
for (int currentBit = 0; currentBit < numBits; currentBit += 7) {
char character = (char) bits.get(currentBit, currentBit + 7).toByteArray()[0];
decompressedMessage.append(character);
}
return decompressedMessage.toString();
}
public static void main(String[] args) {
String[] messages = {
"Hello world!",
"This is my message.\n\tAnd this is indented!",
" !\"#$%&'()*+,-./0123456789:;<=>?\n"
+ "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_\n"
+ "`abcdefghijklmnopqrstuvwxyz{|}~",
"1234567890123456789012345678901234567890"
+ "1234567890123456789012345678901234567890"
+ "1234567890123456789012345678901234567890"
+ "1234567890123456789012345678901234567890"
};
ASCIIConverter asciiConverter = new ASCIIConverter();
for (String message : messages) {
System.out.println(message);
System.out.println("--------------------------------");
byte[] compressedMessage = asciiConverter.compress(message);
System.out.println("Number of ASCII characters = " + message.length());
System.out.println("Number of compressed bytes = " + compressedMessage.length);
System.out.println("--------------------------------");
System.out.println(asciiConverter.decompress(compressedMessage));
System.out.println("\n");
}
}
}
控制台日志如下所示:
Hello world!
--------------------------------
Number of ASCII characters = 12
Number of compressed bytes = 11
--------------------------------
Hello world!
This is my message.
And this is indented!
--------------------------------
Number of ASCII characters = 42
Number of compressed bytes = 37
--------------------------------
This is my message.
And this is indented!
!"#$%&'()*+,-./0123456789:;<=>?
@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
`abcdefghijklmnopqrstuvwxyz{|}~
--------------------------------
Number of ASCII characters = 97
Number of compressed bytes = 85
--------------------------------
!"#$%&'()*+,-./0123456789:;<=>?
@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
`abcdefghijklmnopqrstuvwxyz{|}~
1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
--------------------------------
Number of ASCII characters = 160
Number of compressed bytes = 140
--------------------------------
1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890