Commit c64692b3 by Hut

Fixed HuffmanCodeBuilder after update.

parent 24068248
......@@ -11,7 +11,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version>
<java.version>1.9</java.version>
<junit.jupiter.version>5.0.3</junit.jupiter.version>
<junit.platform.version>1.0.3</junit.platform.version>
</properties>
......
......@@ -34,4 +34,9 @@ public class Decission {
return id;
}
/**
 * Renders the token, id and probability of this decision on one line, for debugging output.
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder("Decission{");
    text.append("token=").append(token);
    text.append(", id=").append(id);
    text.append(", propability=").append(propability);
    text.append('}');
    return text.toString();
}
}
......@@ -13,13 +13,12 @@ public class Lookup implements Serializable {
private final LinkedHashMap<Token, Integer> tokens = new LinkedHashMap<>();
private Decission[] finalData;
private long[] finishedSums;
private int[] finishedSums;
private boolean isFinishedCollecting = false;
private int totalCounts = 0;
int getDistinctTokens() {
public int getDistinctTokens() {
finishGuard();
return finalData.length;
}
......@@ -66,13 +65,17 @@ public class Lookup implements Serializable {
}
public Set<Token> allPossible() {
return this.tokens.keySet();
finishGuard();
return Arrays.asList(finalData)
.stream()
.map(Decission::getToken)
.collect(Collectors.toSet());
}
void finishCollection() {
if (!isFinishedCollecting) {
int size = tokens.size();
finishedSums = new long[size];
finishedSums = new int[size];
finalData = new Decission[size];
List<Entry<Token, Integer>> orderedEntries = tokens.entrySet()
.stream()
......@@ -86,8 +89,7 @@ public class Lookup implements Serializable {
finishedSums[i] = sum;
finalData[i] = new Decission(entry.getKey(),
i,
this,
entry.getValue() / getTotalCounts());
this, (double) entry.getValue() / getTotalCounts());
}
this.tokens.clear();
isFinishedCollecting = true;
......@@ -98,21 +100,27 @@ public class Lookup implements Serializable {
return totalCounts;
}
@Override
public String toString() {
return "Lookup [tokens= " + tokens.entrySet().stream().sorted((e1, e2) -> Integer.compare(e2
.getValue(), e1.getValue())).map(e -> String.format("%d*%s",
e.getValue(),
e.getKey())).collect(Collectors.joining(", ")) + "]";
}
/**
 * Puts a finished lookup back into the collecting state: the per-token counts are rebuilt
 * into {@code tokens} and the finished arrays are dropped.
 *
 * <p>Assumes {@code finishedSums} holds running totals in the order of {@code finalData},
 * as written by {@code finishCollection()}. Does nothing if the lookup is not finished.
 *
 * <p>Not thread-safe!
 */
void resetFinishding() {
    if (!isFinishedCollecting) {
        // nothing to undo; the finished arrays do not exist yet
        return;
    }
    int previousSum = 0;
    for (int i = 0; i < finalData.length; i++) {
        // a token's count is the step between two neighbouring running totals
        this.tokens.put(finalData[i].getToken(), finishedSums[i] - previousSum);
        previousSum = finishedSums[i];
    }
    this.isFinishedCollecting = false;
    this.finishedSums = null;
    this.finalData = null;
}
/**
 * Describes this lookup. While tokens are still being collected, every token is listed as
 * {@code count*token}, highest count first; once collection is finished only the bare name
 * is returned.
 */
@Override
public String toString() {
    if (isFinishedCollecting) {
        return "Lookup";
    }
    String listing = tokens.entrySet()
            .stream()
            .sorted((left, right) -> right.getValue().compareTo(left.getValue()))
            .map(entry -> String.format("%d*%s", entry.getValue(), entry.getKey()))
            .collect(Collectors.joining(", "));
    return "Lookup [tokens= " + listing + "]";
}
}
......@@ -4,6 +4,10 @@ import com.google.common.base.Stopwatch;
import directory.passive.huffman.ByteHuffmanCodeBuilder;
import directory.passive.huffman.HuffmanCode;
import markov.huffman.HuffmanStore;
import markov.shortener.ShortenerByteHUffmanTrainerImpl;
import markov.shortener.ShortenerByteHuffmanImpl;
import markov.shortener.ShortenerIntHuffmanImpl;
import markov.shortener.ShortenerSimpleImpl;
import markov.stuff.CountMap;
import markov.stuff.Inspector;
import markov.stuff.SimpleCountMap;
......@@ -191,20 +195,16 @@ public class Mail {
}
}
/** Reads the Huffman store found at {@code intsFileLocation} and hands back its internal map. */
@SuppressWarnings("unchecked")
private Map<Integer, CountMap<Integer>> readIntMap() {
    return this.<Integer>readHUffmanMap(intsFileLocation).getInternal();
}
/** Reads the Huffman store found at {@code bytesFileLocation} and hands back its internal map. */
@SuppressWarnings("unchecked")
private Map<Integer, CountMap<Byte>> readByteMap() {
    return this.<Byte>readHUffmanMap(bytesFileLocation).getInternal();
}
// we control what's in the file
@SuppressWarnings("unchecked")
private <X extends Serializable> HuffmanStore<X> readHUffmanMap(String location) {
File file = Utils.getFile(location);
System.out.println("reading from: " + file.getAbsolutePath());
......
......@@ -4,9 +4,8 @@ import java.io.Serializable;
public class Token implements Serializable, Comparable<Token> {
public static final Token START = new SpecialToken("START");
public static final Token END = new SpecialToken("END");
protected static final Token EMPTY = new SpecialToken("EMPTY");
static final Token START = new SpecialToken("START");
static final Token END = new SpecialToken("END");
public Token(String content) {
this(content, Glyph.Type.word);
......@@ -29,9 +28,6 @@ public class Token implements Serializable, Comparable<Token> {
if (this.equals(START)) {
return "TOKEN <Start>";
}
if (this.equals(EMPTY)) {
return "TOKEN <empty>";
}
return "T=[" + content + "]";
}
......
......@@ -35,7 +35,7 @@ public class Tokenizer {
do {
c = c.p;
tokenGlyphs.add(0, c.self);
} while (c != null && c.p != null && c.p.self != null &&
} while (c.p != null && c.p.self != null &&
c.self.getType().equals(c.p.self.getType()));
go.p = null; // memory optimization
return Stream.<List<Glyph>>builder().add(tokenGlyphs).build();
......
package markov;
package markov.shortener;
import markov.Data;
import markov.stuff.CountMap;
import markov.stuff.SimpleCountMap;
import markov.stuff.Utils;
......
package markov;
package markov.shortener;
import directory.passive.huffman.HuffmanCode;
import markov.stuff.BitConverter;
import markov.Data;
import markov.huffman.BitConverter;
import markov.stuff.Utils;
import java.util.Base64;
......
package markov;
package markov.shortener;
import directory.passive.huffman.HuffmanCode;
import markov.stuff.BitConverter;
import markov.Data;
import markov.huffman.BitConverter;
import java.util.Base64;
import java.util.List;
......
package markov;
package markov.shortener;
import markov.Data;
import markov.Decission;
import markov.Lookup;
import markov.Prefix;
import markov.Sentence;
import markov.Shortener;
import markov.Token;
import java.nio.ByteBuffer;
import java.util.Base64;
......
package markov.stuff;
import com.tomgibara.bits.Bits;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * Packs a list of bits into a byte array and unpacks it again.
 *
 * <p>Layout: the three lowest bits of the first byte store {@code (bitCount + 3) % 8}, i.e.
 * how many bits of the last byte are in use, where {@code 0} means the last byte is full.
 * The payload follows directly after this header and fills every byte from its least
 * significant bit upwards.
 */
public class BitConverter {

    /** Number of header bits at the start of the first byte. */
    private static final int HEADER_BITS = 3;

    /**
     * Encodes the given bits.
     *
     * @param list the bits to pack; may be empty, must not contain {@code null}
     * @return an array of at least one byte holding the header followed by the bits
     */
    public byte[] toBytes(List<Boolean> list) {
        int lengthInBit = list.size() + HEADER_BITS;
        // integer ceiling division; always >= 1 because of the header
        byte[] bytes = new byte[(lengthInBit + 7) / 8];
        bytes[0] = (byte) (lengthInBit % 8);
        int position = HEADER_BITS;
        for (Boolean bit : list) {
            if (bit) {
                bytes[position / 8] |= (byte) (1 << (position % 8));
            }
            position++;
        }
        return bytes;
    }

    /**
     * Decodes an array produced by {@link #toBytes(List)}.
     *
     * @param array the packed bytes; an empty array decodes to an empty list
     * @return the bits in their original order
     * @throws IllegalArgumentException if the header claims fewer used bits than the header
     *         itself occupies
     */
    public List<Boolean> toBits(byte[] array) {
        int arrayLength = array.length;
        if (arrayLength == 0) {
            return Collections.emptyList();
        }
        int lastByteNotEmpty = array[0] & 0b00000111;
        int usedBits = lastByteNotEmpty == 0
                ? 8 * arrayLength
                : 8 * (arrayLength - 1) + lastByteNotEmpty;
        int listSize = usedBits - HEADER_BITS;
        // Only a single byte whose header says 1 or 2 can get here. A header of exactly 3 is
        // the valid encoding of an empty list and must be accepted.
        if (listSize < 0) {
            throw new IllegalArgumentException("corrupted data");
        }
        List<Boolean> list = new ArrayList<>(listSize);
        for (int position = HEADER_BITS; position < usedBits; position++) {
            list.add(((array[position / 8] >> (position % 8)) & 1) == 1);
        }
        return list;
    }
}
......@@ -34,9 +34,15 @@ public class DataTests {
// After finishing, the lookup fetched for prefix pa must offer exactly one token, "a".
// NOTE(review): presumably the fixture adds Token "a" under pa; the setup is not visible here.
@Test
public void canReceieve() {
// allPossible() is only queried after data.finish()
data.finish();
assertTrue(data.fetch(pa).allPossible().contains(new Token("a")));
assertEquals(1, data.fetch(pa).allPossible().size());
}
// Adding a second token "b" under the same prefix must yield two possible tokens.
// NOTE(review): the expected size of 2 assumes the fixture already added one other token.
@Test
public void canRecive2() {
data.add(pa, new Token("b"));
// finish before fetching, as in canReceieve
data.finish();
assertTrue(data.fetch(pa).allPossible().contains(new Token("b")));
assertEquals(2, data.fetch(pa).allPossible().size());
}
......
......@@ -4,6 +4,7 @@ import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.Arrays;
import java.util.HashSet;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
......@@ -11,6 +12,7 @@ import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
// TODO split these methods
public class LookupTests {
Lookup lookup;
......@@ -25,16 +27,17 @@ public class LookupTests {
// A token added before finishing must show up in allPossible() next to the existing one.
// NOTE(review): Token "a" is presumably added by the fixture; the setup is not visible here.
@Test
public void add() {
lookup.add(new Token("b"));
// allPossible() is queried on the finished lookup
lookup.finishCollection();
assertTrue(lookup.allPossible().contains(new Token("b")));
assertTrue(lookup.allPossible().contains(new Token("a")));
}
@Test
public void forRandom() {
lookup.finishCollection();
assertEquals(lookup.forRandom(0).getToken(), new Token("a"));
assertEquals(lookup.forRandom(0.5d).getToken(), new Token("a"));
assertEquals(lookup.forRandom(0.999d).getToken(), new Token("a"));
// assertEquals(lookup.forRandom(1), new Token("a"));
assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(1));
assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(-1));
assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(1.001d));
......@@ -42,14 +45,18 @@ public class LookupTests {
lookup.resetFinishding();
lookup.add(new Token("b"));
lookup.finishCollection();
Decission a = lookup.forRandom(0);
Decission b = lookup.forRandom(0.9d);
assertNotEquals(a, b);
lookup.resetFinishding();
lookup.add(new Token("c"));
lookup.finishCollection();
a = lookup.forRandom(0);
b = lookup.forRandom(0.51d);
lookup.finishCollection();
Decission c = lookup.forRandom(0.9d);
assertEquals(new Token("a"), a.getToken());
assertEquals(new Token("b"), b.getToken());
......@@ -59,6 +66,7 @@ public class LookupTests {
lookup.add(new Token("c"));
lookup.add(new Token("c"));
lookup.finishCollection();
assertEquals(new Token("a"), lookup.forRandom(0).getToken());
assertEquals(new Token("a"), lookup.forRandom(0.19d).getToken());
assertEquals(new Token("b"), lookup.forRandom(0.2d).getToken());
......@@ -69,33 +77,43 @@ public class LookupTests {
// Exercises average() while tokens are added step by step; every step re-opens the lookup
// with resetFinishding() and finishes it again before asserting.
// NOTE(review): the first assertion assumes the fixture already added Token "a" once.
@Test
public void average() {
lookup.finishCollection();
assertEquals(new Token("a"), lookup.average().getToken());
lookup.resetFinishding();
lookup.add(new Token("b"));
lookup.finishCollection();
// "a" and "b" were each added once here, so either result is accepted
Token token = lookup.average().getToken();
assertTrue(() -> token.equals(new Token("a")) || token.equals(new Token("b")));
lookup.resetFinishding();
lookup.add(new Token("a"));
lookup.finishCollection();
// "a" has now been added more often than "b"
assertEquals(new Token("a"), lookup.average().getToken());
lookup.resetFinishding();
lookup.add(new Token("b"));
lookup.add(new Token("b"));
lookup.finishCollection();
// two more "b" additions put "b" ahead of "a"
assertEquals(new Token("b"), lookup.average().getToken());
}
@Test
public void allPossible() {
lookup.finishCollection();
assertIterableEquals(Arrays.asList(new Token("a")), lookup.allPossible());
lookup.resetFinishding();
lookup.add(new Token("b"));
assertIterableEquals(Arrays.asList(new Token("a"), new Token("b")), lookup.allPossible());
lookup.finishCollection();
assertEquals(new HashSet<>(Arrays.asList(new Token("a"), new Token("b"))),
lookup.allPossible());
lookup.resetFinishding();
lookup.add(new Token("c"));
assertIterableEquals(
Arrays.asList(new Token("a"), new Token("b"), new Token("c")),
lookup.finishCollection();
assertEquals(new HashSet<>(Arrays.asList(new Token("a"), new Token("b"), new Token("c"))),
lookup.allPossible());
lookup.resetFinishding();
lookup.add(new Token("a"));
assertIterableEquals(
Arrays.asList(new Token("a"), new Token("b"), new Token("c")),
lookup.finishCollection();
assertEquals(new HashSet<>(Arrays.asList(new Token("a"), new Token("b"), new Token("c"))),
lookup.allPossible());
}
......@@ -110,10 +128,15 @@ public class LookupTests {
// Checks that getDistinctTokens() counts distinct tokens rather than the number of additions.
// NOTE(review): the initial count of 1 assumes the fixture already added one token.
@Test
public void getDistinctTokens() {
lookup.finishCollection();
assertEquals(1, lookup.getDistinctTokens());
lookup.resetFinishding();
lookup.add(new Token("b"));
lookup.finishCollection();
assertEquals(2, lookup.getDistinctTokens());
lookup.resetFinishding();
// adding "b" a second time must not raise the distinct count
lookup.add(new Token("b"));
lookup.finishCollection();
assertEquals(2, lookup.getDistinctTokens());
}
}
\ No newline at end of file
package markov.stuff;
package markov.huffman;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Nested;
......
package markov.huffman;
import com.tomgibara.bits.BitVector;
import markov.stuff.CountMap;
import org.junit.jupiter.api.Test;
import java.util.BitSet;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
class HuffmanCodeBuilderTest {
// @Test
......@@ -45,21 +39,21 @@ class HuffmanCodeBuilderTest {
@Test
public void testBitVectorFromTomgibara() {
BitVector _ = new BitVector(0);
assertEquals(0, _.size());
assertEquals("", _.toString());
BitVector bitVector = new BitVector(0);
assertEquals(0, bitVector.size());
assertEquals("", bitVector.toString());
BitVector _0 = _.resizedCopy(1, true);
BitVector _0 = bitVector.resizedCopy(1, true);
_0.setBit(0, false);
assertEquals(0, _.size());
assertEquals("", _.toString());
assertEquals(0, bitVector.size());
assertEquals("", bitVector.toString());
assertEquals(1, _0.size());
assertEquals("0", _0.toString());
BitVector _1 = _.resizedCopy(1, true);
BitVector _1 = bitVector.resizedCopy(1, true);
_1.setBit(0, true);
assertEquals(0, _.size());
assertEquals("", _.toString());
assertEquals(0, bitVector.size());
assertEquals("", bitVector.toString());
assertEquals(1, _0.size());
assertEquals("0", _0.toString());
assertEquals(1, _1.size());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment