Commit c64692b3 by Hut

Fixed HuffmanCodeBuilder after update.

parent 24068248
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
<properties> <properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version> <java.version>1.9</java.version>
<junit.jupiter.version>5.0.3</junit.jupiter.version> <junit.jupiter.version>5.0.3</junit.jupiter.version>
<junit.platform.version>1.0.3</junit.platform.version> <junit.platform.version>1.0.3</junit.platform.version>
</properties> </properties>
......
...@@ -34,4 +34,9 @@ public class Decission { ...@@ -34,4 +34,9 @@ public class Decission {
return id; return id;
} }
@Override
public String toString() {
return "Decission{" + "token=" + token + ", id=" + id + ", propability=" + propability +
'}';
}
} }
...@@ -13,13 +13,12 @@ public class Lookup implements Serializable { ...@@ -13,13 +13,12 @@ public class Lookup implements Serializable {
private final LinkedHashMap<Token, Integer> tokens = new LinkedHashMap<>(); private final LinkedHashMap<Token, Integer> tokens = new LinkedHashMap<>();
private Decission[] finalData; private Decission[] finalData;
private long[] finishedSums; private int[] finishedSums;
private boolean isFinishedCollecting = false; private boolean isFinishedCollecting = false;
private int totalCounts = 0; private int totalCounts = 0;
int getDistinctTokens() { public int getDistinctTokens() {
finishGuard(); finishGuard();
return finalData.length; return finalData.length;
} }
...@@ -66,13 +65,17 @@ public class Lookup implements Serializable { ...@@ -66,13 +65,17 @@ public class Lookup implements Serializable {
} }
public Set<Token> allPossible() { public Set<Token> allPossible() {
return this.tokens.keySet(); finishGuard();
return Arrays.asList(finalData)
.stream()
.map(Decission::getToken)
.collect(Collectors.toSet());
} }
void finishCollection() { void finishCollection() {
if (!isFinishedCollecting) { if (!isFinishedCollecting) {
int size = tokens.size(); int size = tokens.size();
finishedSums = new long[size]; finishedSums = new int[size];
finalData = new Decission[size]; finalData = new Decission[size];
List<Entry<Token, Integer>> orderedEntries = tokens.entrySet() List<Entry<Token, Integer>> orderedEntries = tokens.entrySet()
.stream() .stream()
...@@ -86,8 +89,7 @@ public class Lookup implements Serializable { ...@@ -86,8 +89,7 @@ public class Lookup implements Serializable {
finishedSums[i] = sum; finishedSums[i] = sum;
finalData[i] = new Decission(entry.getKey(), finalData[i] = new Decission(entry.getKey(),
i, i,
this, this, (double) entry.getValue() / getTotalCounts());
entry.getValue() / getTotalCounts());
} }
this.tokens.clear(); this.tokens.clear();
isFinishedCollecting = true; isFinishedCollecting = true;
...@@ -98,21 +100,27 @@ public class Lookup implements Serializable { ...@@ -98,21 +100,27 @@ public class Lookup implements Serializable {
return totalCounts; return totalCounts;
} }
@Override
public String toString() {
return "Lookup [tokens= " + tokens.entrySet().stream().sorted((e1, e2) -> Integer.compare(e2
.getValue(), e1.getValue())).map(e -> String.format("%d*%s",
e.getValue(),
e.getKey())).collect(Collectors.joining(", ")) + "]";
}
/**
* Not thread safe!
*/
void resetFinishding() { void resetFinishding() {
int sum = 0;
for (int i = 0; i < finalData.length; i++) {
this.tokens.put(finalData[i].getToken(), finishedSums[i] - sum);
sum += finishedSums[i];
}
this.isFinishedCollecting = false; this.isFinishedCollecting = false;
this.finishedSums = null; this.finishedSums = null;
this.finalData = null; this.finalData = null;
} }
@Override
public String toString() {
return isFinishedCollecting ? "Lookup" :
("Lookup [tokens= " + tokens.entrySet()
.stream()
.sorted((e1, e2) -> Integer.compare(e2.getValue(),
e1.getValue()))
.map(e -> String.format("%d*%s", e.getValue(), e.getKey()))
.collect(Collectors.joining(", ")) + "]");
}
} }
...@@ -4,6 +4,10 @@ import com.google.common.base.Stopwatch; ...@@ -4,6 +4,10 @@ import com.google.common.base.Stopwatch;
import directory.passive.huffman.ByteHuffmanCodeBuilder; import directory.passive.huffman.ByteHuffmanCodeBuilder;
import directory.passive.huffman.HuffmanCode; import directory.passive.huffman.HuffmanCode;
import markov.huffman.HuffmanStore; import markov.huffman.HuffmanStore;
import markov.shortener.ShortenerByteHUffmanTrainerImpl;
import markov.shortener.ShortenerByteHuffmanImpl;
import markov.shortener.ShortenerIntHuffmanImpl;
import markov.shortener.ShortenerSimpleImpl;
import markov.stuff.CountMap; import markov.stuff.CountMap;
import markov.stuff.Inspector; import markov.stuff.Inspector;
import markov.stuff.SimpleCountMap; import markov.stuff.SimpleCountMap;
...@@ -191,20 +195,16 @@ public class Mail { ...@@ -191,20 +195,16 @@ public class Mail {
} }
} }
@SuppressWarnings("unchecked")
private Map<Integer, CountMap<Integer>> readIntMap() { private Map<Integer, CountMap<Integer>> readIntMap() {
HuffmanStore<Integer> store = readHUffmanMap(intsFileLocation); HuffmanStore<Integer> store = readHUffmanMap(intsFileLocation);
return store.getInternal(); return store.getInternal();
} }
@SuppressWarnings("unchecked")
private Map<Integer, CountMap<Byte>> readByteMap() { private Map<Integer, CountMap<Byte>> readByteMap() {
HuffmanStore<Byte> store = readHUffmanMap(bytesFileLocation); HuffmanStore<Byte> store = readHUffmanMap(bytesFileLocation);
return store.getInternal(); return store.getInternal();
} }
// we control what's in the file
@SuppressWarnings("unchecked")
private <X extends Serializable> HuffmanStore<X> readHUffmanMap(String location) { private <X extends Serializable> HuffmanStore<X> readHUffmanMap(String location) {
File file = Utils.getFile(location); File file = Utils.getFile(location);
System.out.println("reading from: " + file.getAbsolutePath()); System.out.println("reading from: " + file.getAbsolutePath());
......
...@@ -4,9 +4,8 @@ import java.io.Serializable; ...@@ -4,9 +4,8 @@ import java.io.Serializable;
public class Token implements Serializable, Comparable<Token> { public class Token implements Serializable, Comparable<Token> {
public static final Token START = new SpecialToken("START"); static final Token START = new SpecialToken("START");
public static final Token END = new SpecialToken("END"); static final Token END = new SpecialToken("END");
protected static final Token EMPTY = new SpecialToken("EMPTY");
public Token(String content) { public Token(String content) {
this(content, Glyph.Type.word); this(content, Glyph.Type.word);
...@@ -29,9 +28,6 @@ public class Token implements Serializable, Comparable<Token> { ...@@ -29,9 +28,6 @@ public class Token implements Serializable, Comparable<Token> {
if (this.equals(START)) { if (this.equals(START)) {
return "TOKEN <Start>"; return "TOKEN <Start>";
} }
if (this.equals(EMPTY)) {
return "TOKEN <empty>";
}
return "T=[" + content + "]"; return "T=[" + content + "]";
} }
......
...@@ -35,7 +35,7 @@ public class Tokenizer { ...@@ -35,7 +35,7 @@ public class Tokenizer {
do { do {
c = c.p; c = c.p;
tokenGlyphs.add(0, c.self); tokenGlyphs.add(0, c.self);
} while (c != null && c.p != null && c.p.self != null && } while (c.p != null && c.p.self != null &&
c.self.getType().equals(c.p.self.getType())); c.self.getType().equals(c.p.self.getType()));
go.p = null; // memory optimization go.p = null; // memory optimization
return Stream.<List<Glyph>>builder().add(tokenGlyphs).build(); return Stream.<List<Glyph>>builder().add(tokenGlyphs).build();
......
package markov; package markov.shortener;
import markov.Data;
import markov.stuff.CountMap; import markov.stuff.CountMap;
import markov.stuff.SimpleCountMap; import markov.stuff.SimpleCountMap;
import markov.stuff.Utils; import markov.stuff.Utils;
......
package markov; package markov.shortener;
import directory.passive.huffman.HuffmanCode; import directory.passive.huffman.HuffmanCode;
import markov.stuff.BitConverter; import markov.Data;
import markov.huffman.BitConverter;
import markov.stuff.Utils; import markov.stuff.Utils;
import java.util.Base64; import java.util.Base64;
......
package markov; package markov.shortener;
import directory.passive.huffman.HuffmanCode; import directory.passive.huffman.HuffmanCode;
import markov.stuff.BitConverter; import markov.Data;
import markov.huffman.BitConverter;
import java.util.Base64; import java.util.Base64;
import java.util.List; import java.util.List;
......
package markov; package markov.shortener;
import markov.Data;
import markov.Decission;
import markov.Lookup;
import markov.Prefix;
import markov.Sentence;
import markov.Shortener;
import markov.Token;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.Base64; import java.util.Base64;
......
package markov.stuff;
import com.tomgibara.bits.Bits;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * Packs a list of bits into a byte array and unpacks it again.
 *
 * <p>Layout of the encoded form: the three lowest bits of the first byte hold
 * {@code (bitCount + 3) % 8}, i.e. how many bits of the last byte are occupied
 * (0 meaning the last byte is completely used). The payload bits follow,
 * least-significant bit first, starting at bit 3 of the first byte.
 */
public class BitConverter {

    /** Number of bits at the start of the first byte that are reserved for the header. */
    private static final int HEADER_BITS = 3;

    /**
     * Encodes the given bits into a byte array.
     *
     * @param list the bits to encode; must not be {@code null} or contain {@code null}
     * @return the encoded bytes, always at least one byte long (the header)
     */
    public byte[] toBytes(List<Boolean> list) {
        int lengthInBit = list.size() + HEADER_BITS;
        // ceiling division; never below 1 because the header always needs room
        byte[] bytes = new byte[(lengthInBit + 7) / 8];
        bytes[0] = (byte) (lengthInBit % 8);

        // iterate instead of list.get(i) so non-random-access lists stay linear
        int position = HEADER_BITS;
        for (boolean bit : list) {
            if (bit) {
                bytes[position / 8] |= (byte) (1 << (position % 8));
            }
            position++;
        }
        return bytes;
    }

    /**
     * Decodes a byte array produced by {@link #toBytes(List)} back into its bits.
     *
     * @param array the encoded bytes; an empty array decodes to an empty list
     * @return the decoded bits in their original order
     * @throws IllegalArgumentException if the header describes a negative number of bits
     */
    public List<Boolean> toBits(byte[] array) {
        if (array.length == 0) {
            return Collections.emptyList();
        }
        int lastByteNotEmpty = array[0] & 0b00000111;
        int unusedBits = lastByteNotEmpty == 0 ? 0 : 8 - lastByteNotEmpty;
        int listSize = 8 * array.length - HEADER_BITS - unusedBits;
        // Only header values 1 and 2 in a single byte are impossible; a header of 3
        // is the valid encoding of an empty list and must round-trip.
        if (listSize < 0) {
            throw new IllegalArgumentException("corrupted data");
        }

        List<Boolean> list = new ArrayList<>(listSize);
        int end = listSize + HEADER_BITS;
        for (int position = HEADER_BITS; position < end; position++) {
            list.add((array[position / 8] & (1 << (position % 8))) != 0);
        }
        return list;
    }
}
...@@ -34,9 +34,15 @@ public class DataTests { ...@@ -34,9 +34,15 @@ public class DataTests {
@Test @Test
public void canReceieve() { public void canReceieve() {
data.finish();
assertTrue(data.fetch(pa).allPossible().contains(new Token("a"))); assertTrue(data.fetch(pa).allPossible().contains(new Token("a")));
assertEquals(1, data.fetch(pa).allPossible().size()); assertEquals(1, data.fetch(pa).allPossible().size());
}
@Test
public void canRecive2() {
data.add(pa, new Token("b")); data.add(pa, new Token("b"));
data.finish();
assertTrue(data.fetch(pa).allPossible().contains(new Token("b"))); assertTrue(data.fetch(pa).allPossible().contains(new Token("b")));
assertEquals(2, data.fetch(pa).allPossible().size()); assertEquals(2, data.fetch(pa).allPossible().size());
} }
......
...@@ -4,6 +4,7 @@ import org.junit.jupiter.api.BeforeEach; ...@@ -4,6 +4,7 @@ import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashSet;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertIterableEquals; import static org.junit.jupiter.api.Assertions.assertIterableEquals;
...@@ -11,6 +12,7 @@ import static org.junit.jupiter.api.Assertions.assertNotEquals; ...@@ -11,6 +12,7 @@ import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
// TODO split these methods
public class LookupTests { public class LookupTests {
Lookup lookup; Lookup lookup;
...@@ -25,16 +27,17 @@ public class LookupTests { ...@@ -25,16 +27,17 @@ public class LookupTests {
@Test @Test
public void add() { public void add() {
lookup.add(new Token("b")); lookup.add(new Token("b"));
lookup.finishCollection();
assertTrue(lookup.allPossible().contains(new Token("b"))); assertTrue(lookup.allPossible().contains(new Token("b")));
assertTrue(lookup.allPossible().contains(new Token("a"))); assertTrue(lookup.allPossible().contains(new Token("a")));
} }
@Test @Test
public void forRandom() { public void forRandom() {
lookup.finishCollection();
assertEquals(lookup.forRandom(0).getToken(), new Token("a")); assertEquals(lookup.forRandom(0).getToken(), new Token("a"));
assertEquals(lookup.forRandom(0.5d).getToken(), new Token("a")); assertEquals(lookup.forRandom(0.5d).getToken(), new Token("a"));
assertEquals(lookup.forRandom(0.999d).getToken(), new Token("a")); assertEquals(lookup.forRandom(0.999d).getToken(), new Token("a"));
// assertEquals(lookup.forRandom(1), new Token("a"));
assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(1)); assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(1));
assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(-1)); assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(-1));
assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(1.001d)); assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(1.001d));
...@@ -42,14 +45,18 @@ public class LookupTests { ...@@ -42,14 +45,18 @@ public class LookupTests {
lookup.resetFinishding(); lookup.resetFinishding();
lookup.add(new Token("b")); lookup.add(new Token("b"));
lookup.finishCollection();
Decission a = lookup.forRandom(0); Decission a = lookup.forRandom(0);
Decission b = lookup.forRandom(0.9d); Decission b = lookup.forRandom(0.9d);
assertNotEquals(a, b); assertNotEquals(a, b);
lookup.resetFinishding(); lookup.resetFinishding();
lookup.add(new Token("c")); lookup.add(new Token("c"));
lookup.finishCollection();
a = lookup.forRandom(0); a = lookup.forRandom(0);
b = lookup.forRandom(0.51d); b = lookup.forRandom(0.51d);
lookup.finishCollection();
Decission c = lookup.forRandom(0.9d); Decission c = lookup.forRandom(0.9d);
assertEquals(new Token("a"), a.getToken()); assertEquals(new Token("a"), a.getToken());
assertEquals(new Token("b"), b.getToken()); assertEquals(new Token("b"), b.getToken());
...@@ -59,6 +66,7 @@ public class LookupTests { ...@@ -59,6 +66,7 @@ public class LookupTests {
lookup.add(new Token("c")); lookup.add(new Token("c"));
lookup.add(new Token("c")); lookup.add(new Token("c"));
lookup.finishCollection();
assertEquals(new Token("a"), lookup.forRandom(0).getToken()); assertEquals(new Token("a"), lookup.forRandom(0).getToken());
assertEquals(new Token("a"), lookup.forRandom(0.19d).getToken()); assertEquals(new Token("a"), lookup.forRandom(0.19d).getToken());
assertEquals(new Token("b"), lookup.forRandom(0.2d).getToken()); assertEquals(new Token("b"), lookup.forRandom(0.2d).getToken());
...@@ -69,33 +77,43 @@ public class LookupTests { ...@@ -69,33 +77,43 @@ public class LookupTests {
@Test @Test
public void average() { public void average() {
lookup.finishCollection();
assertEquals(new Token("a"), lookup.average().getToken()); assertEquals(new Token("a"), lookup.average().getToken());
lookup.resetFinishding(); lookup.resetFinishding();
lookup.add(new Token("b")); lookup.add(new Token("b"));
lookup.finishCollection();
Token token = lookup.average().getToken(); Token token = lookup.average().getToken();
assertTrue(() -> token.equals(new Token("a")) || token.equals(new Token("b"))); assertTrue(() -> token.equals(new Token("a")) || token.equals(new Token("b")));
lookup.resetFinishding(); lookup.resetFinishding();
lookup.add(new Token("a")); lookup.add(new Token("a"));
lookup.finishCollection();
assertEquals(new Token("a"), lookup.average().getToken()); assertEquals(new Token("a"), lookup.average().getToken());
lookup.resetFinishding(); lookup.resetFinishding();
lookup.add(new Token("b")); lookup.add(new Token("b"));
lookup.add(new Token("b")); lookup.add(new Token("b"));
lookup.finishCollection();
assertEquals(new Token("b"), lookup.average().getToken()); assertEquals(new Token("b"), lookup.average().getToken());
} }
@Test @Test
public void allPossible() { public void allPossible() {
lookup.finishCollection();
assertIterableEquals(Arrays.asList(new Token("a")), lookup.allPossible()); assertIterableEquals(Arrays.asList(new Token("a")), lookup.allPossible());
lookup.resetFinishding();
lookup.add(new Token("b")); lookup.add(new Token("b"));
assertIterableEquals(Arrays.asList(new Token("a"), new Token("b")), lookup.allPossible()); lookup.finishCollection();
assertEquals(new HashSet<>(Arrays.asList(new Token("a"), new Token("b"))),
lookup.allPossible());
lookup.resetFinishding();
lookup.add(new Token("c")); lookup.add(new Token("c"));
assertIterableEquals( lookup.finishCollection();
Arrays.asList(new Token("a"), new Token("b"), new Token("c")), assertEquals(new HashSet<>(Arrays.asList(new Token("a"), new Token("b"), new Token("c"))),
lookup.allPossible()); lookup.allPossible());
lookup.resetFinishding();
lookup.add(new Token("a")); lookup.add(new Token("a"));
assertIterableEquals( lookup.finishCollection();
Arrays.asList(new Token("a"), new Token("b"), new Token("c")), assertEquals(new HashSet<>(Arrays.asList(new Token("a"), new Token("b"), new Token("c"))),
lookup.allPossible()); lookup.allPossible());
} }
...@@ -110,10 +128,15 @@ public class LookupTests { ...@@ -110,10 +128,15 @@ public class LookupTests {
@Test @Test
public void getDistinctTokens() { public void getDistinctTokens() {
lookup.finishCollection();
assertEquals(1, lookup.getDistinctTokens()); assertEquals(1, lookup.getDistinctTokens());
lookup.resetFinishding();
lookup.add(new Token("b")); lookup.add(new Token("b"));
lookup.finishCollection();
assertEquals(2, lookup.getDistinctTokens()); assertEquals(2, lookup.getDistinctTokens());
lookup.resetFinishding();
lookup.add(new Token("b")); lookup.add(new Token("b"));
lookup.finishCollection();
assertEquals(2, lookup.getDistinctTokens()); assertEquals(2, lookup.getDistinctTokens());
} }
} }
\ No newline at end of file
package markov.stuff; package markov.huffman;
import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Nested;
......
package markov.huffman; package markov.huffman;
import com.tomgibara.bits.BitVector; import com.tomgibara.bits.BitVector;
import markov.stuff.CountMap;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import java.util.BitSet;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
class HuffmanCodeBuilderTest { class HuffmanCodeBuilderTest {
// @Test // @Test
...@@ -45,21 +39,21 @@ class HuffmanCodeBuilderTest { ...@@ -45,21 +39,21 @@ class HuffmanCodeBuilderTest {
@Test @Test
public void testBitVectorFromTomgibara() { public void testBitVectorFromTomgibara() {
BitVector _ = new BitVector(0); BitVector bitVector = new BitVector(0);
assertEquals(0, _.size()); assertEquals(0, bitVector.size());
assertEquals("", _.toString()); assertEquals("", bitVector.toString());
BitVector _0 = _.resizedCopy(1, true); BitVector _0 = bitVector.resizedCopy(1, true);
_0.setBit(0, false); _0.setBit(0, false);
assertEquals(0, _.size()); assertEquals(0, bitVector.size());
assertEquals("", _.toString()); assertEquals("", bitVector.toString());
assertEquals(1, _0.size()); assertEquals(1, _0.size());
assertEquals("0", _0.toString()); assertEquals("0", _0.toString());
BitVector _1 = _.resizedCopy(1, true); BitVector _1 = bitVector.resizedCopy(1, true);
_1.setBit(0, true); _1.setBit(0, true);
assertEquals(0, _.size()); assertEquals(0, bitVector.size());
assertEquals("", _.toString()); assertEquals("", bitVector.toString());
assertEquals(1, _0.size()); assertEquals(1, _0.size());
assertEquals("0", _0.toString()); assertEquals("0", _0.toString());
assertEquals(1, _1.size()); assertEquals(1, _1.size());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment