Commit f6319bee by Hut

bugfixes

parent c64692b3
package markov; package markov;
import java.util.AbstractMap;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.ThreadLocalRandom;
import java.util.function.Function; import java.util.function.Function;
......
...@@ -22,7 +22,7 @@ public class Data implements Serializable{ ...@@ -22,7 +22,7 @@ public class Data implements Serializable{
} }
public void add(Prefix p, Token t) { public void add(Prefix p, Token t) {
Lookup l = data.getOrDefault(p, new Lookup()); Lookup l = data.getOrDefault(p, new LookupImpl());
l.add(t); l.add(t);
data.put(p, l); data.put(p, l);
} }
......
package markov; package markov;
import java.io.Serializable; import java.io.Serializable;
import java.util.Arrays;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors;
public class Lookup implements Serializable { public interface Lookup extends Serializable {
int getDistinctTokens();
private final LinkedHashMap<Token, Integer> tokens = new LinkedHashMap<>(); void add(Token t);
private Decission[] finalData;
private int[] finishedSums;
private boolean isFinishedCollecting = false;
private int totalCounts = 0; Decission forRandom(double random);
public int getDistinctTokens() { Decission forId(int id);
finishGuard();
return finalData.length;
}
private void finishGuard() { Decission average();
if (!isFinishedCollecting) {
throw new IllegalStateException("not jet finished");
}
}
public void add(Token t) { Set<Token> allPossible();
if (isFinishedCollecting) {
throw new IllegalStateException("already finished");
}
Integer i = tokens.getOrDefault(t, 0);
tokens.put(t, i + 1);
totalCounts++;
}
public Decission forRandom(double random) {
finishGuard();
if (random < 0 || random >= 1) {
throw new IllegalArgumentException("expected double [0; 1)");
}
int id = (int) Math.floor(random * totalCounts);
int i = Arrays.binarySearch(finishedSums, id);
i = Math.min(i >= 0 ? i + 1 : -1 * (i + 1), finishedSums.length - 1);
return finalData[i];
}
public Decission forId(int id) {
finishGuard();
if (id < 0 || id > finishedSums.length) {
throw new IllegalArgumentException(String.format("got %d but expected id [0; %d)",
id,
finishedSums.length));
}
return finalData[id];
}
public Decission average() {
finishGuard();
return finalData[this.finishedSums.length - 1];
}
public Set<Token> allPossible() {
finishGuard();
return Arrays.asList(finalData)
.stream()
.map(Decission::getToken)
.collect(Collectors.toSet());
}
void finishCollection() {
if (!isFinishedCollecting) {
int size = tokens.size();
finishedSums = new int[size];
finalData = new Decission[size];
List<Entry<Token, Integer>> orderedEntries = tokens.entrySet()
.stream()
.sequential()
.sorted(Comparator.comparingInt(Entry::getValue))
.collect(Collectors.toList());
int sum = 0;
for (int i = 0; i < orderedEntries.size(); i++) {
Entry<Token, Integer> entry = orderedEntries.get(i);
sum += entry.getValue();
finishedSums[i] = sum;
finalData[i] = new Decission(entry.getKey(),
i,
this, (double) entry.getValue() / getTotalCounts());
}
this.tokens.clear();
isFinishedCollecting = true;
}
}
int getTotalCounts() {
return totalCounts;
}
void resetFinishding() {
int sum = 0;
for (int i = 0; i < finalData.length; i++) {
this.tokens.put(finalData[i].getToken(), finishedSums[i] - sum);
sum += finishedSums[i];
}
this.isFinishedCollecting = false;
this.finishedSums = null;
this.finalData = null;
}
@Override
public String toString() {
return isFinishedCollecting ? "Lookup" :
("Lookup [tokens= " + tokens.entrySet()
.stream()
.sorted((e1, e2) -> Integer.compare(e2.getValue(),
e1.getValue()))
.map(e -> String.format("%d*%s", e.getValue(), e.getKey()))
.collect(Collectors.joining(", ")) + "]");
}
void finishCollection();
int getTotalCounts();
} }
package markov;
import java.util.Arrays;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * {@link Lookup} implementation that works in two phases and delegates every call to the
 * object representing the current phase.
 *
 * <ol>
 *   <li><b>Collecting</b> ({@link CollectingLookup}): tokens are counted via {@link #add(Token)};
 *       every query method throws {@link IllegalStateException}.</li>
 *   <li><b>Optimized</b> ({@link OptimizedLookup}): entered by {@link #finishCollection()};
 *       queries are answered from arrays sorted by ascending count, and further calls to
 *       {@link #add(Token)} or {@link #finishCollection()} throw
 *       {@link IllegalStateException}.</li>
 * </ol>
 *
 * <p>Both phase classes are non-static inner classes because they read and write the enclosing
 * instance's {@code totalCounts} and {@code innerLookup} fields.
 */
public class LookupImpl implements Lookup {

    private static final long serialVersionUID = 8382785417663574549L;

    /** Delegate for the current phase; replaced exactly once, by {@code finishCollection()}. */
    private Lookup innerLookup = new CollectingLookup();

    /** Number of tokens added so far (one per {@code add} call, duplicates included). */
    private int totalCounts = 0;

    /** Collecting phase: counts occurrences per token and rejects all queries. */
    private class CollectingLookup implements Lookup {

        private static final long serialVersionUID = 8508868695745897489L;

        /** Occurrence count per distinct token, in first-seen order. */
        private final LinkedHashMap<Token, Integer> tokens = new LinkedHashMap<>();

        /** Builds the exception thrown by every operation that requires the optimized phase. */
        private IllegalStateException newISE() {
            return new IllegalStateException("call finishCollecting first");
        }

        /** @throws IllegalStateException always; not available while collecting */
        @Override
        public int getDistinctTokens() {
            throw newISE();
        }

        /**
         * Increments the count for {@code t} and the enclosing instance's total.
         *
         * @param t token to record
         */
        @Override
        public void add(Token t) {
            tokens.merge(t, 1, Integer::sum);
            totalCounts++;
        }

        /** @throws IllegalStateException always; not available while collecting */
        @Override
        public Decission forRandom(double random) {
            throw newISE();
        }

        /** @throws IllegalStateException always; not available while collecting */
        @Override
        public Decission forId(int id) {
            throw newISE();
        }

        /** @throws IllegalStateException always; not available while collecting */
        @Override
        public Decission average() {
            throw newISE();
        }

        /** @throws IllegalStateException always; not available while collecting */
        @Override
        public Set<Token> allPossible() {
            throw newISE();
        }

        /**
         * Sorts the collected tokens by ascending count, builds the running-sum table and the
         * matching {@link Decission} array, and switches the enclosing instance to an
         * {@link OptimizedLookup}.
         */
        @Override
        public void finishCollection() {
            int size = tokens.size();
            int[] finishedSums = new int[size];
            Decission[] finalData = new Decission[size];

            // Stream.sorted is stable for ordered streams, so equal counts keep first-seen order.
            List<Entry<Token, Integer>> orderedEntries = tokens.entrySet()
                    .stream()
                    .sorted(Comparator.comparingInt(Entry::getValue))
                    .collect(Collectors.toList());

            int sum = 0;
            for (int i = 0; i < orderedEntries.size(); i++) {
                Entry<Token, Integer> entry = orderedEntries.get(i);
                sum += entry.getValue();
                // finishedSums[i] is the cumulative count of entries 0..i (strictly increasing).
                finishedSums[i] = sum;
                finalData[i] = new Decission(entry.getKey(),
                        i,
                        LookupImpl.this,
                        (double) entry.getValue() / totalCounts);
            }
            innerLookup = new OptimizedLookup(finalData, finishedSums);
        }

        /**
         * @throws IllegalStateException always; not available while collecting
         */
        // NOTE(review): the total is already known in this phase; throwing here looks stricter
        // than necessary - confirm whether callers rely on it before relaxing.
        @Override
        public int getTotalCounts() {
            throw newISE();
        }
    }

    /** Optimized phase: read-only view over the sorted data produced by the collecting phase. */
    private class OptimizedLookup implements Lookup {

        private static final long serialVersionUID = -8666528642123058396L;

        /** One entry per distinct token, ordered by ascending count. */
        private final Decission[] finalData;

        /** Running sums of the counts, parallel to {@link #finalData}. */
        private final int[] finishedSums;

        OptimizedLookup(Decission[] finalData, int[] finishedSums) {
            this.finalData = finalData;
            this.finishedSums = finishedSums;
        }

        /** @return number of distinct tokens that were collected */
        @Override
        public int getDistinctTokens() {
            return finalData.length;
        }

        /** @throws IllegalStateException always; collecting is over */
        @Override
        public void add(Token t) {
            throw new IllegalStateException("already optimized");
        }

        /**
         * Picks an entry with probability proportional to its count.
         *
         * @param random value in {@code [0, 1)}
         * @return the selected entry, or {@code null} when nothing was collected
         * @throws IllegalArgumentException if {@code random} is outside {@code [0, 1)}
         */
        @Override
        public Decission forRandom(double random) {
            if (random < 0 || random >= 1) {
                throw new IllegalArgumentException("expected double [0; 1)");
            }
            if (totalCounts == 0) {
                return null;
            }
            int id = (int) Math.floor(random * totalCounts);
            // Find the first entry whose running sum is greater than id: an exact hit on a sum
            // belongs to the next entry, a miss yields the insertion point directly.
            int found = Arrays.binarySearch(finishedSums, id);
            int index = found >= 0 ? found + 1 : -(found + 1);
            // Clamp defensively to the last entry.
            return finalData[Math.min(index, finishedSums.length - 1)];
        }

        /**
         * Returns the entry at position {@code id} in ascending-count order.
         *
         * @param id index in {@code [0, getDistinctTokens())}
         * @throws IllegalArgumentException if {@code id} is out of range or nothing was collected
         */
        @Override
        public Decission forId(int id) {
            if (id < 0 || id >= finalData.length || totalCounts == 0) {
                throw new IllegalArgumentException(String.format("got %d but expected id [0; %d)",
                        id,
                        finishedSums.length));
            }
            return finalData[id];
        }

        /**
         * Returns the last entry of the sorted data, i.e. the one with the highest count.
         *
         * @return that entry, or {@code null} when nothing was collected
         */
        @Override
        public Decission average() {
            if (totalCounts == 0) {
                return null;
            }
            return finalData[this.finishedSums.length - 1];
        }

        /** @return the set of all distinct tokens that were collected */
        @Override
        public Set<Token> allPossible() {
            return Arrays.stream(finalData).map(Decission::getToken).collect(Collectors.toSet());
        }

        /** @throws IllegalStateException always; the collection is already finished */
        // NOTE(review): a second call fails instead of being a no-op - confirm this is intended.
        @Override
        public void finishCollection() {
            throw new IllegalStateException("already finished");
        }

        /** @return total number of tokens added during the collecting phase */
        @Override
        public int getTotalCounts() {
            return totalCounts;
        }
    }

    /** Delegates to the current phase; throws {@link IllegalStateException} while collecting. */
    @Override
    public int getDistinctTokens() {
        return innerLookup.getDistinctTokens();
    }

    /** Delegates to the current phase; throws {@link IllegalStateException} once optimized. */
    @Override
    public void add(Token t) {
        innerLookup.add(t);
    }

    /** Delegates to the current phase; throws {@link IllegalStateException} while collecting. */
    @Override
    public Decission forRandom(double random) {
        return innerLookup.forRandom(random);
    }

    /** Delegates to the current phase; throws {@link IllegalStateException} while collecting. */
    @Override
    public Decission forId(int id) {
        return innerLookup.forId(id);
    }

    /** Delegates to the current phase; throws {@link IllegalStateException} while collecting. */
    @Override
    public Decission average() {
        return innerLookup.average();
    }

    /** Delegates to the current phase; throws {@link IllegalStateException} while collecting. */
    @Override
    public Set<Token> allPossible() {
        return innerLookup.allPossible();
    }

    /** Switches from collecting to optimized; throws {@link IllegalStateException} if repeated. */
    @Override
    public void finishCollection() {
        innerLookup.finishCollection();
    }

    /** Delegates to the current phase; throws {@link IllegalStateException} while collecting. */
    @Override
    public int getTotalCounts() {
        return innerLookup.getTotalCounts();
    }
}
...@@ -237,10 +237,8 @@ public class Mail { ...@@ -237,10 +237,8 @@ public class Mail {
statistics.getAverage(), statistics.getAverage(),
statistics.getMin(), statistics.getMin(),
statistics.getMax())); statistics.getMax()));
System.out.println("SD: " + Math.sqrt(stats.stream() System.out.println("SD: " + Math.sqrt(stats.stream().mapToDouble(e -> Math.pow(
.mapToDouble(e -> (double) (Math.pow( e - statistics.getAverage(), 2))
e - statistics.getAverage(),
2)))
.sum() / iterations)); .sum() / iterations));
} }
} }
......
package markov; package markov;
import java.io.Serializable; import java.io.Serializable;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
......
package markov; package markov;
import java.io.Serializable; import java.io.Serializable;
public class Token implements Serializable, Comparable<Token> { public class Token implements Serializable, Comparable<Token> {
static final Token START = new SpecialToken("START"); static final Token START = new SpecialToken("START");
static final Token END = new SpecialToken("END"); static final Token END = new SpecialToken("END");
private final int hashCode;
private String content;
private Glyph.Type type;
public Token(String content) { public Token(String content) {
this(content, Glyph.Type.word); this(content, Glyph.Type.word);
} }
...@@ -17,18 +20,15 @@ public class Token implements Serializable, Comparable<Token> { ...@@ -17,18 +20,15 @@ public class Token implements Serializable, Comparable<Token> {
this.hashCode = generateHashCode(); this.hashCode = generateHashCode();
} }
private String content; private int generateHashCode() {
private Glyph.Type type; int result = content != null ? content.hashCode() : 0;
result = 31 * result + (type != null ? type.hashCode() : 0);
return result;
}
@Override @Override
public String toString() { public int hashCode() {
if (this.equals(END)) { return hashCode;
return "Token <END>";
}
if (this.equals(START)) {
return "TOKEN <Start>";
}
return "T=[" + content + "]";
} }
@Override @Override
...@@ -42,17 +42,15 @@ public class Token implements Serializable, Comparable<Token> { ...@@ -42,17 +42,15 @@ public class Token implements Serializable, Comparable<Token> {
type == token.type; type == token.type;
} }
private final int hashCode;
private int generateHashCode() {
int result = content != null ? content.hashCode() : 0;
result = 31 * result + (type != null ? type.hashCode() : 0);
return result;
}
@Override @Override
public int hashCode() { public String toString() {
return hashCode; if (this.equals(END)) {
return "Token <END>";
}
if (this.equals(START)) {
return "TOKEN <Start>";
}
return "T=[" + content + "]";
} }
public String render(String prefix) { public String render(String prefix) {
......
...@@ -42,7 +42,7 @@ public class ShortenerByteHUffmanTrainerImpl extends ShortenerSimpleImpl { ...@@ -42,7 +42,7 @@ public class ShortenerByteHUffmanTrainerImpl extends ShortenerSimpleImpl {
if (i < 128) { if (i < 128) {
return Stream.of((byte) (i | 0b10000000)); return Stream.of((byte) (i | 0b10000000));
} else { } else {
Stream.Builder<Byte> builder = Stream.<Byte>builder(); Stream.Builder<Byte> builder = Stream.builder();
for (byte b : Utils.toByteArray(i)) { for (byte b : Utils.toByteArray(i)) {
builder.add(b); builder.add(b);
} }
......
...@@ -28,7 +28,7 @@ public class ShortenerByteHuffmanImpl extends ShortenerSimpleImpl { ...@@ -28,7 +28,7 @@ public class ShortenerByteHuffmanImpl extends ShortenerSimpleImpl {
if (i < 128) { if (i < 128) {
return Stream.of((byte) (i | 0b10000000)); return Stream.of((byte) (i | 0b10000000));
} else { } else {
Stream.Builder<Byte> builder = Stream.<Byte>builder(); Stream.Builder<Byte> builder = Stream.builder();
for (byte b : Utils.toByteArray(i)) { for (byte b : Utils.toByteArray(i)) {
builder.add(b); builder.add(b);
} }
......
...@@ -2,7 +2,8 @@ package markov; ...@@ -2,7 +2,8 @@ package markov;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
class GlyphTest { class GlyphTest {
...@@ -16,11 +17,6 @@ class GlyphTest { ...@@ -16,11 +17,6 @@ class GlyphTest {
assertEquals(a, a2); assertEquals(a, a2);
assertNotEquals(a, w); assertNotEquals(a, w);
assertNotEquals(a, c); assertNotEquals(a, c);
// assertNotEquals(Glyph.End, c);
// assertNotEquals(Glyph.Start, c);
// assertNotEquals(Glyph.Start, Glyph.End);
// assertEquals(Glyph.Start, Glyph.Start);
// assertEquals(Glyph.End, Glyph.End);
} }
} }
\ No newline at end of file
package markov;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.util.Arrays;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Placeholder test class: every statement in it is currently commented out, so nothing is
 * asserted when it runs.
 */
public class ParserStreamStyleTest {
// Active test method whose body consists solely of the disabled check below.
@Test
void tests() {
// Collector parserStreamStyle = new Collector(1);
// Assertions.assertIterableEquals(
// Arrays.asList(
// new Token(Glyph.Start.getContent(), Glyph.Type.control),
// new Token("ab"),
// new Token(" ", Glyph.Type.whitespace),
// new Token("c"),
// new Token(Glyph.End.getContent(), Glyph.Type.control)),
// parserStreamStyle.combineToTokens(Stream.of(Glyph.Start,
// new Glyph(Glyph.Type.word, "a"),
// new Glyph(Glyph.Type.word, "b"),
// new Glyph(Glyph.Type.whitespace, " "),
// new Glyph(Glyph.Type.word, "c"),
// Glyph.End, new Glyph(Glyph.Type.empty, "EMPTY")))
// .collect(Collectors.toList()));
}
// Second test, disabled in its entirety (annotation included).
// @Test
// void test2() {
// Collector parserStreamStyle = new Collector(2);
// Data data = parserStreamStyle.learn(Stream.<String>builder().add("a bc a d_e ").build());
// System.out.println(data.dumpStats());
// System.out.println(data.fetch(new Prefix(Arrays.asList(new Token("e")))).allPossible());
// }
}
\ No newline at end of file
package markov;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Test stub, presumably intended for ShortenerByteHuffmanImpl (going by the class name);
 * it contains no assertions yet.
 */
class ShortenerByteHuffmanImplTest {
// Empty placeholder: the body has no statements, so nothing is exercised or asserted.
@Test
void random() {
}
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment