Commit f6319bee by Hut

bugfixes

parent c64692b3
package markov;
import java.util.AbstractMap;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ThreadLocalRandom;
import java.util.function.Function;
......
......@@ -22,7 +22,7 @@ public class Data implements Serializable{
}
public void add(Prefix p, Token t) {
Lookup l = data.getOrDefault(p, new Lookup());
Lookup l = data.getOrDefault(p, new LookupImpl());
l.add(t);
data.put(p, l);
}
......
package markov;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Collectors;
public class Lookup implements Serializable {
public interface Lookup extends Serializable {
int getDistinctTokens();
private final LinkedHashMap<Token, Integer> tokens = new LinkedHashMap<>();
private Decission[] finalData;
private int[] finishedSums;
private boolean isFinishedCollecting = false;
void add(Token t);
private int totalCounts = 0;
Decission forRandom(double random);
public int getDistinctTokens() {
finishGuard();
return finalData.length;
}
Decission forId(int id);
private void finishGuard() {
if (!isFinishedCollecting) {
throw new IllegalStateException("not jet finished");
}
}
Decission average();
public void add(Token t) {
if (isFinishedCollecting) {
throw new IllegalStateException("already finished");
}
Integer i = tokens.getOrDefault(t, 0);
tokens.put(t, i + 1);
totalCounts++;
}
public Decission forRandom(double random) {
finishGuard();
if (random < 0 || random >= 1) {
throw new IllegalArgumentException("expected double [0; 1)");
}
int id = (int) Math.floor(random * totalCounts);
int i = Arrays.binarySearch(finishedSums, id);
i = Math.min(i >= 0 ? i + 1 : -1 * (i + 1), finishedSums.length - 1);
return finalData[i];
}
public Decission forId(int id) {
finishGuard();
if (id < 0 || id > finishedSums.length) {
throw new IllegalArgumentException(String.format("got %d but expected id [0; %d)",
id,
finishedSums.length));
}
return finalData[id];
}
public Decission average() {
finishGuard();
return finalData[this.finishedSums.length - 1];
}
public Set<Token> allPossible() {
finishGuard();
return Arrays.asList(finalData)
.stream()
.map(Decission::getToken)
.collect(Collectors.toSet());
}
void finishCollection() {
if (!isFinishedCollecting) {
int size = tokens.size();
finishedSums = new int[size];
finalData = new Decission[size];
List<Entry<Token, Integer>> orderedEntries = tokens.entrySet()
.stream()
.sequential()
.sorted(Comparator.comparingInt(Entry::getValue))
.collect(Collectors.toList());
int sum = 0;
for (int i = 0; i < orderedEntries.size(); i++) {
Entry<Token, Integer> entry = orderedEntries.get(i);
sum += entry.getValue();
finishedSums[i] = sum;
finalData[i] = new Decission(entry.getKey(),
i,
this, (double) entry.getValue() / getTotalCounts());
}
this.tokens.clear();
isFinishedCollecting = true;
}
}
int getTotalCounts() {
return totalCounts;
}
void resetFinishding() {
int sum = 0;
for (int i = 0; i < finalData.length; i++) {
this.tokens.put(finalData[i].getToken(), finishedSums[i] - sum);
sum += finishedSums[i];
}
this.isFinishedCollecting = false;
this.finishedSums = null;
this.finalData = null;
}
@Override
public String toString() {
return isFinishedCollecting ? "Lookup" :
("Lookup [tokens= " + tokens.entrySet()
.stream()
.sorted((e1, e2) -> Integer.compare(e2.getValue(),
e1.getValue()))
.map(e -> String.format("%d*%s", e.getValue(), e.getKey()))
.collect(Collectors.joining(", ")) + "]");
}
Set<Token> allPossible();
void finishCollection();
int getTotalCounts();
}
package markov;
import java.util.Arrays;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * {@link Lookup} implementation that works in two phases.
 *
 * <p>It starts in a collecting phase, in which only {@link #add(Token)} and
 * {@link #finishCollection()} are usable and every query throws
 * {@link IllegalStateException}. {@link #finishCollection()} then switches the
 * instance to an optimized, read-only phase in which the queries work and
 * {@link #add(Token)} / {@link #finishCollection()} throw
 * {@link IllegalStateException}.
 *
 * <p>All public methods simply forward to the delegate of the current phase.
 */
public class LookupImpl implements Lookup {
    private static final long serialVersionUID = 8382785417663574549L;

    /** Delegate for the current phase; replaced by {@code finishCollection()}. */
    private Lookup innerLookup = new CollectingLookup();

    /** Number of tokens added so far (duplicates counted); shared by both phases. */
    private int totalCounts = 0;

    /** Collecting phase: counts occurrences per token, rejects all queries. */
    private class CollectingLookup implements Lookup {
        private static final long serialVersionUID = 8508868695745897489L;

        /** Occurrence count per distinct token, in first-insertion order. */
        private final LinkedHashMap<Token, Integer> tokens = new LinkedHashMap<>();

        @Override
        public int getDistinctTokens() {
            throw newISE();
        }

        /** Builds the exception thrown by every query during the collecting phase. */
        private IllegalStateException newISE() {
            return new IllegalStateException("call finishCollecting first");
        }

        /** Increments the count of {@code t} and the enclosing total. */
        @Override
        public void add(Token t) {
            tokens.merge(t, 1, Integer::sum);
            totalCounts++;
        }

        @Override
        public Decission forRandom(double random) {
            throw newISE();
        }

        @Override
        public Decission forId(int id) {
            throw newISE();
        }

        @Override
        public Decission average() {
            throw newISE();
        }

        @Override
        public Set<Token> allPossible() {
            throw newISE();
        }

        /**
         * Freezes the collected counts and switches the enclosing lookup to the
         * optimized phase.
         *
         * <p>Entries are sorted ascending by count. For position {@code i} the
         * running sum of counts up to and including {@code i} is stored next to a
         * {@link Decission} carrying the token, its index, the enclosing lookup
         * and the token's relative frequency (count / total).
         */
        @Override
        public void finishCollection() {
            int size = tokens.size();
            int[] finishedSums = new int[size];
            Decission[] finalData = new Decission[size];
            List<Entry<Token, Integer>> orderedEntries = tokens.entrySet()
                    .stream()
                    .sorted(Comparator.comparingInt(Entry::getValue))
                    .collect(Collectors.toList());
            int sum = 0;
            for (int i = 0; i < orderedEntries.size(); i++) {
                Entry<Token, Integer> entry = orderedEntries.get(i);
                sum += entry.getValue();
                finishedSums[i] = sum;
                finalData[i] = new Decission(entry.getKey(),
                        i,
                        LookupImpl.this,
                        (double) entry.getValue() / totalCounts);
            }
            innerLookup = new OptimizedLookup(finalData, finishedSums);
        }

        @Override
        public int getTotalCounts() {
            throw newISE();
        }
    }

    /** Optimized phase: read-only queries over the frozen arrays. */
    private class OptimizedLookup implements Lookup {
        private static final long serialVersionUID = -8666528642123058396L;

        /** Decisions ordered ascending by occurrence count. */
        private final Decission[] finalData;

        /** Cumulative occurrence counts; {@code finishedSums[i]} covers entries 0..i. */
        private final int[] finishedSums;

        OptimizedLookup(Decission[] finalData, int[] finishedSums) {
            this.finalData = finalData;
            this.finishedSums = finishedSums;
        }

        @Override
        public int getDistinctTokens() {
            return finalData.length;
        }

        @Override
        public void add(Token t) {
            throw new IllegalStateException("already optimized");
        }

        /**
         * Picks a decision with probability proportional to its occurrence count.
         *
         * @param random value in {@code [0; 1)}
         * @return the selected decision, or {@code null} if nothing was collected
         * @throws IllegalArgumentException if {@code random} is outside
         *         {@code [0; 1)} or is {@code NaN}
         */
        @Override
        public Decission forRandom(double random) {
            // Positive form of the range check: NaN fails every comparison, so the
            // negated-range form "random < 0 || random >= 1" would let it through.
            if (!(random >= 0 && random < 1)) {
                throw new IllegalArgumentException("expected double [0; 1)");
            }
            if (totalCounts == 0) {
                return null;
            }
            int id = (int) Math.floor(random * totalCounts);
            int pos = Arrays.binarySearch(finishedSums, id);
            // Exact hit: id equals the cumulative sum at pos, i.e. it is the first
            // slot of the next bucket. Miss: binarySearch returns
            // -(insertionPoint) - 1, and the insertion point is the wanted bucket.
            int bucket = pos >= 0 ? pos + 1 : -(pos + 1);
            // Clamp so the index can never run past the last bucket.
            return finalData[Math.min(bucket, finishedSums.length - 1)];
        }

        /**
         * Returns the decision stored at index {@code id}.
         *
         * @throws IllegalArgumentException if {@code id} is outside
         *         {@code [0; distinct tokens)} or nothing was collected
         */
        @Override
        public Decission forId(int id) {
            if (id < 0 || id >= finalData.length || totalCounts == 0) {
                throw new IllegalArgumentException(String.format("got %d but expected id [0; %d)",
                        id,
                        finishedSums.length));
            }
            return finalData[id];
        }

        /**
         * Returns the last decision of the ascending-by-count order, i.e. one with
         * the highest occurrence count, or {@code null} if nothing was collected.
         */
        @Override
        public Decission average() {
            if (totalCounts == 0) {
                return null;
            }
            return finalData[this.finishedSums.length - 1];
        }

        @Override
        public Set<Token> allPossible() {
            return Arrays.stream(finalData).map(Decission::getToken).collect(Collectors.toSet());
        }

        @Override
        public void finishCollection() {
            throw new IllegalStateException("already finished");
        }

        @Override
        public int getTotalCounts() {
            return totalCounts;
        }
    }

    /** Forwards to the current phase; throws {@link IllegalStateException} while collecting. */
    @Override
    public int getDistinctTokens() {
        return innerLookup.getDistinctTokens();
    }

    /** Forwards to the current phase; throws {@link IllegalStateException} once finished. */
    @Override
    public void add(Token t) {
        innerLookup.add(t);
    }

    /** Forwards to the current phase; throws {@link IllegalStateException} while collecting. */
    @Override
    public Decission forRandom(double random) {
        return innerLookup.forRandom(random);
    }

    /** Forwards to the current phase; throws {@link IllegalStateException} while collecting. */
    @Override
    public Decission forId(int id) {
        return innerLookup.forId(id);
    }

    /** Forwards to the current phase; throws {@link IllegalStateException} while collecting. */
    @Override
    public Decission average() {
        return innerLookup.average();
    }

    /** Forwards to the current phase; throws {@link IllegalStateException} while collecting. */
    @Override
    public Set<Token> allPossible() {
        return innerLookup.allPossible();
    }

    /** Ends the collecting phase; throws {@link IllegalStateException} if already finished. */
    @Override
    public void finishCollection() {
        innerLookup.finishCollection();
    }

    /** Forwards to the current phase; throws {@link IllegalStateException} while collecting. */
    @Override
    public int getTotalCounts() {
        return innerLookup.getTotalCounts();
    }
}
......@@ -237,10 +237,8 @@ public class Mail {
statistics.getAverage(),
statistics.getMin(),
statistics.getMax()));
System.out.println("SD: " + Math.sqrt(stats.stream()
.mapToDouble(e -> (double) (Math.pow(
e - statistics.getAverage(),
2)))
System.out.println("SD: " + Math.sqrt(stats.stream().mapToDouble(e -> Math.pow(
e - statistics.getAverage(), 2))
.sum() / iterations));
}
}
......
package markov;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
......
package markov;
import java.io.Serializable;
public class Token implements Serializable, Comparable<Token> {
static final Token START = new SpecialToken("START");
static final Token END = new SpecialToken("END");
private final int hashCode;
private String content;
private Glyph.Type type;
public Token(String content) {
this(content, Glyph.Type.word);
}
......@@ -17,18 +20,15 @@ public class Token implements Serializable, Comparable<Token> {
this.hashCode = generateHashCode();
}
private String content;
private Glyph.Type type;
private int generateHashCode() {
int result = content != null ? content.hashCode() : 0;
result = 31 * result + (type != null ? type.hashCode() : 0);
return result;
}
@Override
public String toString() {
if (this.equals(END)) {
return "Token <END>";
}
if (this.equals(START)) {
return "TOKEN <Start>";
}
return "T=[" + content + "]";
public int hashCode() {
return hashCode;
}
@Override
......@@ -42,17 +42,15 @@ public class Token implements Serializable, Comparable<Token> {
type == token.type;
}
private final int hashCode;
private int generateHashCode() {
int result = content != null ? content.hashCode() : 0;
result = 31 * result + (type != null ? type.hashCode() : 0);
return result;
}
@Override
public int hashCode() {
return hashCode;
public String toString() {
if (this.equals(END)) {
return "Token <END>";
}
if (this.equals(START)) {
return "TOKEN <Start>";
}
return "T=[" + content + "]";
}
public String render(String prefix) {
......
......@@ -42,7 +42,7 @@ public class ShortenerByteHUffmanTrainerImpl extends ShortenerSimpleImpl {
if (i < 128) {
return Stream.of((byte) (i | 0b10000000));
} else {
Stream.Builder<Byte> builder = Stream.<Byte>builder();
Stream.Builder<Byte> builder = Stream.builder();
for (byte b : Utils.toByteArray(i)) {
builder.add(b);
}
......
......@@ -28,7 +28,7 @@ public class ShortenerByteHuffmanImpl extends ShortenerSimpleImpl {
if (i < 128) {
return Stream.of((byte) (i | 0b10000000));
} else {
Stream.Builder<Byte> builder = Stream.<Byte>builder();
Stream.Builder<Byte> builder = Stream.builder();
for (byte b : Utils.toByteArray(i)) {
builder.add(b);
}
......
......@@ -2,7 +2,8 @@ package markov;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
class GlyphTest {
......@@ -16,11 +17,6 @@ class GlyphTest {
assertEquals(a, a2);
assertNotEquals(a, w);
assertNotEquals(a, c);
// assertNotEquals(Glyph.End, c);
// assertNotEquals(Glyph.Start, c);
// assertNotEquals(Glyph.Start, Glyph.End);
// assertEquals(Glyph.Start, Glyph.Start);
// assertEquals(Glyph.End, Glyph.End);
}
}
\ No newline at end of file
package markov;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.util.Arrays;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Test holder whose test code is currently disabled.
 *
 * <p>{@link #tests()} is still a registered test but has no active statements, and
 * the second test exists only as commented-out code. The disabled code refers to
 * {@code Collector}, {@code Token}, {@code Glyph}, {@code Prefix} and {@code Data}.
 */
public class ParserStreamStyleTest {
// Runs with an empty body, so it makes no assertions.
// Disabled intent: combining the glyphs START, "a", "b", " ", "c", END, EMPTY should
// produce the tokens START, "ab", " ", "c", END.
// TODO(review): restore or delete the commented-out body - reason for disabling is not visible here.
@Test
void tests() {
// Collector parserStreamStyle = new Collector(1);
// Assertions.assertIterableEquals(
// Arrays.asList(
// new Token(Glyph.Start.getContent(), Glyph.Type.control),
// new Token("ab"),
// new Token(" ", Glyph.Type.whitespace),
// new Token("c"),
// new Token(Glyph.End.getContent(), Glyph.Type.control)),
// parserStreamStyle.combineToTokens(Stream.of(Glyph.Start,
// new Glyph(Glyph.Type.word, "a"),
// new Glyph(Glyph.Type.word, "b"),
// new Glyph(Glyph.Type.whitespace, " "),
// new Glyph(Glyph.Type.word, "c"),
// Glyph.End, new Glyph(Glyph.Type.empty, "EMPTY")))
// .collect(Collectors.toList()));
}
// Disabled exploratory test: it only printed statistics and made no assertions.
// @Test
// void test2() {
// Collector parserStreamStyle = new Collector(2);
// Data data = parserStreamStyle.learn(Stream.<String>builder().add("a bc a d_e ").build());
// System.out.println(data.dumpStats());
// System.out.println(data.fetch(new Prefix(Arrays.asList(new Token("e")))).allPossible());
// }
}
\ No newline at end of file
package markov;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
/** Test class stub; its single test method has no body yet. */
class ShortenerByteHuffmanImplTest {
// Placeholder: contains no statements, so it makes no assertions.
@Test
void random() {
}
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment