Commit 6613e459 by Hut

performance stuff

parent e42c4630
......@@ -31,12 +31,11 @@ public class Builder {
Function<Prefix, Decission> producer) {
Sentence sentence = new Sentence(data);
Prefix p = initPrefix();
Decission d = new Decission(Token.START, 0);
Decission d = producer.apply(p);
for (int i = 0; i < 1000; i++) {
if (Token.END.equals(d.getToken()))
break;
d = producer.apply(p);
d.setP(p);
sentence.add(d);
p = p.slide(d.getToken(), prefix_length);
}
......@@ -53,7 +52,6 @@ public class Builder {
throw new NullPointerException(String.format("could not find a lookup for %s", prefix));
}
Decission decission = l.forRandom(nextRandomNumber());
decission.setLookup(l);
return decission;
}
......
package markov;
import java.util.Collection;
import java.util.stream.Stream;
public class Collector {
private final int prefixLength;
private final Prefix[] slider = new Prefix[]{getNewPrefix()};
private Prefix slider = getNewPrefix();
private final Data data;
......@@ -17,12 +14,12 @@ public class Collector {
}
public void learn(Token token) {
data.add(slider[0], token);
slider[0] = slider[0].slide(token, this.prefixLength);
data.add(slider, token);
slider = slider.slide(token, this.prefixLength);
}
public void reset() {
slider[0] = getNewPrefix();
slider = getNewPrefix();
}
private Prefix getNewPrefix() {
......@@ -33,7 +30,10 @@ public class Collector {
return prefixLength;
}
public Data getData() {
public Data finishAndGetData() {
data.finish();
return data;
}
}
......@@ -70,4 +70,7 @@ public class Data implements Serializable{
}
/**
 * Calls {@code finishCollection()} on every value currently held in {@code data}.
 */
public void finish() {
    data.values().forEach(lookup -> lookup.finishCollection());
}
}
......@@ -3,27 +3,19 @@ package markov;
public class Decission {
private final Token token;
private final int id;
private Prefix p;
private Lookup lookup;
private final Lookup lookup;
public Decission(Token key, int id) {
token = key;
public Decission(Token token, int id, Lookup lookup) {
this.token = token;
this.id = id;
}
public void setLookup(Lookup lookup) {
this.lookup = lookup;
}
public void setP(Prefix p) {
// public Decission(Token key, int id) {
// token = key;
// this.id = id;
// }
this.p = p;
}
public Prefix getP() {
return p;
}
public Lookup getLookup() {
return lookup;
......
package markov;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.Map.Entry;
import java.util.Set;
......@@ -9,7 +11,13 @@ import java.util.stream.Collectors;
public class Lookup implements Serializable {
private final LinkedHashMap<Token, Integer> tokens = new LinkedHashMap<Token, Integer>();
private int totalCounts =0;
private Decission[] finalData;
private long[] finishedSums;
private boolean isFinishedCollecting = false;
private int totalCounts = 0;
int getTotalCounts() {
return totalCounts;
}
......@@ -23,67 +31,88 @@ public class Lookup implements Serializable {
}
public void add(Token t) {
if (isFinishedCollecting) {
throw new IllegalStateException("already finished");
}
Integer i = tokens.getOrDefault(t, 0);
tokens.put(t, i + 1);
totalCounts++;
}
public Decission forRandom(double random) {
if (!isFinishedCollecting) {
throw new IllegalStateException("not jet finished");
}
if (random < 0 || random >= 1) {
throw new IllegalArgumentException("expected double [0; 1)");
}
int id = (int) Math.floor(random * getTotalCounts());
int i = 0;
for (Entry<Token, Integer> entry : tokens.entrySet()) {
i += entry.getValue();
if (id < i) {
return new Decission(entry.getKey(), id);
}
}
throw new IllegalStateException(String.format(
"failed to find a random token with seed %d within %s", id,
this));
int id = (int) Math.floor(random * totalCounts);
int i = Arrays.binarySearch(finishedSums, id);
i = Math.min(i >= 0 ? i + 1 : -1 * (i + 1), finishedSums.length - 1);
return finalData[i];
}
public Decission forId(int id) {
if (id < 0 || id > getTotalCounts()) {
throw new IllegalArgumentException(String.format("got %d but expected id [0; %d)",id, getTotalCounts()));
if (!isFinishedCollecting) {
throw new IllegalStateException("not jet finished");
}
int i = 0;
for (Entry<Token, Integer> entry : tokens.entrySet()) {
i += entry.getValue();
if (id < i) {
return new Decission(entry.getKey(), id);
if (id < 0 || id > finishedSums.length) {
throw new IllegalArgumentException(
String.format("got %d but expected id [0; %d)", id, finishedSums.length));
}
}
throw new IllegalStateException(String.format(
"failed to find a random token with seed %d within %s", id,
this));
return finalData[id];
}
public Decission average() {
return tokens
.entrySet().stream().sorted((e1, e2) -> Integer
.compare(e2.getValue(), e1.getValue()))
.findFirst().map(e -> new Decission(e.getKey(), e.getValue())).get();
if (!isFinishedCollecting) {
throw new IllegalStateException("not jet finished");
}
return finalData[this.finishedSums.length - 1];
}
public Set<Token> allPossible() {
return this.tokens.keySet();
}
/**
 * Freezes the collected counts into the arrays used for lookups.
 *
 * <p>Entries are visited in ascending order of their count. For each entry the running total of
 * counts is stored in {@code finishedSums} and a {@link Decission} carrying the entry's index is
 * stored at the same position in {@code finalData}. Afterwards {@code tokens} is cleared and the
 * lookup is flagged as finished. Calling this method on an already finished lookup does nothing.
 */
void finishCollection() {
    if (isFinishedCollecting) {
        return;
    }

    final int distinct = tokens.size();
    finishedSums = new long[distinct];
    finalData = new Decission[distinct];

    // Sorted view over the collected entries, smallest count first.
    Iterable<Entry<Token, Integer>> ascending = () -> tokens.entrySet()
            .stream()
            .sorted(Comparator.comparingInt(Entry::getValue))
            .iterator();

    int slot = 0;
    long runningTotal = 0;
    for (Entry<Token, Integer> entry : ascending) {
        runningTotal += entry.getValue();
        finishedSums[slot] = runningTotal;
        finalData[slot] = new Decission(entry.getKey(), slot, this);
        slot++;
    }

    this.tokens.clear();
    isFinishedCollecting = true;
}
@Override
public String toString() {
return "Lookup [tokens= "
+ tokens.entrySet().stream()
.sorted((e1, e2) -> Integer.compare(e2.getValue(),
e1.getValue()))
.map(e -> String.format("%d*%s", e.getValue(),
e.getKey()))
.collect(Collectors.joining(", "))
+ "]";
return "Lookup [tokens= " + tokens.entrySet().stream().sorted(
(e1, e2) -> Integer.compare(e2.getValue(), e1.getValue())).map(
e -> String.format("%d*%s", e.getValue(), e.getKey())).collect(
Collectors.joining(", ")) + "]";
}
/**
 * Drops the arrays built by {@code finishCollection()} and clears the finished flag so that
 * {@code add} is accepted again. The contents of {@code tokens} are not restored.
 *
 * <p>Not thread safe!
 */
void resetFinishding() {
    this.finalData = null;
    this.finishedSums = null;
    this.isFinishedCollecting = false;
}
}
......@@ -32,11 +32,11 @@ public class Mail {
"huffmanbytes.file", "classpath:huffmanBytes");
private static final int iterations = 1_000_000;
// private static final int iterations = 500_000;
// private static final int iterations = 10_000;
public static void main(String[] args)
throws IOException, ClassNotFoundException, NoSuchFieldException, IllegalAccessException {
// System.in.read();
System.in.read();
new Mail().timeCreation();
}
......@@ -55,8 +55,7 @@ public class Mail {
Stopwatch s = Stopwatch.createStarted();
for (int i = 0; i < iterations; i++) {
Utils.maybePrintPercentages(i, iterations);
IntStream.range(1, maxPrefix + 1).parallel().forEach(
j -> builder[j].random());
IntStream.range(1, maxPrefix + 1).parallel().forEach(j -> builder[j].random());
}
s.stop();
System.out.println("took: " + s.toString());
......@@ -204,10 +203,6 @@ public class Mail {
Builder b = new Builder(prefixLength, dataMap.get(prefixLength));
List<ShortenerStats> shorter = Arrays.asList(
new ShortenerStats(new ShortenerSimpleImpl(dataMap.get(prefixLength))),
new ShortenerStats(new ShortenerByteImpl(dataMap.get(prefixLength))),
new ShortenerStats(new ShortenerStringImpl(dataMap.get(prefixLength))),
new ShortenerStats(new ShortenerByte2Impl(dataMap.get(prefixLength))),
new ShortenerStats(new ShortenerByte64Impl(dataMap.get(prefixLength))),
new ShortenerStats(new ShortenerIntHuffmanImpl(dataMap.get(prefixLength), intCode)),
new ShortenerStats(
new ShortenerByteHuffmanImpl(dataMap.get(prefixLength), byteCode)));
......
......@@ -19,10 +19,6 @@ public class Main {
for (int i = 0; i < 1; i++) {
System.out.println(r2.render(sentence));
}
ShortenerByteImpl shortener = new ShortenerByteImpl(data);
String id = shortener.getId(sentence);
System.out.println(id);
System.out.println(r.render(shortener.getSentence(id)));
// Collection<Map.Entry<Prefix, Decission>> a = b.average();
......
......@@ -25,6 +25,6 @@ public class Parser {
c.reset();
}
});
return collectors.stream().collect(Collectors.toMap(c -> c.getPrefixLength(), c -> c.getData()));
return collectors.stream().collect(Collectors.toMap(c -> c.getPrefixLength(), c -> c.finishAndGetData()));
}
}
package markov;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
......@@ -23,19 +22,17 @@ public class Prefix implements Serializable, Comparable<Prefix> {
}
public Prefix slide(Token newToken, int maxLength) {
List<Token> newTokens = new ArrayList<Token>(Arrays.asList(this.tokens.clone()));
newTokens.add(newToken);
while (newTokens.size() > maxLength) {
newTokens.remove(0);
}
int newLength = Math.min(tokens.length + 1, maxLength);
Token[] newTokens = new Token[newLength];
System.arraycopy(tokens, Math.max(0, tokens.length - newLength + 1), newTokens, 0,
newLength - 1);
newTokens[newLength - 1] = newToken;
return new Prefix(newTokens);
}
@Override
public String toString() {
return "Prefix{" +
"tokens=" + Arrays.toString(tokens) +
'}';
return "Prefix{" + "tokens=" + Arrays.toString(tokens) + '}';
}
......@@ -62,12 +59,12 @@ public class Prefix implements Serializable, Comparable<Prefix> {
@Override
public int compareTo(Prefix o) {
if(this.tokens.length != o.tokens.length) {
if (this.tokens.length != o.tokens.length) {
return Integer.compare(o.tokens.length, this.tokens.length);
}
for (int i = 0; i < this.tokens.length; i++) {
int c = this.tokens[i].compareTo(o.tokens[i]);
if(c != 0) {
if (c != 0) {
return c;
}
}
......
......@@ -33,13 +33,6 @@ public class Sentence implements Iterable<Decission>, Serializable {
.reduce((d1, d2) -> d1 * d2).getAsDouble();
}
public String render() {
return new Renderer().render(this);
}
public String id() {
return new ShortenerByteImpl(data).getId(this);
}
@Override
public Iterator<Decission> iterator() {
return decissions.iterator();
......
package markov;
import markov.stuff.Utils;
import java.nio.ByteBuffer;
import java.util.Base64;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Shortener that packs each id into either two or four bytes and renders the result as standard
 * Base64.
 */
public class ShortenerByte2Impl extends ShortenerSimpleImpl {

    public ShortenerByte2Impl(Data data) {
        super(data);
    }

    /**
     * Encodes the given ids into one Base64 string.
     *
     * @param ints ids to encode, in order
     * @return Base64 (standard alphabet) text of the concatenated per-id encodings
     */
    String fromInts(Stream<Integer> ints) {
        List<Byte> encoded = ints.flatMap(ShortenerByte2Impl::encode).collect(Collectors.toList());
        ByteBuffer buffer = ByteBuffer.allocate(encoded.size());
        for (Byte b : encoded) {
            buffer.put(b);
        }
        // array() exposes the whole backing array regardless of the buffer position.
        return Base64.getEncoder().encodeToString(buffer.array());
    }

    /**
     * Encodes a single id: values below {@code 0b00111111_11111111} become bytes 2 and 3 of
     * {@code Utils.toByteArray}, with bit 6 of the first emitted byte set; all other values are
     * emitted as the full array returned by {@code Utils.toByteArray}.
     */
    private static Stream<Byte> encode(Integer value) {
        Stream.Builder<Byte> out = Stream.<Byte>builder();
        if (value < 0b00111111_11111111) {
            byte[] raw = Utils.toByteArray(value);
            out.add((byte) (raw[2] | 0b01000000));
            out.add(raw[3]);
            return out.build();
        }
        for (byte b : Utils.toByteArray(value)) {
            out.add(b);
        }
        return out.build();
    }
}
package markov;
import markov.stuff.Utils;
import java.nio.ByteBuffer;
import java.util.Base64;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Shortener that packs ids below 64 into a single byte and every other id into the bytes returned
 * by {@code Utils.toByteArray}, then renders the result as standard Base64.
 */
public class ShortenerByte64Impl extends ShortenerSimpleImpl {

    public ShortenerByte64Impl(Data data) {
        super(data);
    }

    /**
     * Encodes the given ids into one Base64 string.
     *
     * @param ints ids to encode, in order
     * @return Base64 (standard alphabet) text of the concatenated per-id encodings
     */
    String fromInts(Stream<Integer> ints) {
        List<Byte> packed = ints.flatMap(ShortenerByte64Impl::pack).collect(Collectors.toList());
        byte[] raw = new byte[packed.size()];
        int pos = 0;
        for (Byte b : packed) {
            raw[pos++] = b;
        }
        return Base64.getEncoder().encodeToString(raw);
    }

    /** Encodes a single id; small ids get bit 6 set as a marker in their one-byte form. */
    private static Stream<Byte> pack(Integer value) {
        if (value < 64) {
            return Stream.of((byte) (value | 0b01000000));
        }
        Stream.Builder<Byte> wide = Stream.<Byte>builder();
        for (byte b : Utils.toByteArray(value)) {
            wide.add(b);
        }
        return wide.build();
    }
}
package markov;
import markov.stuff.Utils;
import java.nio.ByteBuffer;
import java.util.Base64;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Shortener that stores ids below 128 in a single byte and every other id in the bytes returned
 * by {@code Utils.toByteArray}, rendered as URL-safe Base64.
 *
 * <p>A single-byte value is marked by its high bit being set. The decoder relies on the first
 * byte of a multi-byte value having that bit clear.
 * NOTE(review): this assumes ids are non-negative and that {@code Utils.toByteArray} yields four
 * bytes, most significant first - confirm against {@code markov.stuff.Utils}.
 */
public class ShortenerByteImpl extends ShortenerSimpleImpl {

    /** High bit that tags a byte as a complete single-byte value. */
    private static final int SINGLE_BYTE_FLAG = 0b10000000;

    /** Mask that recovers the 7-bit payload of a single-byte value. */
    private static final int SINGLE_BYTE_MASK = 0b01111111;

    /** Number of bytes consumed by a value that did not fit into a single byte. */
    private static final int WIDE_LENGTH = 4;

    public ShortenerByteImpl(Data data) {
        super(data);
    }

    /**
     * Encodes the given ids into one URL-safe Base64 string.
     *
     * @param ints ids to encode, in order
     * @return URL-safe Base64 text of the concatenated per-id encodings
     */
    String fromInts(Stream<Integer> ints) {
        List<Byte> bl = ints.flatMap(i -> {
            if (i < 128) {
                return Stream.of((byte) (i | SINGLE_BYTE_FLAG));
            } else {
                Stream.Builder<Byte> builder = Stream.<Byte>builder();
                for (byte b : Utils.toByteArray(i)) {
                    builder.add(b);
                }
                return builder.build();
            }
        }).collect(Collectors.toList());
        ByteBuffer bb = ByteBuffer.allocate(bl.size());
        for (Byte b : bl) {
            bb.put(b);
        }
        return Base64.getUrlEncoder().encodeToString(bb.array());
    }

    /**
     * Decodes a string produced by {@link #fromInts(Stream)} back into its ids.
     *
     * @param hash URL-safe Base64 text
     * @return the decoded ids, in order
     * @throws IllegalArgumentException if {@code hash} is not valid Base64 or ends in the middle
     *         of a multi-byte value
     */
    @Override
    Stream<Integer> toInts(String hash) {
        byte[] hashBytes = Base64.getUrlDecoder().decode(hash);
        Stream.Builder<Integer> builder = Stream.builder();
        int i = 0;
        while (i < hashBytes.length) {
            // A Java byte is signed, so "b < 128" is always true; test the marker bit instead.
            if ((hashBytes[i] & SINGLE_BYTE_FLAG) != 0) {
                // Strip the marker so the original 0..127 value comes back.
                builder.add(hashBytes[i] & SINGLE_BYTE_MASK);
                i++;
            } else {
                if (hashBytes.length - i < WIDE_LENGTH) {
                    throw new IllegalArgumentException(String.format(
                            "truncated id: expected %d bytes at offset %d but only %d remain",
                            WIDE_LENGTH, i, hashBytes.length - i));
                }
                byte[] newBytes = new byte[WIDE_LENGTH];
                System.arraycopy(hashBytes, i, newBytes, 0, WIDE_LENGTH);
                builder.add(Utils.fromByteArray(newBytes));
                i += WIDE_LENGTH;
            }
        }
        return builder.build();
    }
}
......@@ -50,8 +50,6 @@ public class ShortenerSimpleImpl implements Shortener {
while (i < ids.length) {
Lookup lookup = getData().fetch(prefix);
Decission d = lookup.getDistinctTokens() == 1 ? lookup.average() : lookup.forId(ids[i++]);
d.setP(prefix);
d.setLookup(lookup);
sentence.add(d);
prefix = prefix.slide(d.getToken(), getData().getPrefixLength());
}
......
package markov;
import java.util.Base64;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Shortener that treats every id as a Unicode code point, concatenates the resulting characters
 * and renders the UTF-8 bytes of that text as standard Base64.
 */
public class ShortenerStringImpl extends ShortenerSimpleImpl {

    public ShortenerStringImpl(Data data) {
        super(data);
    }

    /**
     * Encodes the given ids into one Base64 string.
     *
     * @param ints ids to encode, in order; each must be a valid Unicode code point
     * @return Base64 (standard alphabet) text of the UTF-8 encoded code points
     * @throws IllegalArgumentException if an id is not a valid code point
     *         (raised by {@link Character#toChars(int)})
     */
    String fromInts(Stream<Integer> ints) {
        String cps = ints.map(i -> new String(Character.toChars(i))).collect(Collectors.joining());
        // Pin the charset: the no-arg getBytes() depends on the platform default, which would
        // make the produced id differ between machines.
        return Base64.getEncoder()
                .encodeToString(cps.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    }
}
......@@ -33,52 +33,69 @@ public class LookupTests {
public void forRandom() {
assertEquals(lookup.forRandom(0).getToken(), new Token("a"));
assertEquals(lookup.forRandom(0.5d).getToken(), new Token("a"));
assertEquals(lookup.forRandom(0.999d).getToken(), new Token("a"));
// assertEquals(lookup.forRandom(1), new Token("a"));
assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(1));
assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(-1));
assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(1.001d));
assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(2));
lookup.resetFinishding();
lookup.add(new Token("b"));
Decission a = lookup.forRandom(0);
Decission b = lookup.forRandom(0.9d);
assertNotEquals(a, b);
lookup.resetFinishding();
lookup.add(new Token("c"));
a = lookup.forRandom(0);
b = lookup.forRandom(0.51d);
Decission c = lookup.forRandom(0.9d);
assertNotEquals(a, b);
assertNotEquals(a, c);
assertNotEquals(b, c);
assertEquals(new Token("a"), a.getToken());
assertEquals(new Token("b"), b.getToken());
assertEquals(new Token("c"), c.getToken());
lookup.resetFinishding();
lookup.add(new Token("c"));
lookup.add(new Token("c"));
assertEquals(new Token("a"), lookup.forRandom(0).getToken());
assertEquals(new Token("a"), lookup.forRandom(0.19d).getToken());
assertEquals(new Token("b"), lookup.forRandom(0.2d).getToken());
assertEquals(new Token("b"), lookup.forRandom(0.39d).getToken());
assertEquals(new Token("c"), lookup.forRandom(0.4d).getToken());
assertEquals(new Token("c"), lookup.forRandom(0.99d).getToken());
}
@Test
public void average() {
assertEquals(lookup.average().getToken(), new Token("a"));
assertEquals(new Token("a"), lookup.average().getToken());
lookup.resetFinishding();
lookup.add(new Token("b"));
Token token = lookup.average().getToken();
assertTrue(() -> token.equals(new Token("a")) || token.equals(new Token("b")));
lookup.resetFinishding();
lookup.add(new Token("a"));
assertEquals(lookup.average().getToken(), new Token("a"));
assertEquals(new Token("a"), lookup.average().getToken());
lookup.resetFinishding();
lookup.add(new Token("b"));
lookup.add(new Token("b"));
assertEquals(lookup.average().getToken(), new Token("b"));
assertEquals(new Token("b"), lookup.average().getToken());
}
@Test
public void allPossible() {
assertIterableEquals(lookup.allPossible(),
Arrays.asList(new Token("a")));
assertIterableEquals(lookup.allPossible(), Arrays.asList(new Token("a")));
lookup.add(new Token("b"));
assertIterableEquals(lookup.allPossible(),
Arrays.asList(new Token("a"), new Token("b")));
assertIterableEquals(lookup.allPossible(), Arrays.asList(new Token("a"), new Token("b")));
lookup.add(new Token("c"));
assertIterableEquals(lookup.allPossible(),
assertIterableEquals(
lookup.allPossible(),
Arrays.asList(new Token("a"), new Token("b"), new Token("c")));
lookup.add(new Token("a"));
assertIterableEquals(lookup.allPossible(),
assertIterableEquals(
lookup.allPossible(),
Arrays.asList(new Token("a"), new Token("b"), new Token("c")));
}
......
......@@ -32,9 +32,32 @@ public class PrefixTests {
@Test
public void testFancySlide() {
prefix = new Prefix(Arrays.asList(new Token("a")));
Prefix p2 = prefix.slide(new Token("b"),2);
Prefix p2 = prefix.slide(new Token("b"), 2);
assertEquals(new Prefix(Arrays.asList(new Token("a"), new Token("b"))), p2);
}
@Test
void testExplicitGrow() {
    // Sliding a two-token prefix with a maximum length of three keeps both tokens and appends.
    Prefix expected =
            new Prefix(Arrays.asList(new Token("a"), new Token("b"), new Token("c")));
    prefix = new Prefix(Arrays.asList(new Token("a"), new Token("b")));

    Prefix actual = prefix.slide(new Token("c"), 3);

    assertEquals(expected, actual);
}
@Test
void testToLargeGrow() {
    // A maximum length beyond current length + 1 still grows the prefix by exactly one token.
    Prefix expected =
            new Prefix(Arrays.asList(new Token("a"), new Token("b"), new Token("c")));
    prefix = new Prefix(Arrays.asList(new Token("a"), new Token("b")));

    Prefix actual = prefix.slide(new Token("c"), 4);

    assertEquals(expected, actual);
}
@Test
public void testShrink() {
    // A maximum length below the current length keeps only the newest tokens.
    Prefix expected = new Prefix(Arrays.asList(new Token("c"), new Token("d")));
    prefix = new Prefix(Arrays.asList(new Token("a"), new Token("b"), new Token("c")));

    Prefix actual = prefix.slide(new Token("d"), 2);

    assertEquals(expected, actual);
}
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment