Commit 3e2e0d64 by Hut

analysis works + little bit of refactoring

parent 273a6bcf
package directory.passive.markov;
import java.util.AbstractMap;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ThreadLocalRandom;
/**
 * Produces random token chains by repeatedly asking a {@link Data} table for
 * a successor of the current prefix.
 */
public class Builder {
    private final int prefix_length;
    private final Data data;

    /**
     * @param prefix_length number of tokens that form one prefix
     * @param data          table consulted for the successors of each prefix
     */
    public Builder(int prefix_length, Data data) {
        this.prefix_length = prefix_length;
        this.data = data;
    }

    /**
     * Generates one chain. The walk starts from a prefix consisting solely of
     * {@code Token.START} and stops once {@code Token.END} has been drawn.
     *
     * @return every step of the walk as a (prefix, drawn token) pair, in
     *         order; the last pair carries {@code Token.END}
     */
    public Collection<Map.Entry<Prefix, Token>> random() {
        // Seed window: prefix_length copies of the START sentinel.
        Token[] seed = new Token[prefix_length];
        int slot = 0;
        while (slot < prefix_length) {
            seed[slot] = Token.START;
            slot++;
        }

        List<Map.Entry<Prefix, Token>> steps = new LinkedList<>();
        Prefix window = new Prefix(seed);
        Token drawn = Token.START;
        while (drawn != Token.END) {
            // The random value comes from ThreadLocalRandom#nextDouble, i.e. [0, 1).
            drawn = data.fetch(window).forRandom(ThreadLocalRandom.current().nextDouble());
            steps.add(new AbstractMap.SimpleEntry<Prefix, Token>(window, drawn));
            window = window.slide(drawn);
        }
        return steps;
    }
}
......@@ -2,6 +2,7 @@ package directory.passive.markov;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;
// FIXME rename
public class Data {
......@@ -15,7 +16,7 @@ public class Data {
}
public Lookup fetch(Prefix p) {
return data.getOrDefault(p, Lookup.empty);
return data.get(p);
}
@Override
......@@ -23,4 +24,27 @@ public class Data {
return "Data [data=" + data + "]";
}
/**
 * Builds a two-line summary of the table: first every prefix with the total
 * count of its lookup, then every prefix with its number of distinct tokens.
 * Each line is ordered from the largest value to the smallest.
 *
 * @return the formatted statistics
 */
public String dumpStats() {
    // Descending order is obtained by comparing right against left.
    String byTotalHits = data.entrySet().stream()
            .sorted((left, right) -> Integer.compare(
                    right.getValue().getTotalCounts(),
                    left.getValue().getTotalCounts()))
            .map(entry -> String.format("%s - %d", entry.getKey(),
                    entry.getValue().getTotalCounts()))
            .collect(Collectors.joining(", "));

    String byDistinctTokens = data.entrySet().stream()
            .sorted((left, right) -> Integer.compare(
                    right.getValue().getDistinctTokens(),
                    left.getValue().getDistinctTokens()))
            .map(entry -> String.format("%s - %d", entry.getKey(),
                    entry.getValue().getDistinctTokens()))
            .collect(Collectors.joining(", "));

    return "Total hits: " + byTotalHits
            + "\n" + "Distinct possibilities " + byDistinctTokens;
}
}
......@@ -17,15 +17,23 @@ public class Lookup {
private final LinkedHashMap<Token, Integer> tokens = new LinkedHashMap<Token, Integer>();
private int getTotalCounts() {
int getTotalCounts() {
return tokens.values().stream().mapToInt(i -> i.intValue()).sum();
}
/**
 * @return how many different tokens have been recorded in this lookup
 */
int getDistinctTokens() {
    return tokens.keySet().size();
}
/**
 * Records one more occurrence of the given token: a token seen for the first
 * time starts at 1, otherwise its stored count is incremented.
 *
 * @param t the token that was observed
 */
public void add(Token t) {
    tokens.merge(t, 1, Integer::sum);
}
/**
 * @param t the token to look up
 * @return the count stored for {@code t}, or 0 when it has no entry
 */
public int getAmount(Token t) {
    return tokens.containsKey(t) ? tokens.get(t) : 0;
}
public Token forRandom(double random) {
int id = (int) Math.floor(random * getTotalCounts());
int i = 0;
......
package directory.passive.markov;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;
public class Main {
private final int prefix_length;
private final Data data = new Data();
/**
 * @param prefix_length number of tokens that form one prefix
 */
public Main(int prefix_length) {
    // The implicit superclass constructor call is sufficient here.
    this.prefix_length = prefix_length;
}
/**
 * Feeds the given text into the table. The input is split on {@code "\n"};
 * from each line the literal {@code "Kai☺UWE empfiehlt "} is removed, the rest
 * is tokenized, and every token after the leading padding is recorded
 * together with the prefix that precedes it.
 *
 * @param input the text to learn from, one sentence per line
 */
public void parse(String input) {
    String[] rows = input.split("\n");
    for (int row = 0; row < rows.length; row++) {
        String sentence = rows[row].replace("Kai☺UWE empfiehlt ", "");
        Token[] sequence = tokenize(sentence);
        int position = prefix_length;
        while (position < sequence.length) {
            Prefix preceding = getPrefix(sequence, position);
            Token following = getToken(sequence, position);
            data.add(preceding, following);
            position++;
        }
    }
}
/**
 * Splits a line on single spaces and wraps each piece in a {@code Token},
 * preceded by {@code prefix_length} copies of {@code Token.START} and followed
 * by one {@code Token.END}.
 *
 * @param line the text to split
 * @return the padded token sequence
 */
private Token[] tokenize(String line) {
    String[] words = line.split(" ");
    int wordsEnd = words.length + prefix_length;
    Token[] padded = new Token[wordsEnd + 1];

    int cursor = 0;
    while (cursor < prefix_length) {
        padded[cursor] = Token.START;
        cursor++;
    }
    while (cursor < wordsEnd) {
        padded[cursor] = new Token(words[cursor - prefix_length]);
        cursor++;
    }
    // cursor now points at the last slot, reserved for the END sentinel.
    padded[cursor] = Token.END;
    return padded;
}
/**
 * Builds the prefix made of the {@code prefix_length} tokens located directly
 * before {@code index}.
 *
 * @param tokens the padded token sequence
 * @param index  position of the token whose prefix is wanted
 * @return a prefix over {@code tokens[index - prefix_length .. index - 1]}
 */
private Prefix getPrefix(Token[] tokens, int index) {
    Token[] window = new Token[prefix_length];
    System.arraycopy(tokens, index - prefix_length, window, 0, prefix_length);
    return new Prefix(window);
}
/**
 * @param tokens the padded token sequence
 * @param i      position to read
 * @return the token stored at position {@code i}
 */
private Token getToken(Token[] tokens, int i) {
    Token selected = tokens[i];
    return selected;
}
/**
 * Generates one random chain. The list is primed with {@code prefix_length}
 * copies of {@code Token.START}; then the current token is appended and a new
 * one drawn until {@code Token.END} comes up. {@code Token.END} itself is not
 * appended.
 *
 * @return the generated tokens, including the leading START padding
 */
private Collection<Token> random() {
    List<Token> chain = new LinkedList<>();
    int padding = prefix_length;
    while (padding > 0) {
        chain.add(Token.START);
        padding--;
    }
    for (Token next = Token.START; next != Token.END; next = nextRandom(chain)) {
        chain.add(next);
    }
    return chain;
}
/**
 * Draws the token that follows the last {@code prefix_length} entries of the
 * chain built so far.
 *
 * @param result the tokens generated so far; must hold at least
 *               {@code prefix_length} entries
 * @return the token chosen by the lookup stored for the trailing prefix
 */
private Token nextRandom(List<Token> result) {
    Token[] window = new Token[prefix_length];
    int size = result.size();
    // Copy the tail in a single pass; indexed get() on a linked list would
    // re-walk the list for every element.
    result.subList(size - prefix_length, size).toArray(window);
    return data.fetch(new Prefix(window)).forRandom(nextRandomNumber());
}
/**
 * @return a random double from the calling thread's {@link ThreadLocalRandom},
 *         which is at least 0 and less than 1
 */
private double nextRandomNumber() {
    ThreadLocalRandom generator = ThreadLocalRandom.current();
    return generator.nextDouble();
}
/**
 * Renders a chain as one line: the fixed lead-in {@code "KAI-uwe empfiehlt"}
 * followed by each token's own rendering, requested with a single space as
 * its prefix argument.
 *
 * @param tokens the tokens to print, in order
 * @return the assembled line
 */
private static String render(Collection<Token> tokens) {
    StringBuilder line = new StringBuilder("KAI-uwe empfiehlt");
    tokens.forEach(token -> line.append(token.render(" ")));
    return line.toString();
}
public static void main(String[] args) {
Main main = new Main(2);
main.parse(input);
for (int i = 0; i < 10; i++) {
System.out.println(render(main.random()));
int prefixLength = 1;
Data data = new Parser(prefixLength).parse(input);
Builder b = new Builder(prefixLength, data);
Renderer r = new Renderer(data);
for (int i = 0; i < 25; i++) {
System.out.println(r.render(b.random()));
}
System.out.println(data.dumpStats());
}
private static final String input = "Kai☺UWE empfiehlt \"langsam ist Präzise und Präzise ist schnell\" zu verinnerlichen.\n"
......
package directory.passive.markov;
/**
 * Reads training text and fills a {@link Data} table with
 * (prefix, following token) observations.
 */
public class Parser {
    private final int prefix_length;

    /**
     * @param prefix_length number of tokens that form one prefix
     */
    public Parser(int prefix_length) {
        this.prefix_length = prefix_length;
    }

    /**
     * Parses the given text into a fresh table. The input is split on
     * {@code "\n"}; from each line the literal {@code "Kai☺UWE empfiehlt "} is
     * removed, the rest is tokenized, and every token after the leading
     * padding is recorded together with the prefix that precedes it.
     *
     * @param input the text to learn from, one sentence per line
     * @return the table holding all observations
     */
    public Data parse(String input) {
        Data table = new Data();
        String[] rows = input.split("\n");
        for (int row = 0; row < rows.length; row++) {
            String sentence = rows[row].replace("Kai☺UWE empfiehlt ", "");
            Token[] sequence = tokenize(sentence);
            int position = prefix_length;
            while (position < sequence.length) {
                Prefix preceding = getPrefix(sequence, position);
                Token following = getToken(sequence, position);
                table.add(preceding, following);
                position++;
            }
        }
        return table;
    }

    /**
     * Splits a line on single spaces and wraps each piece in a {@code Token},
     * preceded by {@code prefix_length} copies of {@code Token.START} and
     * followed by one {@code Token.END}.
     */
    private Token[] tokenize(String line) {
        String[] words = line.split(" ");
        int wordsEnd = words.length + prefix_length;
        Token[] padded = new Token[wordsEnd + 1];

        int cursor = 0;
        while (cursor < prefix_length) {
            padded[cursor] = Token.START;
            cursor++;
        }
        while (cursor < wordsEnd) {
            padded[cursor] = new Token(words[cursor - prefix_length]);
            cursor++;
        }
        // cursor now points at the last slot, reserved for the END sentinel.
        padded[cursor] = Token.END;
        return padded;
    }

    /**
     * Builds the prefix made of the {@code prefix_length} tokens located
     * directly before {@code index}.
     */
    private Prefix getPrefix(Token[] tokens, int index) {
        Token[] window = new Token[prefix_length];
        System.arraycopy(tokens, index - prefix_length, window, 0, prefix_length);
        return new Prefix(window);
    }

    /** Returns the token stored at position {@code i}. */
    private Token getToken(Token[] tokens, int i) {
        Token selected = tokens[i];
        return selected;
    }
}
package directory.passive.markov;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
// FIXME rename
public class Prefix {
private final Token[] tokens;
private final List<Token> tokens;
/**
 * Creates a prefix holding the given tokens in order.
 *
 * @param tokens the tokens of this prefix; the array is copied, so later
 *               writes to it do not affect this prefix
 */
public Prefix(Token[] tokens) {
    // Arrays.asList alone is a live view of the array. Prefixes serve as map
    // keys, so their content must not change behind their back.
    this.tokens = new ArrayList<>(Arrays.asList(tokens));
}
/**
 * Creates a prefix holding the given tokens in order.
 *
 * @param tokens the tokens of this prefix; the list is copied, so later
 *               changes to it do not affect this prefix
 */
public Prefix(List<Token> tokens) {
    // Keep a private copy: prefixes serve as map keys, so their content must
    // not change when the caller keeps modifying its list.
    this.tokens = new ArrayList<>(tokens);
}
/**
 * Returns the prefix obtained by dropping the oldest token and appending
 * {@code newToken}. This prefix itself is left unchanged.
 *
 * @param newToken the token entering the window
 * @return the shifted prefix, of the same length as this one; an empty
 *         prefix slides to itself
 */
public Prefix slide(Token newToken) {
    if (this.tokens.isEmpty()) {
        // A zero-length window has nothing to drop and no room to keep
        // newToken; remove(0) would throw here.
        return this;
    }
    List<Token> shifted = new ArrayList<>(this.tokens);
    shifted.remove(0);
    shifted.add(newToken);
    return new Prefix(shifted);
}
@Override
public String toString() {
return "Prefix [tokens=" + Arrays.toString(tokens) + "]";
return "P=" + tokens;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + Arrays.hashCode(tokens);
result = prime * result + tokens.hashCode();
return result;
}
......@@ -32,10 +45,9 @@ public class Prefix {
if (getClass() != obj.getClass())
return false;
Prefix other = (Prefix) obj;
if (!Arrays.equals(tokens, other.tokens))
if (!tokens.equals(other.tokens))
return false;
return true;
}
}
package directory.passive.markov;
import java.util.Collection;
import java.util.Map.Entry;
/**
 * Turns a generated chain into a printable line. Each token is preceded by a
 * marker that reflects how many distinct tokens its prefix's lookup holds,
 * and the line ends with the product of the per-step relative frequencies.
 */
public class Renderer {
    /** Inclusive upper bounds of the marker buckets, in ascending order. */
    private static final int[] BUCKET_LIMITS = { 1, 2, 3, 5, 10, 20, 50, 100, 200 };
    /** Marker emitted for the bucket at the same index in BUCKET_LIMITS. */
    private static final String[] BUCKET_MARKERS = { "-", "--", "---", "_", "__", "___", "*", "**", "***" };

    private final Data data;

    /**
     * @param data table used to look up the statistics of each prefix
     */
    public Renderer(Data data) {
        this.data = data;
    }

    /**
     * Renders one chain.
     *
     * @param collection the (prefix, token) steps of the chain, in order
     * @return {@code "KAI-uwe empfiehlt"}, then marker and rendered token for
     *         every step, then {@code " - "} and the accumulated probability
     */
    public String render(Collection<Entry<Prefix, Token>> collection) {
        StringBuilder line = new StringBuilder("KAI-uwe empfiehlt");
        double probability = 1;
        for (Entry<Prefix, Token> step : collection) {
            Lookup lookup = data.fetch(step.getKey());
            int possibilities = lookup.getDistinctTokens();
            // Relative frequency of the drawn token among all tokens recorded
            // for this prefix.
            double hits = lookup.getAmount(step.getValue());
            double total = lookup.getTotalCounts();
            probability *= hits / total;
            line.append(markerFor(possibilities));
            line.append(step.getValue().render(""));
        }
        line.append(" - ").append(probability);
        return line.toString();
    }

    /**
     * Picks the marker of the first bucket whose limit is not exceeded;
     * counts above the largest limit get a single space.
     */
    private static String markerFor(int possibilities) {
        for (int bucket = 0; bucket < BUCKET_LIMITS.length; bucket++) {
            if (possibilities <= BUCKET_LIMITS[bucket]) {
                return BUCKET_MARKERS[bucket];
            }
        }
        return " ";
    }
}
......@@ -23,7 +23,7 @@ public class Token {
}
if (this == EMPTY)
return "TOKEN <empty>";
return "Token [content=" + content + "]";
return "T=[" + content + "]";
}
@Override
......@@ -70,7 +70,7 @@ public class Token {
@Override
public String render(String prefix) {
return "";
return this.toString();
}
}
......
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import java.util.Arrays;
import org.junit.Test;
import directory.passive.markov.Prefix;
import directory.passive.markov.Token;
/**
 * Equality checks for {@link Token} and {@link Prefix}.
 */
public class TokenTests {

    /** Tokens with the same content are equal; different content is not. */
    @Test
    public void testTokenEquals() {
        Token first = new Token("a");
        Token firstTwin = new Token("a");
        Token other = new Token("b");

        assertEquals(first, firstTwin);
        assertNotEquals(first, other);
        assertNotEquals(firstTwin, other);
    }

    /**
     * Prefixes compare by content, regardless of whether they were built from
     * an array or from a list.
     */
    @Test
    public void testPrefixEquals() {
        Prefix fromArray = arrayPrefix("a");
        Prefix fromArrayTwin = arrayPrefix("a");
        Prefix fromList = new Prefix(Arrays.asList(new Token("a")));
        Prefix other = arrayPrefix("b");

        assertEquals(fromArray, fromArrayTwin);
        assertEquals(fromArrayTwin, fromList);
        assertEquals(fromArray, fromList);
        assertNotEquals(fromArray, other);
        assertNotEquals(fromArrayTwin, other);
        assertNotEquals(fromList, other);
    }

    /** Builds a one-token prefix through the array constructor. */
    private static Prefix arrayPrefix(String content) {
        return new Prefix(new Token[] { new Token(content) });
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment