Commit 6763731a by Hut

many much

parent b450ff1d
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# * Deploy built artifacts from master branch only. # * Deploy built artifacts from master branch only.
# * Shows how to use multiple jobs in test stage for verifying functionality # * Shows how to use multiple jobs in test stage for verifying functionality
# with multiple JDKs. # with multiple JDKs.
# * Uses site:stage to collect the documentation for multi-module projects. # * Uses site:stage to combineToTokens the documentation for multi-module projects.
# * Publishes the documentation for `master` branch. # * Publishes the documentation for `master` branch.
variables: variables:
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
<groupId>passive.directory</groupId> <groupId>passive.directory</groupId>
<artifactId>markov</artifactId> <artifactId>markov</artifactId>
<version>1.0-SNAPSHOT</version> <version>0.2</version>
<properties> <properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
...@@ -29,6 +29,12 @@ ...@@ -29,6 +29,12 @@
<version>1.0.0</version> <version>1.0.0</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency>
<groupId>javax.mail</groupId>
<artifactId>mail</artifactId>
<version>1.4.7</version>
</dependency>
</dependencies> </dependencies>
<build> <build>
......
...@@ -10,52 +10,51 @@ import java.util.function.Function; ...@@ -10,52 +10,51 @@ import java.util.function.Function;
public class Builder { public class Builder {
private final int prefix_length; private final int prefix_length;
private final Data data; private final Data data;
public Builder(int prefix_length, Data data) { public Builder(int prefix_length, Data data) {
super(); super();
this.prefix_length = prefix_length; this.prefix_length = prefix_length;
this.data = data; this.data = data;
} }
public Collection<Map.Entry<Prefix, Token>> random() { public Collection<Map.Entry<Prefix, Decission>> random() {
return produce(p -> nextRandom(p)); return produce(p -> nextRandom(p));
} }
public Collection<Map.Entry<Prefix, Token>> average() { public Collection<Map.Entry<Prefix, Decission>> average() {
return produce(p -> data.fetch(p).average()); return produce(p -> data.fetch(p).average());
} }
private Collection<Map.Entry<Prefix, Token>> produce( private Collection<Decission> produce(
Function<Prefix, Token> producer) { Function<Prefix, Decission> producer) {
List<Map.Entry<Prefix, Token>> result = new LinkedList<>(); List<Decission> result = new LinkedList<>();
Prefix p = initPrefix(); Prefix p = initPrefix();
Token t = Token.START; Decission d = new Decission(Token.START, 0, p, null);
for (int i = 0; i < 200; i++) { for (int i = 0; i < 1000; i++) {
if (t == Token.END) if (d.getToken() == Token.END)
break; break;
t = producer.apply(p); d = producer.apply(p);
result.add(new AbstractMap.SimpleEntry<Prefix, Token>(p, t)); result.add(new AbstractMap.SimpleEntry<>(p, d));
p = p.slide(t); p = p.slide(d.getToken(), prefix_length);
} }
return result; return result;
} }
private Prefix initPrefix() { private Prefix initPrefix() {
Token[] initToken = new Token[prefix_length]; return new Prefix(new Token[0]);
for (int i = 0; i < prefix_length; i++) { }
initToken[i] = Token.START;
} private Decission nextRandom(Prefix prefix) {
Prefix p = new Prefix(initToken); Lookup l = data.fetch(prefix);
return p; if (l == null) {
} throw new NullPointerException(String.format("could not find a lookup for %s", prefix));
}
private Token nextRandom(Prefix prefix) { return l.forRandom(nextRandomNumber());
return data.fetch(prefix).forRandom(nextRandomNumber()); }
}
private double nextRandomNumber() {
private double nextRandomNumber() { return ThreadLocalRandom.current().nextDouble();
return ThreadLocalRandom.current().nextDouble(); }
}
} }
package markov;
import java.util.Collection;
import java.util.stream.Stream;
/**
 * Feeds tokens one at a time into a {@link Data} model, keyed by a sliding
 * prefix that holds at most {@code prefixLength} of the most recently
 * learned tokens.
 */
public class Collector {

    private final int prefixLength;
    private final Data data;

    /** Prefix built from the tokens learned since the last reset; starts empty. */
    private Prefix slider = getNewPrefix();

    /**
     * @param prefixLength maximum number of tokens kept in the sliding prefix;
     *                     also passed on to the backing {@link Data}
     */
    public Collector(int prefixLength) {
        this.prefixLength = prefixLength;
        this.data = new Data(prefixLength);
    }

    /**
     * Records {@code token} under the current prefix and then slides the
     * prefix forward so that it ends with {@code token}.
     *
     * @param token the next token of the input sequence
     */
    public void learn(Token token) {
        data.add(slider, token);
        slider = slider.slide(token, prefixLength);
    }

    /** Discards the current prefix and starts again from an empty one. */
    public void reset() {
        slider = getNewPrefix();
    }

    /** Creates the empty prefix every sequence starts from. */
    private static Prefix getNewPrefix() {
        return new Prefix(new Token[0]);
    }

    /** @return the maximum prefix length this collector was created with */
    public int getPrefixLength() {
        return prefixLength;
    }

    /** @return the model filled by {@link #learn(Token)} */
    public Data getData() {
        return data;
    }
}
...@@ -8,52 +8,64 @@ import java.util.stream.Collectors; ...@@ -8,52 +8,64 @@ import java.util.stream.Collectors;
// FIXME rename // FIXME rename
public class Data { public class Data {
private final Map<Prefix, Lookup> data = new HashMap<>(); private final int prefixLength;
private final Map<Prefix, Lookup> data = new HashMap<>();
public void add(Prefix p, Token t) {
Lookup l = data.getOrDefault(p, new Lookup()); public Data(int prefixLength) {
l.add(t); this.prefixLength = prefixLength;
data.put(p, l); }
}
public int getPrefixLength() {
public Lookup fetch(Prefix p) { return prefixLength;
return data.get(p); }
}
public void add(Prefix p, Token t) {
public Lookup info(Token... t) { Lookup l = data.getOrDefault(p, new Lookup());
return data.get(new Prefix(t)); // System.out.println(String.format("adding %s - %s", p, t, l));
} l.add(t);
data.put(p, l);
public Lookup info(String... s) { }
return data.get(new Prefix(Arrays.stream(s).map(t -> new Token(t, Glyph.Type.word))
.collect(Collectors.toList()))); public Lookup fetch(Prefix p) {
} return data.get(p);
}
@Override
public String toString() { public Lookup info(Token... t) {
return "Data [data=" + data + "]"; return data.get(new Prefix(t));
} }
public String dumpStats() { public Lookup info(String... s) {
return "Total hits: " return data.get(new Prefix(Arrays.stream(s).map(t -> new Token(t, Glyph.Type.word))
+ data.entrySet().stream() .collect(Collectors.toList())));
.sorted((o1, }
o2) -> -1 * Integer.compare(
o1.getValue().getTotalCounts(), @Override
o2.getValue().getTotalCounts())) public String toString() {
.map(e -> String.format("%s - %d", e.getKey(), return "Data [data=" + data + "]";
e.getValue().getTotalCounts())) }
.collect(Collectors.joining(", "))
+ "\n" + "Distinct possibilities " public String dumpStats() {
+ data.entrySet().stream() return "Total hits: "
.sorted((o1, + data.entrySet().stream()
o2) -> -1 * Integer.compare( .sorted((o1,
o1.getValue().getDistinctTokens(), o2) -> -1 * Integer.compare(
o2.getValue().getDistinctTokens())) o1.getValue().getTotalCounts(),
.map(e -> String.format("%s - %d", e.getKey(), o2.getValue().getTotalCounts()))
e.getValue().getDistinctTokens())) .limit(50)
.collect(Collectors.joining(", ")); .map(e -> String.format("%s - %d", e.getKey(),
e.getValue().getTotalCounts()))
} .collect(Collectors.joining(", "))
+ "\n" + "Distinct possibilities "
+ data.entrySet().stream()
.sorted((o1,
o2) -> -1 * Integer.compare(
o1.getValue().getDistinctTokens(),
o2.getValue().getDistinctTokens()))
.limit(50)
.map(e -> String.format("%s - %d", e.getKey(),
e.getValue().getDistinctTokens()))
.collect(Collectors.joining(", "));
}
} }
package markov;
/**
 * A token chosen from a {@link Lookup}, together with the integer id handed
 * in by whoever made the choice and, optionally, the prefix and lookup the
 * choice was made from.
 */
public class Decission {

    private final Token token;
    private final int id;
    private Prefix p;
    private Lookup lookup;

    /**
     * Creates a decision without prefix or lookup; both can be supplied later
     * through the setters.
     *
     * @param key the chosen token
     * @param id  id associated with the choice
     */
    public Decission(Token key, int id) {
        this(key, id, null, null);
    }

    /**
     * Creates a fully populated decision. This is the form used by
     * {@code Builder.produce} for its initial start decision.
     *
     * @param key    the chosen token
     * @param id     id associated with the choice
     * @param p      prefix the choice was made for, may be {@code null}
     * @param lookup lookup the token was taken from, may be {@code null}
     */
    public Decission(Token key, int id, Prefix p, Lookup lookup) {
        this.token = key;
        this.id = id;
        this.p = p;
        this.lookup = lookup;
    }

    /** @param lookup lookup the token was taken from */
    public void setLookup(Lookup lookup) {
        this.lookup = lookup;
    }

    /** @param p prefix the choice was made for */
    public void setP(Prefix p) {
        this.p = p;
    }

    /** @return the prefix set on this decision, or {@code null} if none was set */
    public Prefix getP() {
        return p;
    }

    /** @return the lookup set on this decision, or {@code null} if none was set */
    public Lookup getLookup() {
        return lookup;
    }

    /** @return the chosen token */
    public Token getToken() {
        return token;
    }

    /** @return the id passed to the constructor */
    public int getId() {
        return id;
    }
}
...@@ -2,12 +2,9 @@ package markov; ...@@ -2,12 +2,9 @@ package markov;
public class Glyph { public class Glyph {
public enum Type { public enum Type {
word, interpunction, control, whitespace, empty word, punctuation, control, whitespace, empty, other
} }
public static final Glyph Start = new Glyph(Type.control, "START");
public static final Glyph End = new Glyph(Type.control, "END");
private Type type; private Type type;
private String content; private String content;
......
...@@ -5,18 +5,8 @@ import java.util.Map.Entry; ...@@ -5,18 +5,8 @@ import java.util.Map.Entry;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
// FIXME rename
public class Lookup { public class Lookup {
public static final Lookup empty = new Lookup() {
public Token forRandom(double random) {
return Token.EMPTY;
}
};
public Lookup() {
}
private final LinkedHashMap<Token, Integer> tokens = new LinkedHashMap<Token, Integer>(); private final LinkedHashMap<Token, Integer> tokens = new LinkedHashMap<Token, Integer>();
int getTotalCounts() { int getTotalCounts() {
...@@ -36,8 +26,8 @@ public class Lookup { ...@@ -36,8 +26,8 @@ public class Lookup {
tokens.put(t, i + 1); tokens.put(t, i + 1);
} }
public Token forRandom(double random) { public Decission forRandom(double random) {
if(random <0 || random >=1) { if (random < 0 || random >= 1) {
throw new IllegalArgumentException("expected double [0; 1)"); throw new IllegalArgumentException("expected double [0; 1)");
} }
int id = (int) Math.floor(random * getTotalCounts()); int id = (int) Math.floor(random * getTotalCounts());
...@@ -45,7 +35,24 @@ public class Lookup { ...@@ -45,7 +35,24 @@ public class Lookup {
for (Entry<Token, Integer> entry : tokens.entrySet()) { for (Entry<Token, Integer> entry : tokens.entrySet()) {
i += entry.getValue(); i += entry.getValue();
if (id < i) { if (id < i) {
return entry.getKey(); return new Decission(entry.getKey(), id);
}
}
throw new IllegalStateException(String.format(
"failed to find a random token with seed %d within %s", id,
this));
}
public Decission forId(int id) {
if (id < 0 || id > getTotalCounts()) {
throw new IllegalArgumentException("expected id [0; totalCount)");
}
int i = 0;
for (Entry<Token, Integer> entry : tokens.entrySet()) {
i += entry.getValue();
if (id < i) {
return new Decission(entry.getKey(), id);
} }
} }
throw new IllegalStateException(String.format( throw new IllegalStateException(String.format(
...@@ -54,14 +61,14 @@ public class Lookup { ...@@ -54,14 +61,14 @@ public class Lookup {
} }
public Token average() { public Decission average() {
return tokens return tokens
.entrySet().stream().sorted((e1, e2) -> Integer .entrySet().stream().sorted((e1, e2) -> Integer
.compare(e2.getValue(), e1.getValue())) .compare(e2.getValue(), e1.getValue()))
.findFirst().get().getKey(); .findFirst().map(e -> new Decission(e.getKey(), e.getValue())).get();
} }
public Set<Token> allPosible() { public Set<Token> allPossible() {
return this.tokens.keySet(); return this.tokens.keySet();
} }
......
package markov;
import javax.mail.MessagingException;
import javax.mail.Session;
import javax.mail.internet.MimeMessage;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
import java.util.Properties;
import java.util.stream.Stream;
/**
 * Command line entry point: learns a model from plain-text e-mails stored as
 * files in a directory and prints the shortened ids of a few random sentences.
 */
public class Mail {

    /** Directory that is read when no directory is given on the command line. */
    private static final String DEFAULT_MAIL_DIRECTORY = "C:\\Users\\admin\\Desktop\\emails";

    /** Upper bound on the number of files that are read. */
    private static final int MAX_MAILS = 500;

    /** Number of random sentences that are generated. */
    private static final int SENTENCE_COUNT = 10;

    /**
     * @param args optional; {@code args[0]} overrides the mail directory
     * @throws IllegalStateException if the mail directory cannot be listed
     * @throws RuntimeException      if a mail file cannot be read or parsed
     */
    public static void main(String[] args) {
        int prefixLength = 3;
        File parent = new File(args.length > 0 ? args[0] : DEFAULT_MAIL_DIRECTORY);

        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] files = parent.listFiles();
        if (files == null) {
            throw new IllegalStateException("cannot list mail directory " + parent);
        }

        Stream<String> mails = Arrays.stream(files)
                .filter(File::isFile) // sub-directories cannot be opened as a message
                .limit(MAX_MAILS)
                .map(Mail::readMessage)
                .filter(Mail::isPlainText)
                .map(Mail::contentOf);

        Data data = Utils.parse(mails, prefixLength);
        Builder b = new Builder(prefixLength, data);
        Shortener shortener = new Shortener(data);
        for (int i = 0; i < SENTENCE_COUNT; i++) {
            Collection<Map.Entry<Prefix, Decission>> sentence = b.random();
            System.out.println(shortener.getId(sentence));
        }
    }

    /** Parses one file as a MIME message and closes the file afterwards. */
    private static MimeMessage readMessage(File file) {
        try (java.io.InputStream in = Files.newInputStream(file.toPath())) {
            return new MimeMessage(Session.getDefaultInstance(new Properties()), in);
        } catch (IOException | MessagingException e) {
            throw new RuntimeException("could not read mail " + file, e);
        }
    }

    /** Tells whether the content type of the message mentions {@code text/plain}. */
    private static boolean isPlainText(MimeMessage message) {
        try {
            return message.getContentType().contains("text/plain");
        } catch (MessagingException e) {
            throw new RuntimeException(e);
        }
    }

    /** Returns the content of the message converted with {@code toString()}. */
    private static String contentOf(MimeMessage message) {
        try {
            return message.getContent().toString();
        } catch (IOException | MessagingException e) {
            throw new RuntimeException(e);
        }
    }
}
package markov; package markov;
public interface Parser { import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public class Parser {
/**
 * Learns one model per requested prefix length from the same input.
 *
 * Every inner stream is treated as an independent sequence: all collectors
 * see its tokens in order and are reset once the sequence is exhausted.
 *
 * @param input    sequences of tokens to learn from; consumed by this call
 * @param prefixes prefix lengths to build a model for; repeated values are
 *                 only built once, because the result is keyed by length
 * @return the learned model for each distinct prefix length
 */
public Map<Integer, Data> parse(Stream<Stream<Token>> input, Collection<Integer> prefixes) {
    // distinct(): two collectors with the same length would collide as map keys below.
    Collection<Collector> collectors = prefixes.stream()
            .distinct()
            .map(Collector::new)
            .collect(Collectors.toCollection(ArrayList::new));

    input.forEachOrdered(tokenStream -> {
        tokenStream.forEachOrdered(token -> collectors.forEach(c -> c.learn(token)));
        collectors.forEach(Collector::reset);
    });

    return collectors.stream()
            .collect(Collectors.toMap(Collector::getPrefixLength, Collector::getData));
}
} }
package markov;
/**
 * First, array based parser: splits the input into lines and words and
 * records, for every token, the {@code prefix_length} tokens in front of it.
 */
public class Parser_first_version {

    private final int prefix_length;

    /**
     * @param prefix_length number of tokens that make up a prefix
     */
    public Parser_first_version(int prefix_length) {
        super();
        this.prefix_length = prefix_length;
    }

    /**
     * Builds a model from {@code input}, treating every line as one sequence.
     *
     * @param input text to learn from, lines separated by {@code \n}
     * @return the model holding every (prefix, following token) pair found
     */
    public Data parse(String input) {
        Data data = new Data(prefix_length);
        for (String line : input.split("\n")) {
            line = line.replace("Kai☺UWE empfiehlt ", "");
            Token[] tokens = tokenize(line);
            // The first prefix_length entries are START padding, so begin behind them.
            for (int i = prefix_length; i < tokens.length; i++) {
                Prefix p = getPrefix(tokens, i);
                Token t = getToken(tokens, i);
                data.add(p, t);
            }
        }
        return data;
    }

    /**
     * Splits a line at single spaces and frames the words with
     * {@code prefix_length} START tokens in front and one END token behind.
     */
    private Token[] tokenize(String line) {
        String[] strings = line.split(" ");
        Token[] tokens = new Token[strings.length + prefix_length + 1];
        int i = 0;
        for (; i < prefix_length; i++) {
            tokens[i] = Token.START;
        }
        for (; i < strings.length + prefix_length; i++) {
            tokens[i] = new Token(strings[i - prefix_length], Glyph.Type.word);
        }
        tokens[i] = Token.END;
        return tokens;
    }

    /**
     * Returns the {@code prefix_length} tokens directly in front of
     * {@code index}; the token at {@code index} itself is not part of it.
     */
    private Prefix getPrefix(Token[] tokens, int index) {
        Token[] prefixTokens = new Token[prefix_length];
        System.arraycopy(tokens, index - prefix_length, prefixTokens, 0, prefix_length);
        return new Prefix(prefixTokens);
    }

    /** Returns the token at position {@code i}. */
    private Token getToken(Token[] tokens, int i) {
        return tokens[i];
    }
}
...@@ -18,24 +18,31 @@ public class Prefix { ...@@ -18,24 +18,31 @@ public class Prefix {
} }
public Prefix slide(Token newToken) { public Prefix slide(Token newToken) {
return slide(newToken, this.tokens.size());
}
public Prefix slide(Token newToken, int maxLength) {
List<Token> newTokens = new ArrayList<>(this.tokens); List<Token> newTokens = new ArrayList<>(this.tokens);
newTokens.remove(0);
newTokens.add(newToken); newTokens.add(newToken);
while(newTokens.size() > maxLength) {
newTokens.remove(0);
}
return new Prefix(newTokens); return new Prefix(newTokens);
} }
private List<Token> compressLeadingStarts(List<Token> tokens) { private List<Token> compressLeadingStarts(List<Token> tokens) {
if (tokens.size() > 1 && tokens.get(0) == Token.START) {
Iterator<Token> iterator = tokens.iterator();
for (Token t = iterator.next(); iterator.hasNext(); t = iterator.next()) {
if (t == Token.START) {
iterator.remove();
} else {
break;
}
}
}
return tokens; return tokens;
// if (tokens.size() > 1 && tokens.get(0) == Token.START) {
// Iterator<Token> iterator = tokens.iterator();
// for (Token t = iterator.next(); iterator.hasNext(); t = iterator.next()) {
// if (t == Token.START) {
// iterator.remove();
// } else {
// break;
// }
// }
// }
// return tokens;
} }
@Override @Override
......
...@@ -4,76 +4,85 @@ import java.util.Collection; ...@@ -4,76 +4,85 @@ import java.util.Collection;
import java.util.Map.Entry; import java.util.Map.Entry;
public class Renderer { public class Renderer {
public static class Options { public static class Options {
private final boolean propability; private final boolean propability;
private final boolean possibilities; private final boolean possibilities;
private final boolean recomends;
private final boolean specialToken;
private final String prefix;
public Options(boolean propability, boolean possibilities) { public Options(boolean propability, boolean possibilities, boolean recomends, boolean specialToken, String prefix) {
super(); this.propability = propability;
this.propability = propability; this.possibilities = possibilities;
this.possibilities = possibilities; this.recomends = recomends;
} this.specialToken = specialToken;
this.prefix = prefix;
}
public static final Options NONE = new Options(false, false); public static final Options NONE = new Options(false, false, false, false, "");
public static final Options FULL = new Options(true, true); public static final Options FULL = new Options(true, true, true, true, "");
public static final Options TOKEN = new Options(true, false, false, true, "");
} }
private final Data data; private final Data data;
private final Options options; private final Options options;
public Renderer(Data data) { public Renderer(Data data) {
super(); super();
this.data = data; this.data = data;
this.options = Options.NONE; this.options = Options.NONE;
} }
public Renderer(Data data, Options options) { public Renderer(Data data, Options options) {
super(); super();
this.data = data; this.data = data;
this.options = options; this.options = options;
} }
public String render(Collection<Entry<Prefix, Token>> collection) { public String render(Collection<Entry<Prefix, Decission>> sentence) {
double p = 1;
StringBuilder sb = new StringBuilder();
if (options.recomends)
sb.append("KAI-uwe empfiehlt");
for (Entry<Prefix, Decission> t : sentence) {
Lookup lookup = data.fetch(t.getKey());
int possibilities = lookup.getDistinctTokens();
p *= (double) lookup.getAmount(t.getValue().getToken())
/ (double) lookup.getTotalCounts();
if (options.possibilities) {
if (possibilities <= 1) {
sb.append("-");
} else if (possibilities <= 2) {
sb.append("--");
} else if (possibilities <= 3) {
sb.append("---");
} else if (possibilities <= 5) {
sb.append("_");
} else if (possibilities <= 10) {
sb.append("__");
} else if (possibilities <= 20) {
sb.append("___");
} else if (possibilities <= 50) {
sb.append("*");
} else if (possibilities <= 100) {
sb.append("**");
} else if (possibilities <= 200) {
sb.append("***");
} else {
sb.append(" ");
}
}
sb.append(t.getValue().getToken().render(options.prefix));
}
if (options.propability) {
sb.append(" - " + p);
}
return sb.toString();
}
double p = 1;
StringBuilder sb = new StringBuilder();
sb.append("KAI-uwe empfiehlt");
for (Entry<Prefix, Token> t : collection) {
Lookup lookup = data.fetch(t.getKey());
int possibilities = lookup.getDistinctTokens();
p *= (double) lookup.getAmount(t.getValue())
/ (double) lookup.getTotalCounts();
if (options.possibilities) {
if (possibilities <= 1) {
sb.append("-");
} else if (possibilities <= 2) {
sb.append("--");
} else if (possibilities <= 3) {
sb.append("---");
} else if (possibilities <= 5) {
sb.append("_");
} else if (possibilities <= 10) {
sb.append("__");
} else if (possibilities <= 20) {
sb.append("___");
} else if (possibilities <= 50) {
sb.append("*");
} else if (possibilities <= 100) {
sb.append("**");
} else if (possibilities <= 200) {
sb.append("***");
} else {
sb.append(" ");
}
}
sb.append(t.getValue().render(" "));
}
if (options.propability) {
sb.append(" - " + p);
}
return sb.toString();
}
} }
package markov;
import java.nio.charset.Charset;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Base64;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Converts a generated sentence into a compact Base64 id and back.
 *
 * The id of every decision is written as one Unicode code point; the
 * resulting string is UTF-8 encoded and then Base64 encoded.
 */
public class Shortener {

    /** Fixed charset so that ids do not depend on the platform default. */
    private static final Charset ENCODING = Charset.forName("UTF-8");

    private final Data data;

    /**
     * @param data model used by {@link #getSentence(String)} to replay the ids
     */
    public Shortener(Data data) {
        this.data = data;
    }

    /**
     * @param sentence the decisions of a sentence, in order
     * @return Base64 id built from the ids of all decisions
     * @throws IllegalArgumentException if an id cannot be stored as a code point
     */
    public String getId(Collection<Map.Entry<Prefix, Decission>> sentence) {
        return fromInts(sentence.stream()
                .map(d -> d.getValue().getId()));
    }

    /**
     * @param ints ids to encode, each a valid non-surrogate code point
     * @return Base64 text of the UTF-8 bytes of the code points
     * @throws IllegalArgumentException if an id cannot be stored as a code point
     */
    String fromInts(Stream<Integer> ints) {
        String cps = ints.map(Shortener::toCodePointString).collect(Collectors.joining());
        return Base64.getEncoder().encodeToString(cps.getBytes(ENCODING));
    }

    /** Turns one id into the string holding exactly that code point. */
    private static String toCodePointString(int id) {
        // A lone surrogate has no UTF-8 form and would be replaced silently when encoding.
        if (id >= Character.MIN_SURROGATE && id <= Character.MAX_SURROGATE) {
            throw new IllegalArgumentException(
                    String.format("id %d lies in the surrogate range and cannot be encoded", id));
        }
        // toChars rejects values outside [0; 0x10FFFF] with an IllegalArgumentException.
        return new String(Character.toChars(id));
    }

    /**
     * @param hash id as produced by {@link #fromInts(Stream)}
     * @return the encoded ids, in order
     */
    Stream<Integer> toInts(String hash) {
        return new String(Base64.getDecoder().decode(hash), ENCODING).codePoints().boxed();
    }

    /**
     * Replays the ids stored in {@code hash} against the model, starting from
     * an empty prefix.
     *
     * @param hash id as produced by {@link #getId(Collection)}
     * @return the prefix and decision of every step, in order
     * @throws NullPointerException if the model has no lookup for a prefix
     *                              reached while replaying
     */
    public Collection<Map.Entry<Prefix, Decission>> getSentence(String hash) {
        List<Integer> ids = toInts(hash).collect(Collectors.toList());
        Collection<Map.Entry<Prefix, Decission>> sentence = new ArrayList<>();
        Prefix prefix = new Prefix(new Token[0]);
        for (int id : ids) {
            Lookup lookup = data.fetch(prefix);
            if (lookup == null) {
                throw new NullPointerException(String.format("could not find a lookup for %s", prefix));
            }
            Decission d = lookup.forId(id);
            sentence.add(new AbstractMap.SimpleEntry<>(prefix, d));
            prefix = prefix.slide(d.getToken(), data.getPrefixLength());
        }
        return sentence;
    }
}
package markov; package markov;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
public class ParserStreamStyle { public class Tokenizer {
public Stream<Token> collect(Stream<Glyph> glyphs) {
Data data = new Data();
public Stream<Stream<Token>> tokenizeCombined(Stream<Stream<String>> input) {
return input.map(stringStream -> putMarkers(combineToTokens(stringStream.flatMap(s -> s.codePoints().boxed().map(integer -> glyphFromCodePoint(integer))))));
}
public Stream<Stream<Token>> tokenize(Stream<String> input) {
return input.map(s -> putMarkers(combineToTokens(s.codePoints().boxed().map(integer -> glyphFromCodePoint(integer)))));
}
private Glyph glyphFromCodePoint(int codePoint) {
Glyph.Type type = Glyph.Type.other;
if (Character.isWhitespace(codePoint)) {
type = Glyph.Type.whitespace;
} else if (Character.isAlphabetic(codePoint)) {
type = Glyph.Type.word;
} else if (Arrays.asList(Character.START_PUNCTUATION, Character.END_PUNCTUATION, Character.INITIAL_QUOTE_PUNCTUATION, Character.FINAL_QUOTE_PUNCTUATION,
Character.OTHER_PUNCTUATION).contains(Character.getType(codePoint))) {
type = Glyph.Type.punctuation;
}
String value = new String(Character.toChars(codePoint));
return new Glyph(type, value);
}
private Stream<Token> putMarkers(Stream<Token> input) {
return Stream.concat(Stream.of(Token.START), Stream.concat(input, Stream.of(Token.END)));
}
public Stream<Token> combineToTokens(Stream<Glyph> glyphs) {
final Container[] previous = new Container[]{null}; final Container[] previous = new Container[]{null};
return glyphs.map(g -> { return Stream.concat(glyphs, Stream.of(new Glyph(Glyph.Type.empty, "")))
Container o = new Container(g); .map(g -> {
o.p = previous[0]; Container o = new Container(g);
previous[0] = o; o.p = previous[0];
return o; previous[0] = o;
}).flatMap(go -> { return o;
if (go.p != null && go.p.self != null && !go.self.getType().equals(go.p.self.getType())) { }).flatMap(go -> {
List<Glyph> containers = new ArrayList<>(); if (go.p != null && go.p.self != null && !go.self.getType().equals(go.p.self.getType())) {
Container c = go; List<Glyph> tokenGlyphs = new ArrayList<>();
do { Container c = go;
c = c.p; do {
containers.add(0, c.self); c = c.p;
} tokenGlyphs.add(0, c.self);
while (c != null && c.p != null && c.p.self != null && }
c.self.getType().equals(c.p.self.getType())); while (c != null && c.p != null && c.p.self != null &&
return Stream.<List<Glyph>>builder().add(containers).build(); c.self.getType().equals(c.p.self.getType()));
} else { go.p = null; // memory optimization
return Stream.empty(); return Stream.<List<Glyph>>builder().add(tokenGlyphs).build();
} } else {
}).map(l -> { return Stream.empty();
String content = l.stream().map(glyph -> glyph.getContent()).collect(Collectors.joining()); }
Glyph.Type type = l.get(0).getType(); }).map(l -> getTokenFromGlyphs(l));
return new Token(content, type); }
});
private Token getTokenFromGlyphs(List<Glyph> l) {
String content = l.stream().map(glyph -> glyph.getContent()).collect(Collectors.joining());
Glyph.Type type = l.get(0).getType();
return new Token(content, type);
} }
private static class Container { private static class Container {
...@@ -45,6 +75,4 @@ public class ParserStreamStyle { ...@@ -45,6 +75,4 @@ public class ParserStreamStyle {
final Glyph self; final Glyph self;
Container p; Container p;
} }
;
} }
package markov;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.stream.Stream;
/** Static helpers for int/byte conversion and for single-model parsing. */
public class Utils {

    /**
     * @param value the int to convert
     * @return the four bytes {@link ByteBuffer#putInt(int)} writes for {@code value}
     */
    public static byte[] toByteArray(int value) {
        ByteBuffer buffer = ByteBuffer.allocate(4);
        buffer.putInt(value);
        return buffer.array();
    }

    /**
     * @param bytes array whose first four bytes hold the int
     * @return the int read from the start of {@code bytes}
     */
    public static int fromByteArray(byte[] bytes) {
        ByteBuffer buffer = ByteBuffer.wrap(bytes);
        return buffer.getInt();
    }

    /**
     * Tokenizes {@code input} and learns one model with the given prefix length.
     *
     * @param input        texts to learn from
     * @param prefixLength prefix length of the model
     * @return the learned model
     */
    public static Data parse(Stream<String> input, int prefixLength) {
        Collection<Integer> prefixLengths = new ArrayList<>();
        prefixLengths.add(prefixLength);

        Parser parser = new Parser();
        Tokenizer tokenizer = new Tokenizer();
        Collection<Data> models = parser.parse(tokenizer.tokenize(input), prefixLengths).values();
        // Exactly one prefix length was requested, so take the first model.
        return models.iterator().next();
    }
}
...@@ -12,11 +12,12 @@ import static org.junit.jupiter.api.Assertions.assertTrue; ...@@ -12,11 +12,12 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
public class DataTests { public class DataTests {
private static final int prefixLength = 0;
Data data; Data data;
@Test @Test
public void isInstantiatedWithNew() { public void isInstantiatedWithNew() {
data = new Data(); data = new Data(prefixLength);
} }
@Nested @Nested
...@@ -27,17 +28,17 @@ public class DataTests { ...@@ -27,17 +28,17 @@ public class DataTests {
@BeforeEach @BeforeEach
public void create() { public void create() {
data = new Data(); data = new Data(prefixLength);
data.add(pa, ta); data.add(pa, ta);
} }
@Test @Test
public void canReceieve() { public void canReceieve() {
assertTrue(data.fetch(pa).allPosible().contains(new Token("a"))); assertTrue(data.fetch(pa).allPossible().contains(new Token("a")));
assertEquals(1, data.fetch(pa).allPosible().size()); assertEquals(1, data.fetch(pa).allPossible().size());
data.add(pa, new Token("b")); data.add(pa, new Token("b"));
assertTrue(data.fetch(pa).allPosible().contains(new Token("b"))); assertTrue(data.fetch(pa).allPossible().contains(new Token("b")));
assertEquals(2, data.fetch(pa).allPosible().size()); assertEquals(2, data.fetch(pa).allPossible().size());
} }
@Test @Test
......
...@@ -16,11 +16,11 @@ class GlyphTest { ...@@ -16,11 +16,11 @@ class GlyphTest {
assertEquals(a, a2); assertEquals(a, a2);
assertNotEquals(a, w); assertNotEquals(a, w);
assertNotEquals(a, c); assertNotEquals(a, c);
assertNotEquals(Glyph.End, c); // assertNotEquals(Glyph.End, c);
assertNotEquals(Glyph.Start, c); // assertNotEquals(Glyph.Start, c);
assertNotEquals(Glyph.Start, Glyph.End); // assertNotEquals(Glyph.Start, Glyph.End);
assertEquals(Glyph.Start, Glyph.Start); // assertEquals(Glyph.Start, Glyph.Start);
assertEquals(Glyph.End, Glyph.End); // assertEquals(Glyph.End, Glyph.End);
} }
} }
\ No newline at end of file
...@@ -25,14 +25,14 @@ public class LookupTests { ...@@ -25,14 +25,14 @@ public class LookupTests {
@Test @Test
public void add() { public void add() {
lookup.add(new Token("b")); lookup.add(new Token("b"));
assertTrue(lookup.allPosible().contains(new Token("b"))); assertTrue(lookup.allPossible().contains(new Token("b")));
assertTrue(lookup.allPosible().contains(new Token("a"))); assertTrue(lookup.allPossible().contains(new Token("a")));
} }
@Test @Test
public void forRandom() { public void forRandom() {
assertEquals(lookup.forRandom(0), new Token("a")); assertEquals(lookup.forRandom(0).getToken(), new Token("a"));
assertEquals(lookup.forRandom(0.5d), new Token("a")); assertEquals(lookup.forRandom(0.5d).getToken(), new Token("a"));
// assertEquals(lookup.forRandom(1), new Token("a")); // assertEquals(lookup.forRandom(1), new Token("a"));
assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(1)); assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(1));
assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(-1)); assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(-1));
...@@ -40,14 +40,14 @@ public class LookupTests { ...@@ -40,14 +40,14 @@ public class LookupTests {
assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(2)); assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(2));
lookup.add(new Token("b")); lookup.add(new Token("b"));
Token a = lookup.forRandom(0); Decission a = lookup.forRandom(0);
Token b = lookup.forRandom(0.9d); Decission b = lookup.forRandom(0.9d);
assertNotEquals(a, b); assertNotEquals(a, b);
lookup.add(new Token("c")); lookup.add(new Token("c"));
a = lookup.forRandom(0); a = lookup.forRandom(0);
b = lookup.forRandom(0.51d); b = lookup.forRandom(0.51d);
Token c = lookup.forRandom(0.9d); Decission c = lookup.forRandom(0.9d);
assertNotEquals(a, b); assertNotEquals(a, b);
assertNotEquals(a, c); assertNotEquals(a, c);
assertNotEquals(b, c); assertNotEquals(b, c);
...@@ -55,30 +55,30 @@ public class LookupTests { ...@@ -55,30 +55,30 @@ public class LookupTests {
@Test @Test
public void average() { public void average() {
assertEquals(lookup.average(), new Token("a")); assertEquals(lookup.average().getToken(), new Token("a"));
lookup.add(new Token("b")); lookup.add(new Token("b"));
Token token = lookup.average(); Token token = lookup.average().getToken();
assertTrue(() -> token.equals(new Token("a")) || token.equals(new Token("b"))); assertTrue(() -> token.equals(new Token("a")) || token.equals(new Token("b")));
lookup.add(new Token("a")); lookup.add(new Token("a"));
assertEquals(lookup.average(), new Token("a")); assertEquals(lookup.average().getToken(), new Token("a"));
lookup.add(new Token("b")); lookup.add(new Token("b"));
lookup.add(new Token("b")); lookup.add(new Token("b"));
assertEquals(lookup.average(), new Token("b")); assertEquals(lookup.average().getToken(), new Token("b"));
} }
@Test @Test
public void allPossible() { public void allPossible() {
assertIterableEquals(lookup.allPosible(), assertIterableEquals(lookup.allPossible(),
Arrays.asList(new Token("a"))); Arrays.asList(new Token("a")));
lookup.add(new Token("b")); lookup.add(new Token("b"));
assertIterableEquals(lookup.allPosible(), assertIterableEquals(lookup.allPossible(),
Arrays.asList(new Token("a"), new Token("b"))); Arrays.asList(new Token("a"), new Token("b")));
lookup.add(new Token("c")); lookup.add(new Token("c"));
assertIterableEquals(lookup.allPosible(), assertIterableEquals(lookup.allPossible(),
Arrays.asList(new Token("a"), new Token("b"), new Token("c"))); Arrays.asList(new Token("a"), new Token("b"), new Token("c")));
lookup.add(new Token("a")); lookup.add(new Token("a"));
assertIterableEquals(lookup.allPosible(), assertIterableEquals(lookup.allPossible(),
Arrays.asList(new Token("a"), new Token("b"), new Token("c"))); Arrays.asList(new Token("a"), new Token("b"), new Token("c")));
} }
......
...@@ -7,26 +7,34 @@ import java.util.Arrays; ...@@ -7,26 +7,34 @@ import java.util.Arrays;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
class ParserStreamStyleTest { public class ParserStreamStyleTest {
@Test @Test
void tests() { void tests() {
ParserStreamStyle parserStreamStyle = new ParserStreamStyle(); // Collector parserStreamStyle = new Collector(1);
Assertions.assertIterableEquals( // Assertions.assertIterableEquals(
Arrays.asList( // Arrays.asList(
new Token(Glyph.Start.getContent(), Glyph.Type.control), // new Token(Glyph.Start.getContent(), Glyph.Type.control),
new Token("ab"), // new Token("ab"),
new Token(" ", Glyph.Type.whitespace), // new Token(" ", Glyph.Type.whitespace),
new Token("c"), // new Token("c"),
new Token(Glyph.End.getContent(), Glyph.Type.control)), // new Token(Glyph.End.getContent(), Glyph.Type.control)),
parserStreamStyle.collect(Stream.of(Glyph.Start, // parserStreamStyle.combineToTokens(Stream.of(Glyph.Start,
new Glyph(Glyph.Type.word, "a"), // new Glyph(Glyph.Type.word, "a"),
new Glyph(Glyph.Type.word, "b"), // new Glyph(Glyph.Type.word, "b"),
new Glyph(Glyph.Type.whitespace, " "), // new Glyph(Glyph.Type.whitespace, " "),
new Glyph(Glyph.Type.word, "c"), // new Glyph(Glyph.Type.word, "c"),
Glyph.End, new Glyph(Glyph.Type.empty, "EMPTY"))) // Glyph.End, new Glyph(Glyph.Type.empty, "EMPTY")))
.collect(Collectors.toList())); // .collect(Collectors.toList()));
} }
// @Test
// void test2() {
// Collector parserStreamStyle = new Collector(2);
// Data data = parserStreamStyle.learn(Stream.<String>builder().add("a bc a d_e ").build());
// System.out.println(data.dumpStats());
// System.out.println(data.fetch(new Prefix(Arrays.asList(new Token("e")))).allPossible());
// }
} }
\ No newline at end of file
...@@ -18,26 +18,6 @@ public class PrefixTests { ...@@ -18,26 +18,6 @@ public class PrefixTests {
assertEquals(new Prefix(Arrays.asList(new Token("a"))), prefix); assertEquals(new Prefix(Arrays.asList(new Token("a"))), prefix);
} }
// FIXME
@Test
public void testStartCompression() {
prefix = new Prefix(Arrays.asList(Token.START));
assertEquals(new Prefix(Arrays.asList(Token.START)), prefix);
assertEquals(new Prefix(Arrays.asList(Token.START, Token.START, Token.START)), prefix);
prefix = new Prefix(Arrays.asList(Token.START, Token.START));
assertEquals(new Prefix(Arrays.asList(Token.START)), prefix);
prefix = new Prefix(Arrays.asList(Token.START, new Token("")));
assertEquals(new Prefix(Arrays.asList(Token.START, new Token(""))), prefix);
prefix = new Prefix(Arrays.asList(Token.START, new Token(""), new Token("")));
assertEquals(new Prefix(Arrays.asList(Token.START, new Token(""), new Token(""))), prefix);
assertEquals(new Prefix(Arrays.asList(Token.START, Token.START, Token.START, new Token(""), new Token(""))),
prefix);
prefix = new Prefix(Arrays.asList(Token.START, Token.START, new Token(""), new Token("")));
assertEquals(new Prefix(Arrays.asList(Token.START, new Token(""), new Token(""))), prefix);
}
@Test @Test
public void testSlide() { public void testSlide() {
prefix = new Prefix(Arrays.asList(new Token("a"))); prefix = new Prefix(Arrays.asList(new Token("a")));
...@@ -49,5 +29,12 @@ public class PrefixTests { ...@@ -49,5 +29,12 @@ public class PrefixTests {
assertEquals(new Prefix(Arrays.asList(new Token("b"), new Token("c"))), p2); assertEquals(new Prefix(Arrays.asList(new Token("b"), new Token("c"))), p2);
} }
@Test
public void testFancySlide() {
prefix = new Prefix(Arrays.asList(new Token("a")));
Prefix p2 = prefix.slide(new Token("b"),2);
assertEquals(new Prefix(Arrays.asList(new Token("a"), new Token("b"))), p2);
}
} }
\ No newline at end of file
package markov;
import org.junit.jupiter.api.Test;
import java.util.Base64;
import java.util.stream.Stream;
class ShortenerTest {
@Test
void shortenerTest() {
String hash = new Shortener(null).fromInts(Stream.of(561,0,64,0));
System.out.println(Base64.getEncoder().encodeToString("\0\0@\0\0".getBytes()));
System.out.println(hash);
System.out.println(hash.length());
}
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment