Commit 6763731a by Hut

many much

parent b450ff1d
......@@ -12,7 +12,7 @@
# * Deploy built artifacts from master branch only.
# * Shows how to use multiple jobs in test stage for verifying functionality
# with multiple JDKs.
# * Uses site:stage to collect the documentation for multi-module projects.
# * Uses site:stage to combineToTokens the documentation for multi-module projects.
# * Publishes the documentation for `master` branch.
variables:
......
......@@ -6,7 +6,7 @@
<groupId>passive.directory</groupId>
<artifactId>markov</artifactId>
<version>1.0-SNAPSHOT</version>
<version>0.2</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
......@@ -29,6 +29,12 @@
<version>1.0.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>javax.mail</groupId>
<artifactId>mail</artifactId>
<version>1.4.7</version>
</dependency>
</dependencies>
<build>
......
......@@ -10,52 +10,51 @@ import java.util.function.Function;
public class Builder {
private final int prefix_length;
private final Data data;
public Builder(int prefix_length, Data data) {
super();
this.prefix_length = prefix_length;
this.data = data;
}
public Collection<Map.Entry<Prefix, Token>> random() {
return produce(p -> nextRandom(p));
}
public Collection<Map.Entry<Prefix, Token>> average() {
return produce(p -> data.fetch(p).average());
}
private Collection<Map.Entry<Prefix, Token>> produce(
Function<Prefix, Token> producer) {
List<Map.Entry<Prefix, Token>> result = new LinkedList<>();
Prefix p = initPrefix();
Token t = Token.START;
for (int i = 0; i < 200; i++) {
if (t == Token.END)
break;
t = producer.apply(p);
result.add(new AbstractMap.SimpleEntry<Prefix, Token>(p, t));
p = p.slide(t);
}
return result;
}
private Prefix initPrefix() {
Token[] initToken = new Token[prefix_length];
for (int i = 0; i < prefix_length; i++) {
initToken[i] = Token.START;
}
Prefix p = new Prefix(initToken);
return p;
}
private Token nextRandom(Prefix prefix) {
return data.fetch(prefix).forRandom(nextRandomNumber());
}
private double nextRandomNumber() {
return ThreadLocalRandom.current().nextDouble();
}
private final int prefix_length;
private final Data data;
public Builder(int prefix_length, Data data) {
super();
this.prefix_length = prefix_length;
this.data = data;
}
public Collection<Map.Entry<Prefix, Decission>> random() {
return produce(p -> nextRandom(p));
}
public Collection<Map.Entry<Prefix, Decission>> average() {
return produce(p -> data.fetch(p).average());
}
private Collection<Decission> produce(
Function<Prefix, Decission> producer) {
List<Decission> result = new LinkedList<>();
Prefix p = initPrefix();
Decission d = new Decission(Token.START, 0, p, null);
for (int i = 0; i < 1000; i++) {
if (d.getToken() == Token.END)
break;
d = producer.apply(p);
result.add(new AbstractMap.SimpleEntry<>(p, d));
p = p.slide(d.getToken(), prefix_length);
}
return result;
}
private Prefix initPrefix() {
return new Prefix(new Token[0]);
}
private Decission nextRandom(Prefix prefix) {
Lookup l = data.fetch(prefix);
if (l == null) {
throw new NullPointerException(String.format("could not find a lookup for %s", prefix));
}
return l.forRandom(nextRandomNumber());
}
private double nextRandomNumber() {
return ThreadLocalRandom.current().nextDouble();
}
}
package markov;
import java.util.Collection;
import java.util.stream.Stream;
public class Collector {
private final int prefixLength;
private final Prefix[] slider = new Prefix[]{getNewPrefix()};
private final Data data;
public Collector(int prefixLength) {
this.prefixLength = prefixLength;
this.data = new Data(prefixLength);
}
public void learn(Token token) {
data.add(slider[0], token);
slider[0] = slider[0].slide(token, this.prefixLength);
}
public void reset() {
slider[0] = getNewPrefix();
}
private Prefix getNewPrefix() {
return new Prefix(new Token[0]);
}
public int getPrefixLength() {
return prefixLength;
}
public Data getData() {
return data;
}
}
......@@ -8,52 +8,64 @@ import java.util.stream.Collectors;
// FIXME rename
public class Data {
private final Map<Prefix, Lookup> data = new HashMap<>();
public void add(Prefix p, Token t) {
Lookup l = data.getOrDefault(p, new Lookup());
l.add(t);
data.put(p, l);
}
public Lookup fetch(Prefix p) {
return data.get(p);
}
public Lookup info(Token... t) {
return data.get(new Prefix(t));
}
public Lookup info(String... s) {
return data.get(new Prefix(Arrays.stream(s).map(t -> new Token(t, Glyph.Type.word))
.collect(Collectors.toList())));
}
@Override
public String toString() {
return "Data [data=" + data + "]";
}
public String dumpStats() {
return "Total hits: "
+ data.entrySet().stream()
.sorted((o1,
o2) -> -1 * Integer.compare(
o1.getValue().getTotalCounts(),
o2.getValue().getTotalCounts()))
.map(e -> String.format("%s - %d", e.getKey(),
e.getValue().getTotalCounts()))
.collect(Collectors.joining(", "))
+ "\n" + "Distinct possibilities "
+ data.entrySet().stream()
.sorted((o1,
o2) -> -1 * Integer.compare(
o1.getValue().getDistinctTokens(),
o2.getValue().getDistinctTokens()))
.map(e -> String.format("%s - %d", e.getKey(),
e.getValue().getDistinctTokens()))
.collect(Collectors.joining(", "));
}
private final int prefixLength;
private final Map<Prefix, Lookup> data = new HashMap<>();
public Data(int prefixLength) {
this.prefixLength = prefixLength;
}
public int getPrefixLength() {
return prefixLength;
}
public void add(Prefix p, Token t) {
Lookup l = data.getOrDefault(p, new Lookup());
// System.out.println(String.format("adding %s - %s", p, t, l));
l.add(t);
data.put(p, l);
}
public Lookup fetch(Prefix p) {
return data.get(p);
}
public Lookup info(Token... t) {
return data.get(new Prefix(t));
}
public Lookup info(String... s) {
return data.get(new Prefix(Arrays.stream(s).map(t -> new Token(t, Glyph.Type.word))
.collect(Collectors.toList())));
}
@Override
public String toString() {
return "Data [data=" + data + "]";
}
public String dumpStats() {
return "Total hits: "
+ data.entrySet().stream()
.sorted((o1,
o2) -> -1 * Integer.compare(
o1.getValue().getTotalCounts(),
o2.getValue().getTotalCounts()))
.limit(50)
.map(e -> String.format("%s - %d", e.getKey(),
e.getValue().getTotalCounts()))
.collect(Collectors.joining(", "))
+ "\n" + "Distinct possibilities "
+ data.entrySet().stream()
.sorted((o1,
o2) -> -1 * Integer.compare(
o1.getValue().getDistinctTokens(),
o2.getValue().getDistinctTokens()))
.limit(50)
.map(e -> String.format("%s - %d", e.getKey(),
e.getValue().getDistinctTokens()))
.collect(Collectors.joining(", "));
}
}
package markov;
public class Decission {
private final Token token;
private final int id;
private Prefix p;
private Lookup lookup;
public Decission(Token key, int id) {
token = key;
this.id = id;
}
public void setLookup(Lookup lookup) {
this.lookup = lookup;
}
public void setP(Prefix p) {
this.p = p;
}
public Prefix getP() {
return p;
}
public Lookup getLookup() {
return lookup;
}
public Token getToken() {
return token;
}
public int getId() {
return id;
}
}
......@@ -2,12 +2,9 @@ package markov;
public class Glyph {
public enum Type {
word, interpunction, control, whitespace, empty
word, punctuation, control, whitespace, empty, other
}
public static final Glyph Start = new Glyph(Type.control, "START");
public static final Glyph End = new Glyph(Type.control, "END");
private Type type;
private String content;
......
......@@ -5,18 +5,8 @@ import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Collectors;
// FIXME rename
public class Lookup {
public static final Lookup empty = new Lookup() {
public Token forRandom(double random) {
return Token.EMPTY;
}
};
public Lookup() {
}
private final LinkedHashMap<Token, Integer> tokens = new LinkedHashMap<Token, Integer>();
int getTotalCounts() {
......@@ -36,8 +26,8 @@ public class Lookup {
tokens.put(t, i + 1);
}
public Token forRandom(double random) {
if(random <0 || random >=1) {
public Decission forRandom(double random) {
if (random < 0 || random >= 1) {
throw new IllegalArgumentException("expected double [0; 1)");
}
int id = (int) Math.floor(random * getTotalCounts());
......@@ -45,7 +35,24 @@ public class Lookup {
for (Entry<Token, Integer> entry : tokens.entrySet()) {
i += entry.getValue();
if (id < i) {
return entry.getKey();
return new Decission(entry.getKey(), id);
}
}
throw new IllegalStateException(String.format(
"failed to find a random token with seed %d within %s", id,
this));
}
public Decission forId(int id) {
if (id < 0 || id > getTotalCounts()) {
throw new IllegalArgumentException("expected id [0; totalCount)");
}
int i = 0;
for (Entry<Token, Integer> entry : tokens.entrySet()) {
i += entry.getValue();
if (id < i) {
return new Decission(entry.getKey(), id);
}
}
throw new IllegalStateException(String.format(
......@@ -54,14 +61,14 @@ public class Lookup {
}
public Token average() {
public Decission average() {
return tokens
.entrySet().stream().sorted((e1, e2) -> Integer
.compare(e2.getValue(), e1.getValue()))
.findFirst().get().getKey();
.findFirst().map(e -> new Decission(e.getKey(), e.getValue())).get();
}
public Set<Token> allPosible() {
public Set<Token> allPossible() {
return this.tokens.keySet();
}
......
package markov;
import javax.mail.MessagingException;
import javax.mail.Session;
import javax.mail.internet.MimeMessage;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
import java.util.Properties;
import java.util.stream.Stream;
public class Mail {
public static void main(String[] args) {
int prefixLength = 3;
Parser parser = new Parser();
File parent = new File("C:\\Users\\admin\\Desktop\\emails");
Stream<String> mails = Arrays.stream(parent.listFiles()).limit(500).map(file -> file.toPath()).map(path -> {
try {
MimeMessage m = new MimeMessage(Session.getDefaultInstance(new Properties()), Files.newInputStream(path));
//System.out.println(m.getContentType());
// return m.getContent().toString();
return m;
} catch (IOException | MessagingException e) {
throw new RuntimeException(e);
}
}).filter(mimeMessage -> {
try {
return mimeMessage.getContentType().contains("text/plain");
} catch (MessagingException e) {
throw new RuntimeException(e);
}
}).map(m -> {
try {
return m.getContent().toString();
} catch (IOException | MessagingException e) {
throw new RuntimeException(e);
}
});
Data data = Utils.parse(mails, prefixLength);
Builder b = new Builder(prefixLength, data);
Renderer r = new Renderer(data, Renderer.Options.FULL);
Renderer r1 = new Renderer(data, Renderer.Options.NONE);
Shortener shortener = new Shortener(data);
for (int i = 0; i < 10; i++) {
Collection<Map.Entry<Prefix, Decission>> sentence = b.random();
// System.out.println(r1.render(sentence));
String id = shortener.getId(sentence);
System.out.println(id);
// System.out.println(r.render(shortener.getSentence(id)));
}
// Collection<Map.Entry<Prefix, Token>> a = b.average();
// System.out.println("average: " + r.render(a));
// for (Map.Entry<Prefix, Token> e : a) {
// System.out.println(e.getKey() + " -> " + data.fetch(e.getKey()));
// }
// System.out.println(data.dumpStats());
}
}
package markov;
public interface Parser {
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public class Parser {
public Map<Integer, Data> parse(Stream<Stream<Token>> input, Collection<Integer> prefixes) {
Collection<Collector> collectors = new ArrayList<>();
for (Integer i : prefixes) {
collectors.add(new Collector(i));
}
input.forEachOrdered(tokenStream -> {
tokenStream.forEachOrdered(token -> {
for (Collector c : collectors) {
c.learn(token);
}
});
for (Collector c : collectors) {
c.reset();
}
});
return collectors.stream().collect(Collectors.toMap(c -> c.getPrefixLength(), c -> c.getData()));
}
}
package markov;
public class Parser_first_version {
private final int prefix_length;
public Parser_first_version(int prefix_length) {
super();
this.prefix_length = prefix_length;
}
public Data parse(String input) {
Data data = new Data();
for (String line : input.split("\n")) {
line = line.replace("Kai☺UWE empfiehlt ", "");
Token[] tokens = tokenize(line);
for (int i = 0 + prefix_length; i < tokens.length; i++) {
Prefix p = getPrefix(tokens, i);
Token t = getToken(tokens, i);
data.add(p, t);
}
}
return data;
}
private Token[] tokenize(String line) {
String[] strings = line.split(" ");
Token[] tokens = new Token[strings.length + prefix_length + 1];
int i = 0;
for (; i < prefix_length; i++) {
tokens[i] = Token.START;
}
for (; i < strings.length + prefix_length; i++) {
tokens[i] = new Token(strings[i - prefix_length], Glyph.Type.word);
}
tokens[i] = Token.END;
return tokens;
}
private Prefix getPrefix(Token[] tokens, int index) {
Token[] prefixTokens = new Token[prefix_length];
System.arraycopy(tokens, index - prefix_length + 1, prefixTokens, 0, prefix_length);
// for (int i = 0; i < prefix_length; i++) {
// prefixTokens[i] = tokens[index - prefix_length + i];
// }
return new Prefix(prefixTokens);
}
private Token getToken(Token[] tokens, int i) {
return tokens[i];
}
}
......@@ -18,24 +18,31 @@ public class Prefix {
}
public Prefix slide(Token newToken) {
return slide(newToken, this.tokens.size());
}
public Prefix slide(Token newToken, int maxLength) {
List<Token> newTokens = new ArrayList<>(this.tokens);
newTokens.remove(0);
newTokens.add(newToken);
while(newTokens.size() > maxLength) {
newTokens.remove(0);
}
return new Prefix(newTokens);
}
private List<Token> compressLeadingStarts(List<Token> tokens) {
if (tokens.size() > 1 && tokens.get(0) == Token.START) {
Iterator<Token> iterator = tokens.iterator();
for (Token t = iterator.next(); iterator.hasNext(); t = iterator.next()) {
if (t == Token.START) {
iterator.remove();
} else {
break;
}
}
}
return tokens;
// if (tokens.size() > 1 && tokens.get(0) == Token.START) {
// Iterator<Token> iterator = tokens.iterator();
// for (Token t = iterator.next(); iterator.hasNext(); t = iterator.next()) {
// if (t == Token.START) {
// iterator.remove();
// } else {
// break;
// }
// }
// }
// return tokens;
}
@Override
......
......@@ -4,76 +4,85 @@ import java.util.Collection;
import java.util.Map.Entry;
public class Renderer {
public static class Options {
private final boolean propability;
private final boolean possibilities;
public static class Options {
private final boolean propability;
private final boolean possibilities;
private final boolean recomends;
private final boolean specialToken;
private final String prefix;
public Options(boolean propability, boolean possibilities) {
super();
this.propability = propability;
this.possibilities = possibilities;
}
public Options(boolean propability, boolean possibilities, boolean recomends, boolean specialToken, String prefix) {
this.propability = propability;
this.possibilities = possibilities;
this.recomends = recomends;
this.specialToken = specialToken;
this.prefix = prefix;
}
public static final Options NONE = new Options(false, false);
public static final Options FULL = new Options(true, true);
public static final Options NONE = new Options(false, false, false, false, "");
public static final Options FULL = new Options(true, true, true, true, "");
public static final Options TOKEN = new Options(true, false, false, true, "");
}
}
private final Data data;
private final Options options;
private final Data data;
private final Options options;
public Renderer(Data data) {
super();
this.data = data;
this.options = Options.NONE;
}
public Renderer(Data data) {
super();
this.data = data;
this.options = Options.NONE;
}
public Renderer(Data data, Options options) {
super();
this.data = data;
this.options = options;
}
public Renderer(Data data, Options options) {
super();
this.data = data;
this.options = options;
}
public String render(Collection<Entry<Prefix, Token>> collection) {
public String render(Collection<Entry<Prefix, Decission>> sentence) {
double p = 1;
StringBuilder sb = new StringBuilder();
if (options.recomends)
sb.append("KAI-uwe empfiehlt");
for (Entry<Prefix, Decission> t : sentence) {
Lookup lookup = data.fetch(t.getKey());
int possibilities = lookup.getDistinctTokens();
p *= (double) lookup.getAmount(t.getValue().getToken())
/ (double) lookup.getTotalCounts();
if (options.possibilities) {
if (possibilities <= 1) {
sb.append("-");
} else if (possibilities <= 2) {
sb.append("--");
} else if (possibilities <= 3) {
sb.append("---");
} else if (possibilities <= 5) {
sb.append("_");
} else if (possibilities <= 10) {
sb.append("__");
} else if (possibilities <= 20) {
sb.append("___");
} else if (possibilities <= 50) {
sb.append("*");
} else if (possibilities <= 100) {
sb.append("**");
} else if (possibilities <= 200) {
sb.append("***");
} else {
sb.append(" ");
}
}
sb.append(t.getValue().getToken().render(options.prefix));
}
if (options.propability) {
sb.append(" - " + p);
}
return sb.toString();
}
double p = 1;
StringBuilder sb = new StringBuilder();
sb.append("KAI-uwe empfiehlt");
for (Entry<Prefix, Token> t : collection) {
Lookup lookup = data.fetch(t.getKey());
int possibilities = lookup.getDistinctTokens();
p *= (double) lookup.getAmount(t.getValue())
/ (double) lookup.getTotalCounts();
if (options.possibilities) {
if (possibilities <= 1) {
sb.append("-");
} else if (possibilities <= 2) {
sb.append("--");
} else if (possibilities <= 3) {
sb.append("---");
} else if (possibilities <= 5) {
sb.append("_");
} else if (possibilities <= 10) {
sb.append("__");
} else if (possibilities <= 20) {
sb.append("___");
} else if (possibilities <= 50) {
sb.append("*");
} else if (possibilities <= 100) {
sb.append("**");
} else if (possibilities <= 200) {
sb.append("***");
} else {
sb.append(" ");
}
}
sb.append(t.getValue().render(" "));
}
if (options.propability) {
sb.append(" - " + p);
}
return sb.toString();
}
}
package markov;
import java.nio.charset.Charset;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Base64;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public class Shortener {
private final Data data;
public Shortener(Data data) {
this.data = data;
}
public String getId(Collection<Map.Entry<Prefix, Decission>> sentence) {
return fromInts(sentence.stream()
.map(d -> d.getValue().getId()));
}
String fromInts(Stream<Integer> ints) {
String cps = ints.map(i -> {
System.out.println(i);
return new String(Character.toChars(i));
}).collect(Collectors.joining());
return Base64.getEncoder().encodeToString(cps.getBytes());
}
Stream<Integer> toInts(String hash) {
return new String(Base64.getDecoder().decode(hash.getBytes())).codePoints().boxed();
}
public Collection<Map.Entry<Prefix, Decission>> getSentence(String hash) {
Integer[] ids = toInts(hash).toArray(Integer[]::new);
Collection<Map.Entry<Prefix, Decission>> sentence = new ArrayList<>();
Prefix prefix = new Prefix(new Token[0]);
for (int id : ids) {
Lookup lookup = data.fetch(prefix);
Decission d = lookup.forId(id);
sentence.add(new AbstractMap.SimpleEntry<>(prefix, d));
prefix = prefix.slide(d.getToken(), data.getPrefixLength());
}
return sentence;
}
}
package markov;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public class ParserStreamStyle {
public class Tokenizer {
public Stream<Token> collect(Stream<Glyph> glyphs) {
Data data = new Data();
public Stream<Stream<Token>> tokenizeCombined(Stream<Stream<String>> input) {
return input.map(stringStream -> putMarkers(combineToTokens(stringStream.flatMap(s -> s.codePoints().boxed().map(integer -> glyphFromCodePoint(integer))))));
}
public Stream<Stream<Token>> tokenize(Stream<String> input) {
return input.map(s -> putMarkers(combineToTokens(s.codePoints().boxed().map(integer -> glyphFromCodePoint(integer)))));
}
private Glyph glyphFromCodePoint(int codePoint) {
Glyph.Type type = Glyph.Type.other;
if (Character.isWhitespace(codePoint)) {
type = Glyph.Type.whitespace;
} else if (Character.isAlphabetic(codePoint)) {
type = Glyph.Type.word;
} else if (Arrays.asList(Character.START_PUNCTUATION, Character.END_PUNCTUATION, Character.INITIAL_QUOTE_PUNCTUATION, Character.FINAL_QUOTE_PUNCTUATION,
Character.OTHER_PUNCTUATION).contains(Character.getType(codePoint))) {
type = Glyph.Type.punctuation;
}
String value = new String(Character.toChars(codePoint));
return new Glyph(type, value);
}
private Stream<Token> putMarkers(Stream<Token> input) {
return Stream.concat(Stream.of(Token.START), Stream.concat(input, Stream.of(Token.END)));
}
public Stream<Token> combineToTokens(Stream<Glyph> glyphs) {
final Container[] previous = new Container[]{null};
return glyphs.map(g -> {
Container o = new Container(g);
o.p = previous[0];
previous[0] = o;
return o;
}).flatMap(go -> {
if (go.p != null && go.p.self != null && !go.self.getType().equals(go.p.self.getType())) {
List<Glyph> containers = new ArrayList<>();
Container c = go;
do {
c = c.p;
containers.add(0, c.self);
}
while (c != null && c.p != null && c.p.self != null &&
c.self.getType().equals(c.p.self.getType()));
return Stream.<List<Glyph>>builder().add(containers).build();
} else {
return Stream.empty();
}
}).map(l -> {
String content = l.stream().map(glyph -> glyph.getContent()).collect(Collectors.joining());
Glyph.Type type = l.get(0).getType();
return new Token(content, type);
});
return Stream.concat(glyphs, Stream.of(new Glyph(Glyph.Type.empty, "")))
.map(g -> {
Container o = new Container(g);
o.p = previous[0];
previous[0] = o;
return o;
}).flatMap(go -> {
if (go.p != null && go.p.self != null && !go.self.getType().equals(go.p.self.getType())) {
List<Glyph> tokenGlyphs = new ArrayList<>();
Container c = go;
do {
c = c.p;
tokenGlyphs.add(0, c.self);
}
while (c != null && c.p != null && c.p.self != null &&
c.self.getType().equals(c.p.self.getType()));
go.p = null; // memory optimization
return Stream.<List<Glyph>>builder().add(tokenGlyphs).build();
} else {
return Stream.empty();
}
}).map(l -> getTokenFromGlyphs(l));
}
private Token getTokenFromGlyphs(List<Glyph> l) {
String content = l.stream().map(glyph -> glyph.getContent()).collect(Collectors.joining());
Glyph.Type type = l.get(0).getType();
return new Token(content, type);
}
private static class Container {
......@@ -45,6 +75,4 @@ public class ParserStreamStyle {
final Glyph self;
Container p;
}
;
}
package markov;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.stream.Stream;
public class Utils {
public static byte[] toByteArray(int value) {
return ByteBuffer.allocate(4).putInt(value).array();
}
public static int fromByteArray(byte[] bytes) {
return ByteBuffer.wrap(bytes).getInt();
}
public static Data parse(Stream<String> input, int prefixLength) {
Collection<Integer> collectionDummy = new ArrayList<>();
collectionDummy.add(prefixLength);
return new Parser().parse(new Tokenizer().tokenize(input), collectionDummy).values()
.stream().findFirst().get();
}
}
......@@ -12,11 +12,12 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
public class DataTests {
private static final int prefixLength = 0;
Data data;
@Test
public void isInstantiatedWithNew() {
data = new Data();
data = new Data(prefixLength);
}
@Nested
......@@ -27,17 +28,17 @@ public class DataTests {
@BeforeEach
public void create() {
data = new Data();
data = new Data(prefixLength);
data.add(pa, ta);
}
@Test
public void canReceieve() {
assertTrue(data.fetch(pa).allPosible().contains(new Token("a")));
assertEquals(1, data.fetch(pa).allPosible().size());
assertTrue(data.fetch(pa).allPossible().contains(new Token("a")));
assertEquals(1, data.fetch(pa).allPossible().size());
data.add(pa, new Token("b"));
assertTrue(data.fetch(pa).allPosible().contains(new Token("b")));
assertEquals(2, data.fetch(pa).allPosible().size());
assertTrue(data.fetch(pa).allPossible().contains(new Token("b")));
assertEquals(2, data.fetch(pa).allPossible().size());
}
@Test
......
......@@ -16,11 +16,11 @@ class GlyphTest {
assertEquals(a, a2);
assertNotEquals(a, w);
assertNotEquals(a, c);
assertNotEquals(Glyph.End, c);
assertNotEquals(Glyph.Start, c);
assertNotEquals(Glyph.Start, Glyph.End);
assertEquals(Glyph.Start, Glyph.Start);
assertEquals(Glyph.End, Glyph.End);
// assertNotEquals(Glyph.End, c);
// assertNotEquals(Glyph.Start, c);
// assertNotEquals(Glyph.Start, Glyph.End);
// assertEquals(Glyph.Start, Glyph.Start);
// assertEquals(Glyph.End, Glyph.End);
}
}
\ No newline at end of file
......@@ -25,14 +25,14 @@ public class LookupTests {
@Test
public void add() {
lookup.add(new Token("b"));
assertTrue(lookup.allPosible().contains(new Token("b")));
assertTrue(lookup.allPosible().contains(new Token("a")));
assertTrue(lookup.allPossible().contains(new Token("b")));
assertTrue(lookup.allPossible().contains(new Token("a")));
}
@Test
public void forRandom() {
assertEquals(lookup.forRandom(0), new Token("a"));
assertEquals(lookup.forRandom(0.5d), new Token("a"));
assertEquals(lookup.forRandom(0).getToken(), new Token("a"));
assertEquals(lookup.forRandom(0.5d).getToken(), new Token("a"));
// assertEquals(lookup.forRandom(1), new Token("a"));
assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(1));
assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(-1));
......@@ -40,14 +40,14 @@ public class LookupTests {
assertThrows(IllegalArgumentException.class, () -> lookup.forRandom(2));
lookup.add(new Token("b"));
Token a = lookup.forRandom(0);
Token b = lookup.forRandom(0.9d);
Decission a = lookup.forRandom(0);
Decission b = lookup.forRandom(0.9d);
assertNotEquals(a, b);
lookup.add(new Token("c"));
a = lookup.forRandom(0);
b = lookup.forRandom(0.51d);
Token c = lookup.forRandom(0.9d);
Decission c = lookup.forRandom(0.9d);
assertNotEquals(a, b);
assertNotEquals(a, c);
assertNotEquals(b, c);
......@@ -55,30 +55,30 @@ public class LookupTests {
@Test
public void average() {
assertEquals(lookup.average(), new Token("a"));
assertEquals(lookup.average().getToken(), new Token("a"));
lookup.add(new Token("b"));
Token token = lookup.average();
Token token = lookup.average().getToken();
assertTrue(() -> token.equals(new Token("a")) || token.equals(new Token("b")));
lookup.add(new Token("a"));
assertEquals(lookup.average(), new Token("a"));
assertEquals(lookup.average().getToken(), new Token("a"));
lookup.add(new Token("b"));
lookup.add(new Token("b"));
assertEquals(lookup.average(), new Token("b"));
assertEquals(lookup.average().getToken(), new Token("b"));
}
@Test
public void allPossible() {
assertIterableEquals(lookup.allPosible(),
assertIterableEquals(lookup.allPossible(),
Arrays.asList(new Token("a")));
lookup.add(new Token("b"));
assertIterableEquals(lookup.allPosible(),
assertIterableEquals(lookup.allPossible(),
Arrays.asList(new Token("a"), new Token("b")));
lookup.add(new Token("c"));
assertIterableEquals(lookup.allPosible(),
assertIterableEquals(lookup.allPossible(),
Arrays.asList(new Token("a"), new Token("b"), new Token("c")));
lookup.add(new Token("a"));
assertIterableEquals(lookup.allPosible(),
assertIterableEquals(lookup.allPossible(),
Arrays.asList(new Token("a"), new Token("b"), new Token("c")));
}
......
......@@ -7,26 +7,34 @@ import java.util.Arrays;
import java.util.stream.Collectors;
import java.util.stream.Stream;
class ParserStreamStyleTest {
public class ParserStreamStyleTest {
@Test
void tests() {
ParserStreamStyle parserStreamStyle = new ParserStreamStyle();
Assertions.assertIterableEquals(
Arrays.asList(
new Token(Glyph.Start.getContent(), Glyph.Type.control),
new Token("ab"),
new Token(" ", Glyph.Type.whitespace),
new Token("c"),
new Token(Glyph.End.getContent(), Glyph.Type.control)),
parserStreamStyle.collect(Stream.of(Glyph.Start,
new Glyph(Glyph.Type.word, "a"),
new Glyph(Glyph.Type.word, "b"),
new Glyph(Glyph.Type.whitespace, " "),
new Glyph(Glyph.Type.word, "c"),
Glyph.End, new Glyph(Glyph.Type.empty, "EMPTY")))
.collect(Collectors.toList()));
// Collector parserStreamStyle = new Collector(1);
// Assertions.assertIterableEquals(
// Arrays.asList(
// new Token(Glyph.Start.getContent(), Glyph.Type.control),
// new Token("ab"),
// new Token(" ", Glyph.Type.whitespace),
// new Token("c"),
// new Token(Glyph.End.getContent(), Glyph.Type.control)),
// parserStreamStyle.combineToTokens(Stream.of(Glyph.Start,
// new Glyph(Glyph.Type.word, "a"),
// new Glyph(Glyph.Type.word, "b"),
// new Glyph(Glyph.Type.whitespace, " "),
// new Glyph(Glyph.Type.word, "c"),
// Glyph.End, new Glyph(Glyph.Type.empty, "EMPTY")))
// .collect(Collectors.toList()));
}
// @Test
// void test2() {
// Collector parserStreamStyle = new Collector(2);
// Data data = parserStreamStyle.learn(Stream.<String>builder().add("a bc a d_e ").build());
// System.out.println(data.dumpStats());
// System.out.println(data.fetch(new Prefix(Arrays.asList(new Token("e")))).allPossible());
// }
}
\ No newline at end of file
......@@ -18,26 +18,6 @@ public class PrefixTests {
assertEquals(new Prefix(Arrays.asList(new Token("a"))), prefix);
}
// FIXME
@Test
public void testStartCompression() {
prefix = new Prefix(Arrays.asList(Token.START));
assertEquals(new Prefix(Arrays.asList(Token.START)), prefix);
assertEquals(new Prefix(Arrays.asList(Token.START, Token.START, Token.START)), prefix);
prefix = new Prefix(Arrays.asList(Token.START, Token.START));
assertEquals(new Prefix(Arrays.asList(Token.START)), prefix);
prefix = new Prefix(Arrays.asList(Token.START, new Token("")));
assertEquals(new Prefix(Arrays.asList(Token.START, new Token(""))), prefix);
prefix = new Prefix(Arrays.asList(Token.START, new Token(""), new Token("")));
assertEquals(new Prefix(Arrays.asList(Token.START, new Token(""), new Token(""))), prefix);
assertEquals(new Prefix(Arrays.asList(Token.START, Token.START, Token.START, new Token(""), new Token(""))),
prefix);
prefix = new Prefix(Arrays.asList(Token.START, Token.START, new Token(""), new Token("")));
assertEquals(new Prefix(Arrays.asList(Token.START, new Token(""), new Token(""))), prefix);
}
@Test
public void testSlide() {
prefix = new Prefix(Arrays.asList(new Token("a")));
......@@ -49,5 +29,12 @@ public class PrefixTests {
assertEquals(new Prefix(Arrays.asList(new Token("b"), new Token("c"))), p2);
}
@Test
public void testFancySlide() {
prefix = new Prefix(Arrays.asList(new Token("a")));
Prefix p2 = prefix.slide(new Token("b"),2);
assertEquals(new Prefix(Arrays.asList(new Token("a"), new Token("b"))), p2);
}
}
\ No newline at end of file
package markov;
import org.junit.jupiter.api.Test;
import java.util.Base64;
import java.util.stream.Stream;
class ShortenerTest {
@Test
void shortenerTest() {
String hash = new Shortener(null).fromInts(Stream.of(561,0,64,0));
System.out.println(Base64.getEncoder().encodeToString("\0\0@\0\0".getBytes()));
System.out.println(hash);
System.out.println(hash.length());
}
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment