Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
markov
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Commits
Open sidebar
Patrick Friedel
markov
Commits
e41e070f
Commit
e41e070f
authored
Feb 23, 2018
by
Hut
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
many much
parent
6763731a
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
167 additions
and
121 deletions
+167
-121
Builder.java
src/main/java/markov/Builder.java
+12
-9
Data.java
src/main/java/markov/Data.java
+3
-1
Glyph.java
src/main/java/markov/Glyph.java
+3
-3
Lookup.java
src/main/java/markov/Lookup.java
+3
-2
Mail.java
src/main/java/markov/Mail.java
+53
-46
Main.java
src/main/java/markov/Main.java
+3
-5
Parser.java
src/main/java/markov/Parser.java
+1
-1
Prefix.java
src/main/java/markov/Prefix.java
+2
-1
Renderer.java
src/main/java/markov/Renderer.java
+10
-18
Sentence.java
src/main/java/markov/Sentence.java
+47
-0
Shortener.java
src/main/java/markov/Shortener.java
+17
-18
Token.java
src/main/java/markov/Token.java
+13
-9
Utils.java
src/main/java/markov/Utils.java
+0
-8
No files found.
src/main/java/markov/Builder.java
View file @
e41e070f
...
...
@@ -19,27 +19,28 @@ public class Builder {
this
.
data
=
data
;
}
public
Collection
<
Map
.
Entry
<
Prefix
,
Decission
>>
random
()
{
public
Sentence
random
()
{
return
produce
(
p
->
nextRandom
(
p
));
}
public
Collection
<
Map
.
Entry
<
Prefix
,
Decission
>>
average
()
{
public
Sentence
average
()
{
return
produce
(
p
->
data
.
fetch
(
p
).
average
());
}
private
Collection
<
Decission
>
produce
(
private
Sentence
produce
(
Function
<
Prefix
,
Decission
>
producer
)
{
List
<
Decission
>
result
=
new
LinkedList
<>(
);
Sentence
sentence
=
new
Sentence
(
data
);
Prefix
p
=
initPrefix
();
Decission
d
=
new
Decission
(
Token
.
START
,
0
,
p
,
null
);
Decission
d
=
new
Decission
(
Token
.
START
,
0
);
for
(
int
i
=
0
;
i
<
1000
;
i
++)
{
if
(
d
.
getToken
()
==
Token
.
END
)
if
(
Token
.
END
.
equals
(
d
.
getToken
())
)
break
;
d
=
producer
.
apply
(
p
);
result
.
add
(
new
AbstractMap
.
SimpleEntry
<>(
p
,
d
));
d
.
setP
(
p
);
sentence
.
add
(
d
);
p
=
p
.
slide
(
d
.
getToken
(),
prefix_length
);
}
return
result
;
return
sentence
;
}
private
Prefix
initPrefix
()
{
...
...
@@ -51,7 +52,9 @@ public class Builder {
if
(
l
==
null
)
{
throw
new
NullPointerException
(
String
.
format
(
"could not find a lookup for %s"
,
prefix
));
}
return
l
.
forRandom
(
nextRandomNumber
());
Decission
decission
=
l
.
forRandom
(
nextRandomNumber
());
decission
.
setLookup
(
l
);
return
decission
;
}
private
double
nextRandomNumber
()
{
...
...
src/main/java/markov/Data.java
View file @
e41e070f
package
markov
;
import
java.io.Serializable
;
import
java.util.Arrays
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.stream.Collectors
;
// FIXME rename
public
class
Data
{
// TODO add builder and move add()
public
class
Data
implements
Serializable
{
private
final
int
prefixLength
;
private
final
Map
<
Prefix
,
Lookup
>
data
=
new
HashMap
<>();
...
...
src/main/java/markov/Glyph.java
View file @
e41e070f
package
markov
;
public
class
Glyph
{
import
java.io.Serializable
;
public
class
Glyph
implements
Serializable
{
/**
 * Category of a {@code Glyph}.
 *
 * <p>Elsewhere in this project, {@code word} is the type {@code Token(String)} passes by
 * default and {@code control} is the type used by {@code Token.SpecialToken}.
 * NOTE(review): the exact meaning of the remaining constants is not visible here; confirm
 * against the code that assigns them.
 */
public
enum
Type
{
word
,
punctuation
,
control
,
whitespace
,
empty
,
other
}
...
...
@@ -31,8 +33,6 @@ public class Glyph {
if
(
type
!=
glyph
.
type
)
{
return
false
;
}
else
if
(
type
==
Type
.
control
){
return
this
==
o
;
}
return
content
!=
null
?
content
.
equals
(
glyph
.
content
)
:
glyph
.
content
==
null
;
}
...
...
src/main/java/markov/Lookup.java
View file @
e41e070f
package
markov
;
import
java.io.Serializable
;
import
java.util.LinkedHashMap
;
import
java.util.Map.Entry
;
import
java.util.Set
;
import
java.util.stream.Collectors
;
public
class
Lookup
{
public
class
Lookup
implements
Serializable
{
private
final
LinkedHashMap
<
Token
,
Integer
>
tokens
=
new
LinkedHashMap
<
Token
,
Integer
>();
...
...
@@ -46,7 +47,7 @@ public class Lookup {
public
Decission
forId
(
int
id
)
{
if
(
id
<
0
||
id
>
getTotalCounts
())
{
throw
new
IllegalArgumentException
(
"expected id [0; totalCount)"
);
throw
new
IllegalArgumentException
(
String
.
format
(
"got %d but expected id [0; %d)"
,
id
,
getTotalCounts
())
);
}
int
i
=
0
;
for
(
Entry
<
Token
,
Integer
>
entry
:
tokens
.
entrySet
())
{
...
...
src/main/java/markov/Mail.java
View file @
e41e070f
package
markov
;
import
javax.mail.MessagingException
;
import
javax.mail.Session
;
import
javax.mail.internet.MimeMessage
;
import
java.io.File
;
import
java.io.IOException
;
import
java.io.ObjectInputStream
;
import
java.nio.file.Files
;
import
java.util.
Arrays
;
import
java.util.
Collection
;
import
java.util.
Base64
;
import
java.util.
HashMap
;
import
java.util.Map
;
import
java.util.
Propertie
s
;
import
java.util.
stream.Collector
s
;
import
java.util.stream.Stream
;
public
class
Mail
{
public
static
void
main
(
String
[]
args
)
{
int
prefixLength
=
3
;
public
static
void
main
(
String
[]
args
)
throws
IOException
,
ClassNotFoundException
{
int
prefixLength
=
4
;
System
.
out
.
println
(
"reading map"
);
Map
<
Integer
,
Data
>
dataMap
=
(
Map
<
Integer
,
Data
>)
new
ObjectInputStream
(
Files
.
newInputStream
(
new
File
(
"C:\\Users\\admin\\git\\markovdisplay\\target\\classes\\data"
).
toPath
())).
readObject
();
System
.
out
.
println
(
"read map"
);
Builder
b
=
new
Builder
(
prefixLength
,
dataMap
.
get
(
prefixLength
));
Renderer
r
=
new
Renderer
(
Renderer
.
Options
.
FULL
);
Renderer
r1
=
new
Renderer
(
Renderer
.
Options
.
NONE
);
Shortener
shortener
=
new
Shortener
(
dataMap
.
get
(
prefixLength
));
CountMap
<
Integer
>
counter
=
new
CountMap
<>();
System
.
out
.
println
(
"creating..."
);
final
int
max
=
100_000
;
for
(
int
i
=
0
;
i
<
100_000
;
i
++)
{
if
(
i
%
(
max
/
10
)
==
0
)
{
System
.
out
.
println
(
i
/
(
max
/
100
)
+
"%"
);
}
Sentence
sentence
=
b
.
random
();
String
id
=
shortener
.
getId
(
sentence
);
shortener
.
toInts
(
id
).
forEach
(
c
->
counter
.
count
(
c
));
}
System
.
out
.
println
(
"distinct: "
+
counter
.
map
.
size
());
System
.
out
.
println
(
counter
.
result
().
limit
(
100
).
map
(
e
->
e
.
getValue
().
get
()
+
"*"
+
e
.
getKey
()).
collect
(
Collectors
.
joining
(
"\n"
)));
System
.
out
.
println
(
"done"
);
}
private
static
class
CountMap
<
T
>
{
static
class
MutableInt
{
int
value
=
1
;
Parser
parser
=
new
Parser
();
File
parent
=
new
File
(
"C:\\Users\\admin\\Desktop\\emails"
);
Stream
<
String
>
mails
=
Arrays
.
stream
(
parent
.
listFiles
()).
limit
(
500
).
map
(
file
->
file
.
toPath
()).
map
(
path
->
{
try
{
public
MutableInt
(
int
value
)
{
this
.
value
=
value
;
}
MimeMessage
m
=
new
MimeMessage
(
Session
.
getDefaultInstance
(
new
Properties
()),
Files
.
newInputStream
(
path
));
//System.out.println(m.getContentType());
// return m.getContent().toString();
return
m
;
}
catch
(
IOException
|
MessagingException
e
)
{
throw
new
RuntimeException
(
e
);
public
void
increment
()
{
++
value
;
}
}).
filter
(
mimeMessage
->
{
try
{
return
mimeMessage
.
getContentType
().
contains
(
"text/plain"
);
}
catch
(
MessagingException
e
)
{
throw
new
RuntimeException
(
e
);
public
int
get
()
{
return
value
;
}
}).
map
(
m
->
{
try
{
return
m
.
getContent
().
toString
();
}
catch
(
IOException
|
MessagingException
e
)
{
throw
new
RuntimeException
(
e
);
}
});
Data
data
=
Utils
.
parse
(
mails
,
prefixLength
);
Builder
b
=
new
Builder
(
prefixLength
,
data
);
Renderer
r
=
new
Renderer
(
data
,
Renderer
.
Options
.
FULL
);
Renderer
r1
=
new
Renderer
(
data
,
Renderer
.
Options
.
NONE
);
Shortener
shortener
=
new
Shortener
(
data
);
private
final
Map
<
T
,
MutableInt
>
map
=
new
HashMap
<>();
for
(
int
i
=
0
;
i
<
10
;
i
++)
{
Collection
<
Map
.
Entry
<
Prefix
,
Decission
>>
sentence
=
b
.
random
();
// System.out.println(r1.render(sentence));
String
id
=
shortener
.
getId
(
sentence
);
System
.
out
.
println
(
id
);
// System.out.println(r.render(shortener.getSentence(id)));
public
void
count
(
T
t
)
{
MutableInt
i
=
map
.
get
(
t
);
if
(
i
==
null
)
{
map
.
put
(
t
,
new
MutableInt
(
1
));
}
else
{
i
.
increment
();
}
}
public
Stream
<
Map
.
Entry
<
T
,
MutableInt
>>
result
()
{
return
map
.
entrySet
().
stream
().
sorted
((
e1
,
e2
)
->
Integer
.
compare
(
e2
.
getValue
().
get
(),
e1
.
getValue
().
get
()));
}
// Collection<Map.Entry<Prefix, Token>> a = b.average();
// System.out.println("average: " + r.render(a));
// for (Map.Entry<Prefix, Token> e : a) {
// System.out.println(e.getKey() + " -> " + data.fetch(e.getKey()));
// }
// System.out.println(data.dumpStats());
}
}
src/main/java/markov/Main.java
View file @
e41e070f
package
markov
;
import
java.util.Collection
;
import
java.util.Map
;
import
java.util.stream.Stream
;
public
class
Main
{
...
...
@@ -13,9 +11,9 @@ public class Main {
Data
data
=
Utils
.
parse
(
Stream
.
of
(
input
.
replace
(
"Kai☺UWE empfiehlt "
,
""
).
split
(
"\n"
)),
prefixLength
);
Builder
b
=
new
Builder
(
prefixLength
,
data
);
Renderer
r
=
new
Renderer
(
data
,
Renderer
.
Options
.
NONE
);
Renderer
r2
=
new
Renderer
(
data
,
Renderer
.
Options
.
FULL
);
Collection
<
Map
.
Entry
<
Prefix
,
Decission
>>
sentence
=
b
.
random
();
Renderer
r
=
new
Renderer
(
Renderer
.
Options
.
NONE
);
Renderer
r2
=
new
Renderer
(
Renderer
.
Options
.
FULL
);
Sentence
sentence
=
b
.
random
();
for
(
int
i
=
0
;
i
<
1
;
i
++)
{
System
.
out
.
println
(
r2
.
render
(
sentence
));
}
...
...
src/main/java/markov/Parser.java
View file @
e41e070f
...
...
@@ -10,7 +10,7 @@ public class Parser {
public
Map
<
Integer
,
Data
>
parse
(
Stream
<
Stream
<
Token
>>
input
,
Collection
<
Integer
>
prefixes
)
{
public
Map
<
Integer
,
Data
>
parse
(
Stream
<
Stream
<
Token
>>
input
,
Iterable
<
Integer
>
prefixes
)
{
Collection
<
Collector
>
collectors
=
new
ArrayList
<>();
for
(
Integer
i
:
prefixes
)
{
collectors
.
add
(
new
Collector
(
i
));
...
...
src/main/java/markov/Prefix.java
View file @
e41e070f
package
markov
;
import
java.io.Serializable
;
import
java.util.ArrayList
;
import
java.util.Arrays
;
import
java.util.Iterator
;
import
java.util.List
;
// FIXME rename
public
class
Prefix
{
public
class
Prefix
implements
Serializable
{
private
final
List
<
Token
>
tokens
;
public
Prefix
(
Token
[]
tokens
)
{
...
...
src/main/java/markov/Renderer.java
View file @
e41e070f
package
markov
;
import
java.util.Collection
;
import
java.util.Map.Entry
;
public
class
Renderer
{
public
static
class
Options
{
private
final
boolean
propability
;
...
...
@@ -25,32 +22,25 @@ public class Renderer {
}
private
final
Data
data
;
private
final
Options
options
;
public
Renderer
(
Data
data
)
{
public
Renderer
()
{
super
();
this
.
data
=
data
;
this
.
options
=
Options
.
NONE
;
}
public
Renderer
(
Data
data
,
Options
options
)
{
public
Renderer
(
Options
options
)
{
super
();
this
.
data
=
data
;
this
.
options
=
options
;
}
public
String
render
(
Collection
<
Entry
<
Prefix
,
Decission
>>
sentence
)
{
double
p
=
1
;
public
String
render
(
Sentence
sentence
)
{
StringBuilder
sb
=
new
StringBuilder
();
if
(
options
.
recomends
)
sb
.
append
(
"KAI-uwe empfiehlt"
);
for
(
Entry
<
Prefix
,
Decission
>
t
:
sentence
)
{
Lookup
lookup
=
d
ata
.
fetch
(
t
.
getKey
()
);
for
(
Decission
d
:
sentence
)
{
Lookup
lookup
=
d
.
getLookup
(
);
int
possibilities
=
lookup
.
getDistinctTokens
();
p
*=
(
double
)
lookup
.
getAmount
(
t
.
getValue
().
getToken
())
/
(
double
)
lookup
.
getTotalCounts
();
if
(
options
.
possibilities
)
{
if
(
possibilities
<=
1
)
{
...
...
@@ -75,14 +65,16 @@ public class Renderer {
sb
.
append
(
" "
);
}
}
sb
.
append
(
t
.
getValue
().
getToken
().
render
(
options
.
prefix
));
if
((
Token
.
START
.
equals
(
d
.
getToken
())||
Token
.
END
.
equals
(
d
.
getToken
())
)
&&
!
options
.
specialToken
)
{
continue
;
}
sb
.
append
(
d
.
getToken
().
render
(
options
.
prefix
));
}
if
(
options
.
propability
)
{
sb
.
append
(
" - "
+
p
);
sb
.
append
(
" - "
+
sentence
.
propability
()
);
}
return
sb
.
toString
();
}
}
src/main/java/markov/Sentence.java
0 → 100644
View file @
e41e070f
package
markov
;
import
java.io.Serializable
;
import
java.util.ArrayList
;
import
java.util.Collection
;
import
java.util.Iterator
;
import
java.util.stream.Stream
;
/**
 * An ordered collection of {@link Decission}s together with the {@link Data} instance that
 * was supplied at construction time.
 *
 * <p>Decisions are stored in an {@link ArrayList}, so {@link #iterator()} and
 * {@link #asStream()} both follow insertion order.
 */
public class Sentence implements Iterable<Decission>, Serializable {

    /** Decisions in the order they were appended via {@link #add(Decission)}. */
    private final Collection<Decission> decissions = new ArrayList<>();

    /** Data instance passed to {@link Shortener} when {@link #id()} is computed. */
    private final Data data;

    /**
     * Creates an empty sentence.
     *
     * @param data the data this sentence is associated with; stored as-is without a null check
     */
    public Sentence(Data data) {
        this.data = data;
    }

    /**
     * @return the data instance given to the constructor
     */
    public Data getData() {
        return data;
    }

    /**
     * Appends a decision to the end of this sentence.
     *
     * @param decission the decision to append
     */
    public void add(Decission decission) {
        this.decissions.add(decission);
    }

    /**
     * @return a stream over the decisions in insertion order
     */
    public Stream<Decission> asStream() {
        return decissions.stream();
    }

    /**
     * Multiplies, over all decisions, the ratio
     * {@code getLookup().getAmount(getToken()) / getLookup().getTotalCounts()}.
     *
     * <p>An empty sentence yields {@code 1.0} (the empty product). Reducing without an
     * identity and unwrapping the resulting optional would throw
     * {@link java.util.NoSuchElementException} in that case; seeding the reduction with
     * {@code 1.0} avoids this and leaves non-empty results unchanged, because multiplying
     * by {@code 1.0} is exact.
     *
     * @return the product of the per-decision ratios, or {@code 1.0} if there are no decisions
     * @throws NullPointerException if a decision has no lookup set
     */
    public double propability() {
        return asStream()
                .mapToDouble(d -> (double) d.getLookup().getAmount(d.getToken())
                        / (double) d.getLookup().getTotalCounts())
                .reduce(1.0, (d1, d2) -> d1 * d2);
    }

    /**
     * Renders this sentence using a default-constructed {@link Renderer}.
     *
     * @return the rendered text
     */
    public String render() {
        return new Renderer().render(this);
    }

    /**
     * Computes the identifier of this sentence using a {@link Shortener} built on this
     * sentence's data.
     *
     * @return the identifier produced by {@code Shortener.getId}
     */
    public String id() {
        return new Shortener(data).getId(this);
    }

    /**
     * @return an iterator over the decisions in insertion order
     */
    @Override
    public Iterator<Decission> iterator() {
        return decissions.iterator();
    }
}
src/main/java/markov/Shortener.java
View file @
e41e070f
package
markov
;
import
java.nio.charset.Charset
;
import
java.util.AbstractMap
;
import
java.util.ArrayList
;
import
java.util.Base64
;
import
java.util.Collection
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.stream.Collectors
;
import
java.util.stream.Stream
;
...
...
@@ -18,17 +12,19 @@ public class Shortener {
this
.
data
=
data
;
}
public
String
getId
(
Collection
<
Map
.
Entry
<
Prefix
,
Decission
>>
sentence
)
{
return
fromInts
(
sentence
.
stream
()
.
map
(
d
->
d
.
getValue
().
getId
()));
public
String
getId
(
Sentence
sentence
)
{
return
fromInts
(
sentence
.
asStream
()
.
flatMap
(
d
->
{
if
(
d
.
getLookup
().
getDistinctTokens
()
==
1
)
{
return
Stream
.
empty
();
}
else
return
Stream
.
of
(
d
.
getId
());
}));
}
String
fromInts
(
Stream
<
Integer
>
ints
)
{
String
cps
=
ints
.
map
(
i
->
{
System
.
out
.
println
(
i
);
return
new
String
(
Character
.
toChars
(
i
));
}).
collect
(
Collectors
.
joining
());
String
cps
=
ints
.
map
(
i
->
new
String
(
Character
.
toChars
(
i
))).
collect
(
Collectors
.
joining
());
return
Base64
.
getEncoder
().
encodeToString
(
cps
.
getBytes
());
}
...
...
@@ -36,15 +32,18 @@ public class Shortener {
return
new
String
(
Base64
.
getDecoder
().
decode
(
hash
.
getBytes
())).
codePoints
().
boxed
();
}
public
Collection
<
Map
.
Entry
<
Prefix
,
Decission
>>
getSentence
(
String
hash
)
{
public
Sentence
getSentence
(
String
hash
)
{
Integer
[]
ids
=
toInts
(
hash
).
toArray
(
Integer
[]::
new
);
Collection
<
Map
.
Entry
<
Prefix
,
Decission
>>
sentence
=
new
ArrayList
<>(
);
Sentence
sentence
=
new
Sentence
(
data
);
Prefix
prefix
=
new
Prefix
(
new
Token
[
0
]);
for
(
int
id
:
ids
)
{
int
i
=
0
;
while
(
i
<
ids
.
length
)
{
Lookup
lookup
=
data
.
fetch
(
prefix
);
Decission
d
=
lookup
.
forId
(
id
);
sentence
.
add
(
new
AbstractMap
.
SimpleEntry
<>(
prefix
,
d
));
Decission
d
=
lookup
.
getDistinctTokens
()
==
1
?
lookup
.
average
()
:
lookup
.
forId
(
ids
[
i
++]);
d
.
setP
(
prefix
);
d
.
setLookup
(
lookup
);
sentence
.
add
(
d
);
prefix
=
prefix
.
slide
(
d
.
getToken
(),
data
.
getPrefixLength
());
}
return
sentence
;
...
...
src/main/java/markov/Token.java
View file @
e41e070f
package
markov
;
public
class
Token
{
import
java.io.Serializable
;
public
static
final
Token
START
=
new
SpecialToken
();
public
static
final
Token
END
=
new
SpecialToken
();
protected
static
final
Token
EMPTY
=
new
SpecialToken
();
public
class
Token
implements
Serializable
{
public
static
final
Token
START
=
new
SpecialToken
(
"START"
);
public
static
final
Token
END
=
new
SpecialToken
(
"END"
);
protected
static
final
Token
EMPTY
=
new
SpecialToken
(
"EMPTY"
);
/**
 * Creates a token of type {@code Glyph.Type.word} by delegating to the
 * two-argument constructor.
 *
 * @param content the textual content of the token
 */
public
Token
(
String
content
)
{
this
(
content
,
Glyph
.
Type
.
word
);
}
public
Token
(
String
content
,
Glyph
.
Type
type
)
{
super
();
this
.
content
=
content
;
...
...
@@ -19,14 +22,15 @@ public class Token {
@Override
public
String
toString
()
{
if
(
this
==
END
)
{
if
(
this
.
equals
(
END
)
)
{
return
"Token <END>"
;
}
if
(
this
==
START
)
{
if
(
this
.
equals
(
START
)
)
{
return
"TOKEN <Start>"
;
}
if
(
this
==
EMPTY
)
if
(
this
.
equals
(
EMPTY
))
{
return
"TOKEN <empty>"
;
}
return
"T=["
+
content
+
"]"
;
}
...
...
@@ -54,8 +58,8 @@ public class Token {
private
static
class
SpecialToken
extends
Token
{
public
SpecialToken
(
)
{
super
(
""
,
Glyph
.
Type
.
control
);
public
SpecialToken
(
String
s
)
{
super
(
s
,
Glyph
.
Type
.
control
);
}
@Override
...
...
src/main/java/markov/Utils.java
View file @
e41e070f
package
markov
;
import
java.nio.ByteBuffer
;
import
java.util.ArrayList
;
import
java.util.Collection
;
import
java.util.stream.Stream
;
public
class
Utils
{
/**
 * Encodes an {@code int} as four bytes in {@link ByteBuffer}'s default (big-endian) order.
 *
 * @param value the value to encode
 * @return a new 4-element array holding the encoded value
 */
public static byte[] toByteArray(int value) {
    ByteBuffer buffer = ByteBuffer.allocate(4);
    buffer.putInt(value);
    return buffer.array();
}
/**
 * Decodes an {@code int} from the first four bytes of the array, read in
 * {@link ByteBuffer}'s default (big-endian) order. Any further bytes are ignored.
 *
 * @param bytes the array to read from; must hold at least four bytes
 * @return the decoded value
 * @throws java.nio.BufferUnderflowException if fewer than four bytes are available
 */
public static int fromByteArray(byte[] bytes) {
    ByteBuffer wrapped = ByteBuffer.wrap(bytes);
    return wrapped.getInt();
}
public
static
Data
parse
(
Stream
<
String
>
input
,
int
prefixLength
)
{
Collection
<
Integer
>
collectionDummy
=
new
ArrayList
<>();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment