Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
markov
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Commits
Open sidebar
Patrick Friedel
markov
Commits
f6319bee
Commit
f6319bee
authored
Apr 06, 2018
by
Hut
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bugfixes
parent
c64692b3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
238 additions
and
208 deletions
+238
-208
Builder.java
src/main/java/markov/Builder.java
+0
-5
Data.java
src/main/java/markov/Data.java
+1
-1
Lookup.java
src/main/java/markov/Lookup.java
+9
-113
LookupImpl.java
src/main/java/markov/LookupImpl.java
+202
-0
Mail.java
src/main/java/markov/Mail.java
+2
-4
Prefix.java
src/main/java/markov/Prefix.java
+1
-0
Token.java
src/main/java/markov/Token.java
+19
-21
ShortenerByteHUffmanTrainerImpl.java
...ava/markov/shortener/ShortenerByteHUffmanTrainerImpl.java
+1
-1
ShortenerByteHuffmanImpl.java
src/main/java/markov/shortener/ShortenerByteHuffmanImpl.java
+1
-1
GlyphTest.java
src/test/java/markov/GlyphTest.java
+2
-7
LookupTests.java
src/test/java/markov/LookupTests.java
+0
-0
ParserStreamStyleTest.java
src/test/java/markov/ParserStreamStyleTest.java
+0
-41
ShortenerByteHuffmanImplTest.java
src/test/java/markov/ShortenerByteHuffmanImplTest.java
+0
-14
No files found.
src/main/java/markov/Builder.java
View file @
f6319bee
package
markov
;
package
markov
;
import
java.util.AbstractMap
;
import
java.util.Collection
;
import
java.util.LinkedList
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.concurrent.ThreadLocalRandom
;
import
java.util.concurrent.ThreadLocalRandom
;
import
java.util.function.Function
;
import
java.util.function.Function
;
...
...
src/main/java/markov/Data.java
View file @
f6319bee
...
@@ -22,7 +22,7 @@ public class Data implements Serializable{
...
@@ -22,7 +22,7 @@ public class Data implements Serializable{
}
}
public
void
add
(
Prefix
p
,
Token
t
)
{
public
void
add
(
Prefix
p
,
Token
t
)
{
Lookup
l
=
data
.
getOrDefault
(
p
,
new
Lookup
());
Lookup
l
=
data
.
getOrDefault
(
p
,
new
Lookup
Impl
());
l
.
add
(
t
);
l
.
add
(
t
);
data
.
put
(
p
,
l
);
data
.
put
(
p
,
l
);
}
}
...
...
src/main/java/markov/Lookup.java
View file @
f6319bee
package
markov
;
package
markov
;
import
java.io.Serializable
;
import
java.io.Serializable
;
import
java.util.Arrays
;
import
java.util.Comparator
;
import
java.util.LinkedHashMap
;
import
java.util.List
;
import
java.util.Map.Entry
;
import
java.util.Set
;
import
java.util.Set
;
import
java.util.stream.Collectors
;
public
class
Lookup
implements
Serializable
{
public
interface
Lookup
extends
Serializable
{
int
getDistinctTokens
();
private
final
LinkedHashMap
<
Token
,
Integer
>
tokens
=
new
LinkedHashMap
<>();
void
add
(
Token
t
);
private
Decission
[]
finalData
;
private
int
[]
finishedSums
;
private
boolean
isFinishedCollecting
=
false
;
private
int
totalCounts
=
0
;
Decission
forRandom
(
double
random
)
;
public
int
getDistinctTokens
()
{
Decission
forId
(
int
id
);
finishGuard
();
return
finalData
.
length
;
}
private
void
finishGuard
()
{
Decission
average
();
if
(!
isFinishedCollecting
)
{
throw
new
IllegalStateException
(
"not jet finished"
);
}
}
public
void
add
(
Token
t
)
{
Set
<
Token
>
allPossible
();
if
(
isFinishedCollecting
)
{
throw
new
IllegalStateException
(
"already finished"
);
}
Integer
i
=
tokens
.
getOrDefault
(
t
,
0
);
tokens
.
put
(
t
,
i
+
1
);
totalCounts
++;
}
public
Decission
forRandom
(
double
random
)
{
finishGuard
();
if
(
random
<
0
||
random
>=
1
)
{
throw
new
IllegalArgumentException
(
"expected double [0; 1)"
);
}
int
id
=
(
int
)
Math
.
floor
(
random
*
totalCounts
);
int
i
=
Arrays
.
binarySearch
(
finishedSums
,
id
);
i
=
Math
.
min
(
i
>=
0
?
i
+
1
:
-
1
*
(
i
+
1
),
finishedSums
.
length
-
1
);
return
finalData
[
i
];
}
public
Decission
forId
(
int
id
)
{
finishGuard
();
if
(
id
<
0
||
id
>
finishedSums
.
length
)
{
throw
new
IllegalArgumentException
(
String
.
format
(
"got %d but expected id [0; %d)"
,
id
,
finishedSums
.
length
));
}
return
finalData
[
id
];
}
public
Decission
average
()
{
finishGuard
();
return
finalData
[
this
.
finishedSums
.
length
-
1
];
}
public
Set
<
Token
>
allPossible
()
{
finishGuard
();
return
Arrays
.
asList
(
finalData
)
.
stream
()
.
map
(
Decission:
:
getToken
)
.
collect
(
Collectors
.
toSet
());
}
void
finishCollection
()
{
if
(!
isFinishedCollecting
)
{
int
size
=
tokens
.
size
();
finishedSums
=
new
int
[
size
];
finalData
=
new
Decission
[
size
];
List
<
Entry
<
Token
,
Integer
>>
orderedEntries
=
tokens
.
entrySet
()
.
stream
()
.
sequential
()
.
sorted
(
Comparator
.
comparingInt
(
Entry:
:
getValue
))
.
collect
(
Collectors
.
toList
());
int
sum
=
0
;
for
(
int
i
=
0
;
i
<
orderedEntries
.
size
();
i
++)
{
Entry
<
Token
,
Integer
>
entry
=
orderedEntries
.
get
(
i
);
sum
+=
entry
.
getValue
();
finishedSums
[
i
]
=
sum
;
finalData
[
i
]
=
new
Decission
(
entry
.
getKey
(),
i
,
this
,
(
double
)
entry
.
getValue
()
/
getTotalCounts
());
}
this
.
tokens
.
clear
();
isFinishedCollecting
=
true
;
}
}
int
getTotalCounts
()
{
return
totalCounts
;
}
void
resetFinishding
()
{
int
sum
=
0
;
for
(
int
i
=
0
;
i
<
finalData
.
length
;
i
++)
{
this
.
tokens
.
put
(
finalData
[
i
].
getToken
(),
finishedSums
[
i
]
-
sum
);
sum
+=
finishedSums
[
i
];
}
this
.
isFinishedCollecting
=
false
;
this
.
finishedSums
=
null
;
this
.
finalData
=
null
;
}
@Override
public
String
toString
()
{
return
isFinishedCollecting
?
"Lookup"
:
(
"Lookup [tokens= "
+
tokens
.
entrySet
()
.
stream
()
.
sorted
((
e1
,
e2
)
->
Integer
.
compare
(
e2
.
getValue
(),
e1
.
getValue
()))
.
map
(
e
->
String
.
format
(
"%d*%s"
,
e
.
getValue
(),
e
.
getKey
()))
.
collect
(
Collectors
.
joining
(
", "
))
+
"]"
);
}
void
finishCollection
();
int
getTotalCounts
();
}
}
src/main/java/markov/LookupImpl.java
0 → 100644
View file @
f6319bee
package
markov
;
import
java.util.Arrays
;
import
java.util.Comparator
;
import
java.util.LinkedHashMap
;
import
java.util.List
;
import
java.util.Map.Entry
;
import
java.util.Set
;
import
java.util.stream.Collectors
;
public
class
LookupImpl
implements
Lookup
{
private
static
final
long
serialVersionUID
=
8382785417663574549L
;
private
Lookup
innerLookup
=
new
CollectingLookup
();
private
int
totalCounts
=
0
;
private
class
CollectingLookup
implements
Lookup
{
private
static
final
long
serialVersionUID
=
8508868695745897489L
;
private
final
LinkedHashMap
<
Token
,
Integer
>
tokens
=
new
LinkedHashMap
<>();
@Override
public
int
getDistinctTokens
()
{
throw
newISE
();
}
private
IllegalStateException
newISE
()
{
return
new
IllegalStateException
(
"call finishCollecting first"
);
}
@Override
public
void
add
(
Token
t
)
{
Integer
i
=
tokens
.
getOrDefault
(
t
,
0
);
tokens
.
put
(
t
,
i
+
1
);
totalCounts
++;
}
@Override
public
Decission
forRandom
(
double
random
)
{
throw
newISE
();
}
@Override
public
Decission
forId
(
int
id
)
{
throw
newISE
();
}
@Override
public
Decission
average
()
{
throw
newISE
();
}
@Override
public
Set
<
Token
>
allPossible
()
{
throw
newISE
();
}
@Override
public
void
finishCollection
()
{
int
size
=
tokens
.
size
();
int
[]
finishedSums
=
new
int
[
size
];
Decission
[]
finalData
=
new
Decission
[
size
];
List
<
Entry
<
Token
,
Integer
>>
orderedEntries
=
tokens
.
entrySet
()
.
stream
()
.
sequential
()
.
sorted
(
Comparator
.
comparingInt
(
Entry:
:
getValue
))
.
collect
(
Collectors
.
toList
());
int
sum
=
0
;
for
(
int
i
=
0
;
i
<
orderedEntries
.
size
();
i
++)
{
Entry
<
Token
,
Integer
>
entry
=
orderedEntries
.
get
(
i
);
sum
+=
entry
.
getValue
();
finishedSums
[
i
]
=
sum
;
finalData
[
i
]
=
new
Decission
(
entry
.
getKey
(),
i
,
LookupImpl
.
this
,
(
double
)
entry
.
getValue
()
/
totalCounts
);
}
innerLookup
=
new
OptimizedLookup
(
finalData
,
finishedSums
);
}
@Override
public
int
getTotalCounts
()
{
throw
newISE
();
}
}
private
class
OptimizedLookup
implements
Lookup
{
private
static
final
long
serialVersionUID
=
-
8666528642123058396L
;
private
Decission
[]
finalData
;
private
int
[]
finishedSums
;
OptimizedLookup
(
Decission
[]
finalData
,
int
[]
finishedSums
)
{
this
.
finalData
=
finalData
;
this
.
finishedSums
=
finishedSums
;
}
@Override
public
int
getDistinctTokens
()
{
return
finalData
.
length
;
}
@Override
public
void
add
(
Token
t
)
{
throw
new
IllegalStateException
(
"already optimized"
);
}
@Override
public
Decission
forRandom
(
double
random
)
{
if
(
random
<
0
||
random
>=
1
)
{
throw
new
IllegalArgumentException
(
"expected double [0; 1)"
);
}
if
(
totalCounts
==
0
)
{
return
null
;
}
int
id
=
(
int
)
Math
.
floor
(
random
*
totalCounts
);
int
i
=
Arrays
.
binarySearch
(
finishedSums
,
id
);
i
=
Math
.
min
(
i
>=
0
?
i
+
1
:
-
1
*
(
i
+
1
),
finishedSums
.
length
-
1
);
return
finalData
[
i
];
}
@Override
public
Decission
forId
(
int
id
)
{
if
(
id
<
0
||
id
>=
finalData
.
length
||
totalCounts
==
0
)
{
throw
new
IllegalArgumentException
(
String
.
format
(
"got %d but expected id [0; %d)"
,
id
,
finishedSums
.
length
));
}
return
finalData
[
id
];
}
@Override
public
Decission
average
()
{
if
(
totalCounts
==
0
)
{
return
null
;
}
return
finalData
[
this
.
finishedSums
.
length
-
1
];
}
@Override
public
Set
<
Token
>
allPossible
()
{
return
Arrays
.
stream
(
finalData
).
map
(
Decission:
:
getToken
).
collect
(
Collectors
.
toSet
());
}
@Override
public
void
finishCollection
()
{
throw
new
IllegalStateException
(
"already finished"
);
}
@Override
public
int
getTotalCounts
()
{
return
totalCounts
;
}
}
@Override
public
int
getDistinctTokens
()
{
return
innerLookup
.
getDistinctTokens
();
}
@Override
public
void
add
(
Token
t
)
{
innerLookup
.
add
(
t
);
}
@Override
public
Decission
forRandom
(
double
random
)
{
return
innerLookup
.
forRandom
(
random
);
}
@Override
public
Decission
forId
(
int
id
)
{
return
innerLookup
.
forId
(
id
);
}
@Override
public
Decission
average
()
{
return
innerLookup
.
average
();
}
@Override
public
Set
<
Token
>
allPossible
()
{
return
innerLookup
.
allPossible
();
}
@Override
public
void
finishCollection
()
{
innerLookup
.
finishCollection
();
}
@Override
public
int
getTotalCounts
()
{
return
innerLookup
.
getTotalCounts
();
}
}
src/main/java/markov/Mail.java
View file @
f6319bee
...
@@ -237,10 +237,8 @@ public class Mail {
...
@@ -237,10 +237,8 @@ public class Mail {
statistics
.
getAverage
(),
statistics
.
getAverage
(),
statistics
.
getMin
(),
statistics
.
getMin
(),
statistics
.
getMax
()));
statistics
.
getMax
()));
System
.
out
.
println
(
"SD: "
+
Math
.
sqrt
(
stats
.
stream
()
System
.
out
.
println
(
"SD: "
+
Math
.
sqrt
(
stats
.
stream
().
mapToDouble
(
e
->
Math
.
pow
(
.
mapToDouble
(
e
->
(
double
)
(
Math
.
pow
(
e
-
statistics
.
getAverage
(),
2
))
e
-
statistics
.
getAverage
(),
2
)))
.
sum
()
/
iterations
));
.
sum
()
/
iterations
));
}
}
}
}
...
...
src/main/java/markov/Prefix.java
View file @
f6319bee
package
markov
;
package
markov
;
import
java.io.Serializable
;
import
java.io.Serializable
;
import
java.util.Arrays
;
import
java.util.Arrays
;
import
java.util.List
;
import
java.util.List
;
...
...
src/main/java/markov/Token.java
View file @
f6319bee
package
markov
;
package
markov
;
import
java.io.Serializable
;
import
java.io.Serializable
;
public
class
Token
implements
Serializable
,
Comparable
<
Token
>
{
public
class
Token
implements
Serializable
,
Comparable
<
Token
>
{
static
final
Token
START
=
new
SpecialToken
(
"START"
);
static
final
Token
START
=
new
SpecialToken
(
"START"
);
static
final
Token
END
=
new
SpecialToken
(
"END"
);
static
final
Token
END
=
new
SpecialToken
(
"END"
);
private
final
int
hashCode
;
private
String
content
;
private
Glyph
.
Type
type
;
public
Token
(
String
content
)
{
public
Token
(
String
content
)
{
this
(
content
,
Glyph
.
Type
.
word
);
this
(
content
,
Glyph
.
Type
.
word
);
}
}
...
@@ -17,18 +20,15 @@ public class Token implements Serializable, Comparable<Token> {
...
@@ -17,18 +20,15 @@ public class Token implements Serializable, Comparable<Token> {
this
.
hashCode
=
generateHashCode
();
this
.
hashCode
=
generateHashCode
();
}
}
private
String
content
;
private
int
generateHashCode
()
{
private
Glyph
.
Type
type
;
int
result
=
content
!=
null
?
content
.
hashCode
()
:
0
;
result
=
31
*
result
+
(
type
!=
null
?
type
.
hashCode
()
:
0
);
return
result
;
}
@Override
@Override
public
String
toString
()
{
public
int
hashCode
()
{
if
(
this
.
equals
(
END
))
{
return
hashCode
;
return
"Token <END>"
;
}
if
(
this
.
equals
(
START
))
{
return
"TOKEN <Start>"
;
}
return
"T=["
+
content
+
"]"
;
}
}
@Override
@Override
...
@@ -42,17 +42,15 @@ public class Token implements Serializable, Comparable<Token> {
...
@@ -42,17 +42,15 @@ public class Token implements Serializable, Comparable<Token> {
type
==
token
.
type
;
type
==
token
.
type
;
}
}
private
final
int
hashCode
;
private
int
generateHashCode
()
{
int
result
=
content
!=
null
?
content
.
hashCode
()
:
0
;
result
=
31
*
result
+
(
type
!=
null
?
type
.
hashCode
()
:
0
);
return
result
;
}
@Override
@Override
public
int
hashCode
()
{
public
String
toString
()
{
return
hashCode
;
if
(
this
.
equals
(
END
))
{
return
"Token <END>"
;
}
if
(
this
.
equals
(
START
))
{
return
"TOKEN <Start>"
;
}
return
"T=["
+
content
+
"]"
;
}
}
public
String
render
(
String
prefix
)
{
public
String
render
(
String
prefix
)
{
...
...
src/main/java/markov/shortener/ShortenerByteHUffmanTrainerImpl.java
View file @
f6319bee
...
@@ -42,7 +42,7 @@ public class ShortenerByteHUffmanTrainerImpl extends ShortenerSimpleImpl {
...
@@ -42,7 +42,7 @@ public class ShortenerByteHUffmanTrainerImpl extends ShortenerSimpleImpl {
if
(
i
<
128
)
{
if
(
i
<
128
)
{
return
Stream
.
of
((
byte
)
(
i
|
0b10000000
));
return
Stream
.
of
((
byte
)
(
i
|
0b10000000
));
}
else
{
}
else
{
Stream
.
Builder
<
Byte
>
builder
=
Stream
.
<
Byte
>
builder
();
Stream
.
Builder
<
Byte
>
builder
=
Stream
.
builder
();
for
(
byte
b
:
Utils
.
toByteArray
(
i
))
{
for
(
byte
b
:
Utils
.
toByteArray
(
i
))
{
builder
.
add
(
b
);
builder
.
add
(
b
);
}
}
...
...
src/main/java/markov/shortener/ShortenerByteHuffmanImpl.java
View file @
f6319bee
...
@@ -28,7 +28,7 @@ public class ShortenerByteHuffmanImpl extends ShortenerSimpleImpl {
...
@@ -28,7 +28,7 @@ public class ShortenerByteHuffmanImpl extends ShortenerSimpleImpl {
if
(
i
<
128
)
{
if
(
i
<
128
)
{
return
Stream
.
of
((
byte
)
(
i
|
0b10000000
));
return
Stream
.
of
((
byte
)
(
i
|
0b10000000
));
}
else
{
}
else
{
Stream
.
Builder
<
Byte
>
builder
=
Stream
.
<
Byte
>
builder
();
Stream
.
Builder
<
Byte
>
builder
=
Stream
.
builder
();
for
(
byte
b
:
Utils
.
toByteArray
(
i
))
{
for
(
byte
b
:
Utils
.
toByteArray
(
i
))
{
builder
.
add
(
b
);
builder
.
add
(
b
);
}
}
...
...
src/test/java/markov/GlyphTest.java
View file @
f6319bee
...
@@ -2,7 +2,8 @@ package markov;
...
@@ -2,7 +2,8 @@ package markov;
import
org.junit.jupiter.api.Test
;
import
org.junit.jupiter.api.Test
;
import
static
org
.
junit
.
jupiter
.
api
.
Assertions
.*;
import
static
org
.
junit
.
jupiter
.
api
.
Assertions
.
assertEquals
;
import
static
org
.
junit
.
jupiter
.
api
.
Assertions
.
assertNotEquals
;
class
GlyphTest
{
class
GlyphTest
{
...
@@ -16,11 +17,6 @@ class GlyphTest {
...
@@ -16,11 +17,6 @@ class GlyphTest {
assertEquals
(
a
,
a2
);
assertEquals
(
a
,
a2
);
assertNotEquals
(
a
,
w
);
assertNotEquals
(
a
,
w
);
assertNotEquals
(
a
,
c
);
assertNotEquals
(
a
,
c
);
// assertNotEquals(Glyph.End, c);
// assertNotEquals(Glyph.Start, c);
// assertNotEquals(Glyph.Start, Glyph.End);
// assertEquals(Glyph.Start, Glyph.Start);
// assertEquals(Glyph.End, Glyph.End);
}
}
}
}
\ No newline at end of file
src/test/java/markov/LookupTests.java
View file @
f6319bee
This diff is collapsed.
Click to expand it.
src/test/java/markov/ParserStreamStyleTest.java
deleted
100644 → 0
View file @
c64692b3
package
markov
;
import
org.junit.jupiter.api.Assertions
;
import
org.junit.jupiter.api.Test
;
import
java.util.Arrays
;
import
java.util.stream.Collectors
;
import
java.util.stream.Stream
;
// Test class whose only live code is an empty tests() method: every statement
// and the whole second test are commented out, so nothing is asserted here.
public
class
ParserStreamStyleTest
{
// Empty test body; the disabled code below built a Collector and compared the
// result of combineToTokens(...) against an expected token list.
@Test
void
tests
()
{
// Collector parserStreamStyle = new Collector(1);
// Assertions.assertIterableEquals(
// Arrays.asList(
// new Token(Glyph.Start.getContent(), Glyph.Type.control),
// new Token("ab"),
// new Token(" ", Glyph.Type.whitespace),
// new Token("c"),
// new Token(Glyph.End.getContent(), Glyph.Type.control)),
// parserStreamStyle.combineToTokens(Stream.of(Glyph.Start,
// new Glyph(Glyph.Type.word, "a"),
// new Glyph(Glyph.Type.word, "b"),
// new Glyph(Glyph.Type.whitespace, " "),
// new Glyph(Glyph.Type.word, "c"),
// Glyph.End, new Glyph(Glyph.Type.empty, "EMPTY")))
// .collect(Collectors.toList()));
}
// Disabled second test: it only printed statistics and made no assertions.
// @Test
// void test2() {
// Collector parserStreamStyle = new Collector(2);
// Data data = parserStreamStyle.learn(Stream.<String>builder().add("a bc a d_e ").build());
// System.out.println(data.dumpStats());
// System.out.println(data.fetch(new Prefix(Arrays.asList(new Token("e")))).allPossible());
// }
}
\ No newline at end of file
src/test/java/markov/ShortenerByteHuffmanImplTest.java
deleted
100644 → 0
View file @
c64692b3
package
markov
;
import
org.junit.jupiter.api.Test
;
import
static
org
.
junit
.
jupiter
.
api
.
Assertions
.*;
// Placeholder test class: its single test method has an empty body and asserts nothing.
class
ShortenerByteHuffmanImplTest
{
// Empty test; passes trivially.
@Test
void
random
()
{
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment