Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
markov
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Commits
Open sidebar
Patrick Friedel
markov
Commits
6613e459
Commit
6613e459
authored
Mar 13, 2018
by
Hut
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
performance stuff
parent
e42c4630
Show whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
151 additions
and
260 deletions
+151
-260
Builder.java
src/main/java/markov/Builder.java
+1
-3
Collector.java
src/main/java/markov/Collector.java
+8
-8
Data.java
src/main/java/markov/Data.java
+3
-0
Decission.java
src/main/java/markov/Decission.java
+7
-15
Lookup.java
src/main/java/markov/Lookup.java
+66
-37
Mail.java
src/main/java/markov/Mail.java
+3
-8
Main.java
src/main/java/markov/Main.java
+0
-4
Parser.java
src/main/java/markov/Parser.java
+1
-1
Prefix.java
src/main/java/markov/Prefix.java
+8
-11
Sentence.java
src/main/java/markov/Sentence.java
+0
-7
ShortenerByte2Impl.java
src/main/java/markov/ShortenerByte2Impl.java
+0
-42
ShortenerByte64Impl.java
src/main/java/markov/ShortenerByte64Impl.java
+0
-39
ShortenerByteImpl.java
src/main/java/markov/ShortenerByteImpl.java
+0
-54
ShortenerSimpleImpl.java
src/main/java/markov/ShortenerSimpleImpl.java
+0
-2
ShortenerStringImpl.java
src/main/java/markov/ShortenerStringImpl.java
+0
-16
LookupTests.java
src/test/java/markov/LookupTests.java
+29
-12
PrefixTests.java
src/test/java/markov/PrefixTests.java
+25
-1
No files found.
src/main/java/markov/Builder.java
View file @
6613e459
...
@@ -31,12 +31,11 @@ public class Builder {
...
@@ -31,12 +31,11 @@ public class Builder {
Function
<
Prefix
,
Decission
>
producer
)
{
Function
<
Prefix
,
Decission
>
producer
)
{
Sentence
sentence
=
new
Sentence
(
data
);
Sentence
sentence
=
new
Sentence
(
data
);
Prefix
p
=
initPrefix
();
Prefix
p
=
initPrefix
();
Decission
d
=
new
Decission
(
Token
.
START
,
0
);
Decission
d
=
producer
.
apply
(
p
);
for
(
int
i
=
0
;
i
<
1000
;
i
++)
{
for
(
int
i
=
0
;
i
<
1000
;
i
++)
{
if
(
Token
.
END
.
equals
(
d
.
getToken
()))
if
(
Token
.
END
.
equals
(
d
.
getToken
()))
break
;
break
;
d
=
producer
.
apply
(
p
);
d
=
producer
.
apply
(
p
);
d
.
setP
(
p
);
sentence
.
add
(
d
);
sentence
.
add
(
d
);
p
=
p
.
slide
(
d
.
getToken
(),
prefix_length
);
p
=
p
.
slide
(
d
.
getToken
(),
prefix_length
);
}
}
...
@@ -53,7 +52,6 @@ public class Builder {
...
@@ -53,7 +52,6 @@ public class Builder {
throw
new
NullPointerException
(
String
.
format
(
"could not find a lookup for %s"
,
prefix
));
throw
new
NullPointerException
(
String
.
format
(
"could not find a lookup for %s"
,
prefix
));
}
}
Decission
decission
=
l
.
forRandom
(
nextRandomNumber
());
Decission
decission
=
l
.
forRandom
(
nextRandomNumber
());
decission
.
setLookup
(
l
);
return
decission
;
return
decission
;
}
}
...
...
src/main/java/markov/Collector.java
View file @
6613e459
package
markov
;
package
markov
;
import
java.util.Collection
;
import
java.util.stream.Stream
;
public
class
Collector
{
public
class
Collector
{
private
final
int
prefixLength
;
private
final
int
prefixLength
;
private
final
Prefix
[]
slider
=
new
Prefix
[]{
getNewPrefix
()}
;
private
Prefix
slider
=
getNewPrefix
()
;
private
final
Data
data
;
private
final
Data
data
;
...
@@ -17,12 +14,12 @@ public class Collector {
...
@@ -17,12 +14,12 @@ public class Collector {
}
}
public
void
learn
(
Token
token
)
{
public
void
learn
(
Token
token
)
{
data
.
add
(
slider
[
0
]
,
token
);
data
.
add
(
slider
,
token
);
slider
[
0
]
=
slider
[
0
]
.
slide
(
token
,
this
.
prefixLength
);
slider
=
slider
.
slide
(
token
,
this
.
prefixLength
);
}
}
public
void
reset
()
{
public
void
reset
()
{
slider
[
0
]
=
getNewPrefix
();
slider
=
getNewPrefix
();
}
}
private
Prefix
getNewPrefix
()
{
private
Prefix
getNewPrefix
()
{
...
@@ -33,7 +30,10 @@ public class Collector {
...
@@ -33,7 +30,10 @@ public class Collector {
return
prefixLength
;
return
prefixLength
;
}
}
public
Data
getData
()
{
public
Data
finishAndGetData
()
{
data
.
finish
();
return
data
;
return
data
;
}
}
}
}
src/main/java/markov/Data.java
View file @
6613e459
...
@@ -70,4 +70,7 @@ public class Data implements Serializable{
...
@@ -70,4 +70,7 @@ public class Data implements Serializable{
}
}
public
void
finish
()
{
data
.
values
().
forEach
(
l
->
l
.
finishCollection
());
}
}
}
src/main/java/markov/Decission.java
View file @
6613e459
...
@@ -3,27 +3,19 @@ package markov;
...
@@ -3,27 +3,19 @@ package markov;
public
class
Decission
{
public
class
Decission
{
private
final
Token
token
;
private
final
Token
token
;
private
final
int
id
;
private
final
int
id
;
private
Prefix
p
;
private
final
Lookup
lookup
;
private
Lookup
lookup
;
public
Decission
(
Token
token
,
int
id
,
Lookup
lookup
)
{
public
Decission
(
Token
key
,
int
id
)
{
this
.
token
=
token
;
token
=
key
;
this
.
id
=
id
;
this
.
id
=
id
;
}
public
void
setLookup
(
Lookup
lookup
)
{
this
.
lookup
=
lookup
;
this
.
lookup
=
lookup
;
}
}
public
void
setP
(
Prefix
p
)
{
// public Decission(Token key, int id) {
// token = key;
// this.id = id;
// }
this
.
p
=
p
;
}
public
Prefix
getP
()
{
return
p
;
}
public
Lookup
getLookup
()
{
public
Lookup
getLookup
()
{
return
lookup
;
return
lookup
;
...
...
src/main/java/markov/Lookup.java
View file @
6613e459
package
markov
;
package
markov
;
import
java.io.Serializable
;
import
java.io.Serializable
;
import
java.util.Arrays
;
import
java.util.Comparator
;
import
java.util.LinkedHashMap
;
import
java.util.LinkedHashMap
;
import
java.util.Map.Entry
;
import
java.util.Map.Entry
;
import
java.util.Set
;
import
java.util.Set
;
...
@@ -9,7 +11,13 @@ import java.util.stream.Collectors;
...
@@ -9,7 +11,13 @@ import java.util.stream.Collectors;
public
class
Lookup
implements
Serializable
{
public
class
Lookup
implements
Serializable
{
private
final
LinkedHashMap
<
Token
,
Integer
>
tokens
=
new
LinkedHashMap
<
Token
,
Integer
>();
private
final
LinkedHashMap
<
Token
,
Integer
>
tokens
=
new
LinkedHashMap
<
Token
,
Integer
>();
private
int
totalCounts
=
0
;
private
Decission
[]
finalData
;
private
long
[]
finishedSums
;
private
boolean
isFinishedCollecting
=
false
;
private
int
totalCounts
=
0
;
int
getTotalCounts
()
{
int
getTotalCounts
()
{
return
totalCounts
;
return
totalCounts
;
}
}
...
@@ -23,67 +31,88 @@ public class Lookup implements Serializable {
...
@@ -23,67 +31,88 @@ public class Lookup implements Serializable {
}
}
public
void
add
(
Token
t
)
{
public
void
add
(
Token
t
)
{
if
(
isFinishedCollecting
)
{
throw
new
IllegalStateException
(
"already finished"
);
}
Integer
i
=
tokens
.
getOrDefault
(
t
,
0
);
Integer
i
=
tokens
.
getOrDefault
(
t
,
0
);
tokens
.
put
(
t
,
i
+
1
);
tokens
.
put
(
t
,
i
+
1
);
totalCounts
++;
totalCounts
++;
}
}
public
Decission
forRandom
(
double
random
)
{
public
Decission
forRandom
(
double
random
)
{
if
(!
isFinishedCollecting
)
{
throw
new
IllegalStateException
(
"not jet finished"
);
}
if
(
random
<
0
||
random
>=
1
)
{
if
(
random
<
0
||
random
>=
1
)
{
throw
new
IllegalArgumentException
(
"expected double [0; 1)"
);
throw
new
IllegalArgumentException
(
"expected double [0; 1)"
);
}
}
int
id
=
(
int
)
Math
.
floor
(
random
*
getTotalCounts
());
int
id
=
(
int
)
Math
.
floor
(
random
*
totalCounts
);
int
i
=
0
;
int
i
=
Arrays
.
binarySearch
(
finishedSums
,
id
);
for
(
Entry
<
Token
,
Integer
>
entry
:
tokens
.
entrySet
())
{
i
=
Math
.
min
(
i
>=
0
?
i
+
1
:
-
1
*
(
i
+
1
),
finishedSums
.
length
-
1
);
i
+=
entry
.
getValue
();
return
finalData
[
i
];
if
(
id
<
i
)
{
return
new
Decission
(
entry
.
getKey
(),
id
);
}
}
throw
new
IllegalStateException
(
String
.
format
(
"failed to find a random token with seed %d within %s"
,
id
,
this
));
}
}
public
Decission
forId
(
int
id
)
{
public
Decission
forId
(
int
id
)
{
if
(
id
<
0
||
id
>
getTotalCounts
()
)
{
if
(
!
isFinishedCollecting
)
{
throw
new
Illegal
ArgumentException
(
String
.
format
(
"got %d but expected id [0; %d)"
,
id
,
getTotalCounts
())
);
throw
new
Illegal
StateException
(
"not jet finished"
);
}
}
int
i
=
0
;
if
(
id
<
0
||
id
>
finishedSums
.
length
)
{
for
(
Entry
<
Token
,
Integer
>
entry
:
tokens
.
entrySet
())
{
throw
new
IllegalArgumentException
(
i
+=
entry
.
getValue
();
String
.
format
(
"got %d but expected id [0; %d)"
,
id
,
finishedSums
.
length
));
if
(
id
<
i
)
{
return
new
Decission
(
entry
.
getKey
(),
id
);
}
}
}
return
finalData
[
id
];
throw
new
IllegalStateException
(
String
.
format
(
"failed to find a random token with seed %d within %s"
,
id
,
this
));
}
}
public
Decission
average
()
{
public
Decission
average
()
{
return
tokens
if
(!
isFinishedCollecting
)
{
.
entrySet
().
stream
().
sorted
((
e1
,
e2
)
->
Integer
throw
new
IllegalStateException
(
"not jet finished"
);
.
compare
(
e2
.
getValue
(),
e1
.
getValue
()))
}
.
findFirst
().
map
(
e
->
new
Decission
(
e
.
getKey
(),
e
.
getValue
())).
get
()
;
return
finalData
[
this
.
finishedSums
.
length
-
1
]
;
}
}
public
Set
<
Token
>
allPossible
()
{
public
Set
<
Token
>
allPossible
()
{
return
this
.
tokens
.
keySet
();
return
this
.
tokens
.
keySet
();
}
}
void
finishCollection
()
{
if
(!
isFinishedCollecting
)
{
int
size
=
tokens
.
size
();
finishedSums
=
new
long
[
size
];
finalData
=
new
Decission
[
size
];
final
int
[]
i
=
new
int
[
1
];
i
[
0
]
=
0
;
final
long
[]
sum
=
new
long
[
1
];
sum
[
0
]
=
0
;
tokens
.
entrySet
()
.
stream
()
.
sequential
()
.
sorted
(
Comparator
.
comparingInt
(
Entry:
:
getValue
))
.
forEach
(
e
->
{
sum
[
0
]
+=
e
.
getValue
();
finishedSums
[
i
[
0
]]
=
sum
[
0
];
finalData
[
i
[
0
]]
=
new
Decission
(
e
.
getKey
(),
i
[
0
],
this
);
i
[
0
]++;
});
this
.
tokens
.
clear
();
isFinishedCollecting
=
true
;
}
}
@Override
@Override
public
String
toString
()
{
public
String
toString
()
{
return
"Lookup [tokens= "
return
"Lookup [tokens= "
+
tokens
.
entrySet
().
stream
().
sorted
(
+
tokens
.
entrySet
().
stream
()
(
e1
,
e2
)
->
Integer
.
compare
(
e2
.
getValue
(),
e1
.
getValue
())).
map
(
.
sorted
((
e1
,
e2
)
->
Integer
.
compare
(
e2
.
getValue
(),
e
->
String
.
format
(
"%d*%s"
,
e
.
getValue
(),
e
.
getKey
())).
collect
(
e1
.
getValue
()))
Collectors
.
joining
(
", "
))
+
"]"
;
.
map
(
e
->
String
.
format
(
"%d*%s"
,
e
.
getValue
(),
}
e
.
getKey
()))
.
collect
(
Collectors
.
joining
(
", "
))
/**
+
"]"
;
* Not thread save!
*/
void
resetFinishding
()
{
this
.
isFinishedCollecting
=
false
;
this
.
finishedSums
=
null
;
this
.
finalData
=
null
;
}
}
}
}
src/main/java/markov/Mail.java
View file @
6613e459
...
@@ -32,11 +32,11 @@ public class Mail {
...
@@ -32,11 +32,11 @@ public class Mail {
"huffmanbytes.file"
,
"classpath:huffmanBytes"
);
"huffmanbytes.file"
,
"classpath:huffmanBytes"
);
private
static
final
int
iterations
=
1_000_000
;
private
static
final
int
iterations
=
1_000_000
;
// private static final int iterations =
50
0_000;
// private static final int iterations =
1
0_000;
public
static
void
main
(
String
[]
args
)
public
static
void
main
(
String
[]
args
)
throws
IOException
,
ClassNotFoundException
,
NoSuchFieldException
,
IllegalAccessException
{
throws
IOException
,
ClassNotFoundException
,
NoSuchFieldException
,
IllegalAccessException
{
//
System.in.read();
System
.
in
.
read
();
new
Mail
().
timeCreation
();
new
Mail
().
timeCreation
();
}
}
...
@@ -55,8 +55,7 @@ public class Mail {
...
@@ -55,8 +55,7 @@ public class Mail {
Stopwatch
s
=
Stopwatch
.
createStarted
();
Stopwatch
s
=
Stopwatch
.
createStarted
();
for
(
int
i
=
0
;
i
<
iterations
;
i
++)
{
for
(
int
i
=
0
;
i
<
iterations
;
i
++)
{
Utils
.
maybePrintPercentages
(
i
,
iterations
);
Utils
.
maybePrintPercentages
(
i
,
iterations
);
IntStream
.
range
(
1
,
maxPrefix
+
1
).
parallel
().
forEach
(
IntStream
.
range
(
1
,
maxPrefix
+
1
).
parallel
().
forEach
(
j
->
builder
[
j
].
random
());
j
->
builder
[
j
].
random
());
}
}
s
.
stop
();
s
.
stop
();
System
.
out
.
println
(
"took: "
+
s
.
toString
());
System
.
out
.
println
(
"took: "
+
s
.
toString
());
...
@@ -204,10 +203,6 @@ public class Mail {
...
@@ -204,10 +203,6 @@ public class Mail {
Builder
b
=
new
Builder
(
prefixLength
,
dataMap
.
get
(
prefixLength
));
Builder
b
=
new
Builder
(
prefixLength
,
dataMap
.
get
(
prefixLength
));
List
<
ShortenerStats
>
shorter
=
Arrays
.
asList
(
List
<
ShortenerStats
>
shorter
=
Arrays
.
asList
(
new
ShortenerStats
(
new
ShortenerSimpleImpl
(
dataMap
.
get
(
prefixLength
))),
new
ShortenerStats
(
new
ShortenerSimpleImpl
(
dataMap
.
get
(
prefixLength
))),
new
ShortenerStats
(
new
ShortenerByteImpl
(
dataMap
.
get
(
prefixLength
))),
new
ShortenerStats
(
new
ShortenerStringImpl
(
dataMap
.
get
(
prefixLength
))),
new
ShortenerStats
(
new
ShortenerByte2Impl
(
dataMap
.
get
(
prefixLength
))),
new
ShortenerStats
(
new
ShortenerByte64Impl
(
dataMap
.
get
(
prefixLength
))),
new
ShortenerStats
(
new
ShortenerIntHuffmanImpl
(
dataMap
.
get
(
prefixLength
),
intCode
)),
new
ShortenerStats
(
new
ShortenerIntHuffmanImpl
(
dataMap
.
get
(
prefixLength
),
intCode
)),
new
ShortenerStats
(
new
ShortenerStats
(
new
ShortenerByteHuffmanImpl
(
dataMap
.
get
(
prefixLength
),
byteCode
)));
new
ShortenerByteHuffmanImpl
(
dataMap
.
get
(
prefixLength
),
byteCode
)));
...
...
src/main/java/markov/Main.java
View file @
6613e459
...
@@ -19,10 +19,6 @@ public class Main {
...
@@ -19,10 +19,6 @@ public class Main {
for
(
int
i
=
0
;
i
<
1
;
i
++)
{
for
(
int
i
=
0
;
i
<
1
;
i
++)
{
System
.
out
.
println
(
r2
.
render
(
sentence
));
System
.
out
.
println
(
r2
.
render
(
sentence
));
}
}
ShortenerByteImpl
shortener
=
new
ShortenerByteImpl
(
data
);
String
id
=
shortener
.
getId
(
sentence
);
System
.
out
.
println
(
id
);
System
.
out
.
println
(
r
.
render
(
shortener
.
getSentence
(
id
)));
// Collection<Map.Entry<Prefix, Decission>> a = b.average();
// Collection<Map.Entry<Prefix, Decission>> a = b.average();
...
...
src/main/java/markov/Parser.java
View file @
6613e459
...
@@ -25,6 +25,6 @@ public class Parser {
...
@@ -25,6 +25,6 @@ public class Parser {
c
.
reset
();
c
.
reset
();
}
}
});
});
return
collectors
.
stream
().
collect
(
Collectors
.
toMap
(
c
->
c
.
getPrefixLength
(),
c
->
c
.
g
etData
()));
return
collectors
.
stream
().
collect
(
Collectors
.
toMap
(
c
->
c
.
getPrefixLength
(),
c
->
c
.
finishAndG
etData
()));
}
}
}
}
src/main/java/markov/Prefix.java
View file @
6613e459
package
markov
;
package
markov
;
import
java.io.Serializable
;
import
java.io.Serializable
;
import
java.util.ArrayList
;
import
java.util.Arrays
;
import
java.util.Arrays
;
import
java.util.List
;
import
java.util.List
;
...
@@ -23,19 +22,17 @@ public class Prefix implements Serializable, Comparable<Prefix> {
...
@@ -23,19 +22,17 @@ public class Prefix implements Serializable, Comparable<Prefix> {
}
}
public
Prefix
slide
(
Token
newToken
,
int
maxLength
)
{
public
Prefix
slide
(
Token
newToken
,
int
maxLength
)
{
List
<
Token
>
newTokens
=
new
ArrayList
<
Token
>(
Arrays
.
asList
(
this
.
tokens
.
clone
())
);
int
newLength
=
Math
.
min
(
tokens
.
length
+
1
,
maxLength
);
newTokens
.
add
(
newToken
)
;
Token
[]
newTokens
=
new
Token
[
newLength
]
;
while
(
newTokens
.
size
()
>
maxLength
)
{
System
.
arraycopy
(
tokens
,
Math
.
max
(
0
,
tokens
.
length
-
newLength
+
1
),
newTokens
,
0
,
newTokens
.
remove
(
0
);
newLength
-
1
);
}
newTokens
[
newLength
-
1
]
=
newToken
;
return
new
Prefix
(
newTokens
);
return
new
Prefix
(
newTokens
);
}
}
@Override
@Override
public
String
toString
()
{
public
String
toString
()
{
return
"Prefix{"
+
return
"Prefix{"
+
"tokens="
+
Arrays
.
toString
(
tokens
)
+
'}'
;
"tokens="
+
Arrays
.
toString
(
tokens
)
+
'}'
;
}
}
...
@@ -62,12 +59,12 @@ public class Prefix implements Serializable, Comparable<Prefix> {
...
@@ -62,12 +59,12 @@ public class Prefix implements Serializable, Comparable<Prefix> {
@Override
@Override
public
int
compareTo
(
Prefix
o
)
{
public
int
compareTo
(
Prefix
o
)
{
if
(
this
.
tokens
.
length
!=
o
.
tokens
.
length
)
{
if
(
this
.
tokens
.
length
!=
o
.
tokens
.
length
)
{
return
Integer
.
compare
(
o
.
tokens
.
length
,
this
.
tokens
.
length
);
return
Integer
.
compare
(
o
.
tokens
.
length
,
this
.
tokens
.
length
);
}
}
for
(
int
i
=
0
;
i
<
this
.
tokens
.
length
;
i
++)
{
for
(
int
i
=
0
;
i
<
this
.
tokens
.
length
;
i
++)
{
int
c
=
this
.
tokens
[
i
].
compareTo
(
o
.
tokens
[
i
]);
int
c
=
this
.
tokens
[
i
].
compareTo
(
o
.
tokens
[
i
]);
if
(
c
!=
0
)
{
if
(
c
!=
0
)
{
return
c
;
return
c
;
}
}
}
}
...
...
src/main/java/markov/Sentence.java
View file @
6613e459
...
@@ -33,13 +33,6 @@ public class Sentence implements Iterable<Decission>, Serializable {
...
@@ -33,13 +33,6 @@ public class Sentence implements Iterable<Decission>, Serializable {
.
reduce
((
d1
,
d2
)
->
d1
*
d2
).
getAsDouble
();
.
reduce
((
d1
,
d2
)
->
d1
*
d2
).
getAsDouble
();
}
}
public
String
render
()
{
return
new
Renderer
().
render
(
this
);
}
public
String
id
()
{
return
new
ShortenerByteImpl
(
data
).
getId
(
this
);
}
@Override
@Override
public
Iterator
<
Decission
>
iterator
()
{
public
Iterator
<
Decission
>
iterator
()
{
return
decissions
.
iterator
();
return
decissions
.
iterator
();
...
...
src/main/java/markov/ShortenerByte2Impl.java
deleted
100644 → 0
View file @
e42c4630
package
markov
;
import
markov.stuff.Utils
;
import
java.nio.ByteBuffer
;
import
java.util.Base64
;
import
java.util.List
;
import
java.util.stream.Collectors
;
import
java.util.stream.Stream
;
/**
 * Shortener variant that packs small ids into two bytes and everything else
 * into the full byte array produced by {@code Utils.toByteArray}, then
 * Base64-encodes the result with the standard (non-URL) alphabet.
 */
public class ShortenerByte2Impl extends ShortenerSimpleImpl {

    /** Ids strictly below this value use the two-byte short form. */
    private static final int SHORT_FORM_LIMIT = 0b00111111_11111111;

    /** Bit OR-ed into the first byte of a short-form id. */
    private static final int SHORT_FORM_MARKER = 0b01000000;

    public ShortenerByte2Impl(Data data) {
        super(data);
    }

    /**
     * Encodes the given ids as a Base64 string.
     *
     * @param ints ids to encode, consumed in encounter order
     * @return Base64 text of the packed bytes
     */
    String fromInts(Stream<Integer> ints) {
        List<Byte> packed = ints.flatMap(value -> {
            if (value < SHORT_FORM_LIMIT) {
                // NOTE(review): indices 2 and 3 are presumably the two low-order
                // bytes of a 4-byte big-endian array; confirm against Utils.
                byte[] parts = Utils.toByteArray(value);
                return Stream.of((byte) (parts[2] | SHORT_FORM_MARKER), parts[3]);
            }
            Stream.Builder<Byte> wide = Stream.builder();
            for (byte part : Utils.toByteArray(value)) {
                wide.add(part);
            }
            return wide.build();
        }).collect(Collectors.toList());

        // Unbox into a plain array for the encoder.
        byte[] raw = new byte[packed.size()];
        int pos = 0;
        for (Byte piece : packed) {
            raw[pos++] = piece;
        }
        return Base64.getEncoder().encodeToString(raw);
    }
}
src/main/java/markov/ShortenerByte64Impl.java
deleted
100644 → 0
View file @
e42c4630
package
markov
;
import
markov.stuff.Utils
;
import
java.nio.ByteBuffer
;
import
java.util.Base64
;
import
java.util.List
;
import
java.util.stream.Collectors
;
import
java.util.stream.Stream
;
/**
 * Shortener variant that packs ids below 64 into a single marked byte and
 * everything else into the full byte array produced by
 * {@code Utils.toByteArray}, then Base64-encodes the result with the standard
 * (non-URL) alphabet.
 */
public class ShortenerByte64Impl extends ShortenerSimpleImpl {

    /** Ids strictly below this value use the one-byte short form. */
    private static final int SINGLE_BYTE_LIMIT = 64;

    /** Bit OR-ed into a short-form id before it is narrowed to a byte. */
    private static final int SINGLE_BYTE_MARKER = 0b01000000;

    public ShortenerByte64Impl(Data data) {
        super(data);
    }

    /**
     * Encodes the given ids as a Base64 string.
     *
     * @param ints ids to encode, consumed in encounter order
     * @return Base64 text of the packed bytes
     */
    String fromInts(Stream<Integer> ints) {
        List<Byte> packed = ints.flatMap(value -> {
            if (value >= SINGLE_BYTE_LIMIT) {
                // Long form: every byte Utils produces for this id.
                Stream.Builder<Byte> wide = Stream.builder();
                for (byte part : Utils.toByteArray(value)) {
                    wide.add(part);
                }
                return wide.build();
            }
            return Stream.of((byte) (value | SINGLE_BYTE_MARKER));
        }).collect(Collectors.toList());

        // Unbox into a plain array for the encoder.
        byte[] raw = new byte[packed.size()];
        int pos = 0;
        for (Byte piece : packed) {
            raw[pos++] = piece;
        }
        return Base64.getEncoder().encodeToString(raw);
    }
}
src/main/java/markov/ShortenerByteImpl.java
deleted
100644 → 0
View file @
e42c4630
package
markov
;
import
markov.stuff.Utils
;
import
java.nio.ByteBuffer
;
import
java.util.Base64
;
import
java.util.List
;
import
java.util.stream.Collectors
;
import
java.util.stream.Stream
;
/**
 * Shortener variant with a URL-safe Base64 id: ids below 128 are stored as a
 * single byte with the high bit set, every other id as the byte array
 * produced by {@code Utils.toByteArray}.
 *
 * <p>NOTE(review): decoding relies on the first byte of a long-form id having
 * its high bit clear, i.e. on {@code Utils.toByteArray} returning four
 * big-endian bytes and on ids being non-negative; confirm against Utils and
 * the callers.
 */
public class ShortenerByteImpl extends ShortenerSimpleImpl {

    /** Ids strictly below this value use the one-byte short form. */
    private static final int SINGLE_BYTE_LIMIT = 128;

    /** High bit that marks a short-form byte. */
    private static final int SINGLE_BYTE_MARKER = 0b10000000;

    /** Mask that strips {@link #SINGLE_BYTE_MARKER} from a short-form byte. */
    private static final int SINGLE_BYTE_PAYLOAD = 0b01111111;

    /** Number of bytes read back for one long-form id. */
    private static final int LONG_FORM_LENGTH = 4;

    public ShortenerByteImpl(Data data) {
        super(data);
    }

    /**
     * Encodes the given ids as a URL-safe Base64 string.
     *
     * @param ints ids to encode, consumed in encounter order
     * @return URL-safe Base64 text of the packed bytes
     */
    String fromInts(Stream<Integer> ints) {
        List<Byte> bl = ints.flatMap(i -> {
            if (i < SINGLE_BYTE_LIMIT) {
                return Stream.of((byte) (i | SINGLE_BYTE_MARKER));
            } else {
                Stream.Builder<Byte> builder = Stream.<Byte>builder();
                for (byte b : Utils.toByteArray(i)) {
                    builder.add(b);
                }
                return builder.build();
            }
        }).collect(Collectors.toList());
        ByteBuffer bb = ByteBuffer.allocate(bl.size());
        for (Byte b : bl) {
            bb.put(b);
        }
        bb.rewind();
        return Base64.getUrlEncoder().encodeToString(bb.array());
    }

    /**
     * Decodes a string produced by {@link #fromInts(Stream)} back into ids.
     *
     * @param hash URL-safe Base64 text
     * @return the decoded ids in their original order
     * @throws IllegalArgumentException if {@code hash} is not valid Base64 or
     *         ends in the middle of a long-form id
     */
    @Override
    Stream<Integer> toInts(String hash) {
        byte[] hashBytes = Base64.getUrlDecoder().decode(hash);
        Stream.Builder<Integer> builder = Stream.builder();
        int i = 0;
        while (i < hashBytes.length) {
            // Java bytes are signed, so "high bit set" means negative. The
            // previous test (< 128) was always true: it never reached the
            // long form and returned short-form ids with the marker bit
            // still attached (as negative numbers).
            if (hashBytes[i] < 0) {
                builder.add(hashBytes[i++] & SINGLE_BYTE_PAYLOAD);
            } else {
                if (hashBytes.length - i < LONG_FORM_LENGTH) {
                    throw new IllegalArgumentException(String.format(
                            "truncated id: expected %d bytes at offset %d but only %d remain",
                            LONG_FORM_LENGTH, i, hashBytes.length - i));
                }
                byte[] newBytes = new byte[LONG_FORM_LENGTH];
                System.arraycopy(hashBytes, i, newBytes, 0, LONG_FORM_LENGTH);
                builder.add(Utils.fromByteArray(newBytes));
                i += LONG_FORM_LENGTH;
            }
        }
        return builder.build();
    }
}
src/main/java/markov/ShortenerSimpleImpl.java
View file @
6613e459
...
@@ -50,8 +50,6 @@ public class ShortenerSimpleImpl implements Shortener {
...
@@ -50,8 +50,6 @@ public class ShortenerSimpleImpl implements Shortener {
while
(
i
<
ids
.
length
)
{
while
(
i
<
ids
.
length
)
{
Lookup
lookup
=
getData
().
fetch
(
prefix
);
Lookup
lookup
=
getData
().
fetch
(
prefix
);
Decission
d
=
lookup
.
getDistinctTokens
()
==
1
?
lookup
.
average
()
:
lookup
.
forId
(
ids
[
i
++]);
Decission
d
=
lookup
.
getDistinctTokens
()
==
1
?
lookup
.
average
()
:
lookup
.
forId
(
ids
[
i
++]);
d
.
setP
(
prefix
);
d
.
setLookup
(
lookup
);
sentence
.
add
(
d
);
sentence
.
add
(
d
);
prefix
=
prefix
.
slide
(
d
.
getToken
(),
getData
().
getPrefixLength
());
prefix
=
prefix
.
slide
(
d
.
getToken
(),
getData
().
getPrefixLength
());
}
}
...
...
src/main/java/markov/ShortenerStringImpl.java
deleted
100644 → 0
View file @
e42c4630
package
markov
;
import
java.util.Base64
;
import
java.util.stream.Collectors
;
import
java.util.stream.Stream
;
/**
 * Shortener variant that turns every id into the Unicode code point with
 * that number, concatenates the characters and Base64-encodes the text.
 */
public class ShortenerStringImpl extends ShortenerSimpleImpl {

    public ShortenerStringImpl(Data data) {
        super(data);
    }

    /**
     * Encodes the given ids as a Base64 string.
     *
     * <p>The intermediate text is always serialized as UTF-8. The previous
     * no-argument {@code getBytes()} used the platform default charset, so the
     * same sentence could yield different ids on different machines.
     *
     * @param ints ids to encode, consumed in encounter order; each must be a
     *        valid Unicode code point
     * @return Base64 text of the UTF-8 bytes
     * @throws IllegalArgumentException if an id is not a valid code point
     *         (thrown by {@link Character#toChars(int)})
     */
    String fromInts(Stream<Integer> ints) {
        String cps = ints
                .map(i -> new String(Character.toChars(i)))
                .collect(Collectors.joining());
        // Fully qualified so the file's import block does not need to change.
        return Base64.getEncoder()
                .encodeToString(cps.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    }
}
src/test/java/markov/LookupTests.java
View file @
6613e459
...
@@ -33,52 +33,69 @@ public class LookupTests {
...
@@ -33,52 +33,69 @@ public class LookupTests {
public
void
forRandom
()
{
public
void
forRandom
()
{
assertEquals
(
lookup
.
forRandom
(
0
).
getToken
(),
new
Token
(
"a"
));
assertEquals
(
lookup
.
forRandom
(
0
).
getToken
(),
new
Token
(
"a"
));
assertEquals
(
lookup
.
forRandom
(
0.5d
).
getToken
(),
new
Token
(
"a"
));
assertEquals
(
lookup
.
forRandom
(
0.5d
).
getToken
(),
new
Token
(
"a"
));
assertEquals
(
lookup
.
forRandom
(
0.999d
).
getToken
(),
new
Token
(
"a"
));
// assertEquals(lookup.forRandom(1), new Token("a"));
// assertEquals(lookup.forRandom(1), new Token("a"));
assertThrows
(
IllegalArgumentException
.
class
,
()
->
lookup
.
forRandom
(
1
));
assertThrows
(
IllegalArgumentException
.
class
,
()
->
lookup
.
forRandom
(
1
));
assertThrows
(
IllegalArgumentException
.
class
,
()
->
lookup
.
forRandom
(-
1
));
assertThrows
(
IllegalArgumentException
.
class
,
()
->
lookup
.
forRandom
(-
1
));
assertThrows
(
IllegalArgumentException
.
class
,
()
->
lookup
.
forRandom
(
1.001d
));
assertThrows
(
IllegalArgumentException
.
class
,
()
->
lookup
.
forRandom
(
1.001d
));
assertThrows
(
IllegalArgumentException
.
class
,
()
->
lookup
.
forRandom
(
2
));
assertThrows
(
IllegalArgumentException
.
class
,
()
->
lookup
.
forRandom
(
2
));
lookup
.
resetFinishding
();
lookup
.
add
(
new
Token
(
"b"
));
lookup
.
add
(
new
Token
(
"b"
));
Decission
a
=
lookup
.
forRandom
(
0
);
Decission
a
=
lookup
.
forRandom
(
0
);
Decission
b
=
lookup
.
forRandom
(
0.9d
);
Decission
b
=
lookup
.
forRandom
(
0.9d
);
assertNotEquals
(
a
,
b
);
assertNotEquals
(
a
,
b
);
lookup
.
resetFinishding
();
lookup
.
add
(
new
Token
(
"c"
));
lookup
.
add
(
new
Token
(
"c"
));
a
=
lookup
.
forRandom
(
0
);
a
=
lookup
.
forRandom
(
0
);
b
=
lookup
.
forRandom
(
0.51d
);
b
=
lookup
.
forRandom
(
0.51d
);
Decission
c
=
lookup
.
forRandom
(
0.9d
);
Decission
c
=
lookup
.
forRandom
(
0.9d
);
assertNotEquals
(
a
,
b
);
assertEquals
(
new
Token
(
"a"
),
a
.
getToken
());
assertNotEquals
(
a
,
c
);
assertEquals
(
new
Token
(
"b"
),
b
.
getToken
());
assertNotEquals
(
b
,
c
);
assertEquals
(
new
Token
(
"c"
),
c
.
getToken
());
lookup
.
resetFinishding
();
lookup
.
add
(
new
Token
(
"c"
));
lookup
.
add
(
new
Token
(
"c"
));
assertEquals
(
new
Token
(
"a"
),
lookup
.
forRandom
(
0
).
getToken
());
assertEquals
(
new
Token
(
"a"
),
lookup
.
forRandom
(
0.19d
).
getToken
());
assertEquals
(
new
Token
(
"b"
),
lookup
.
forRandom
(
0.2d
).
getToken
());
assertEquals
(
new
Token
(
"b"
),
lookup
.
forRandom
(
0.39d
).
getToken
());
assertEquals
(
new
Token
(
"c"
),
lookup
.
forRandom
(
0.4d
).
getToken
());
assertEquals
(
new
Token
(
"c"
),
lookup
.
forRandom
(
0.99d
).
getToken
());
}
}
@Test
@Test
public
void
average
()
{
public
void
average
()
{
assertEquals
(
lookup
.
average
().
getToken
(),
new
Token
(
"a"
));
assertEquals
(
new
Token
(
"a"
),
lookup
.
average
().
getToken
());
lookup
.
resetFinishding
();
lookup
.
add
(
new
Token
(
"b"
));
lookup
.
add
(
new
Token
(
"b"
));
Token
token
=
lookup
.
average
().
getToken
();
Token
token
=
lookup
.
average
().
getToken
();
assertTrue
(()
->
token
.
equals
(
new
Token
(
"a"
))
||
token
.
equals
(
new
Token
(
"b"
)));
assertTrue
(()
->
token
.
equals
(
new
Token
(
"a"
))
||
token
.
equals
(
new
Token
(
"b"
)));
lookup
.
resetFinishding
();
lookup
.
add
(
new
Token
(
"a"
));
lookup
.
add
(
new
Token
(
"a"
));
assertEquals
(
lookup
.
average
().
getToken
(),
new
Token
(
"a"
));
assertEquals
(
new
Token
(
"a"
),
lookup
.
average
().
getToken
());
lookup
.
resetFinishding
();
lookup
.
add
(
new
Token
(
"b"
));
lookup
.
add
(
new
Token
(
"b"
));
lookup
.
add
(
new
Token
(
"b"
));
lookup
.
add
(
new
Token
(
"b"
));
assertEquals
(
lookup
.
average
().
getToken
(),
new
Token
(
"b"
));
assertEquals
(
new
Token
(
"b"
),
lookup
.
average
().
getToken
(
));
}
}
@Test
@Test
public
void
allPossible
()
{
public
void
allPossible
()
{
assertIterableEquals
(
lookup
.
allPossible
(),
assertIterableEquals
(
lookup
.
allPossible
(),
Arrays
.
asList
(
new
Token
(
"a"
)));
Arrays
.
asList
(
new
Token
(
"a"
)));
lookup
.
add
(
new
Token
(
"b"
));
lookup
.
add
(
new
Token
(
"b"
));
assertIterableEquals
(
lookup
.
allPossible
(),
assertIterableEquals
(
lookup
.
allPossible
(),
Arrays
.
asList
(
new
Token
(
"a"
),
new
Token
(
"b"
)));
Arrays
.
asList
(
new
Token
(
"a"
),
new
Token
(
"b"
)));
lookup
.
add
(
new
Token
(
"c"
));
lookup
.
add
(
new
Token
(
"c"
));
assertIterableEquals
(
lookup
.
allPossible
(),
assertIterableEquals
(
lookup
.
allPossible
(),
Arrays
.
asList
(
new
Token
(
"a"
),
new
Token
(
"b"
),
new
Token
(
"c"
)));
Arrays
.
asList
(
new
Token
(
"a"
),
new
Token
(
"b"
),
new
Token
(
"c"
)));
lookup
.
add
(
new
Token
(
"a"
));
lookup
.
add
(
new
Token
(
"a"
));
assertIterableEquals
(
lookup
.
allPossible
(),
assertIterableEquals
(
lookup
.
allPossible
(),
Arrays
.
asList
(
new
Token
(
"a"
),
new
Token
(
"b"
),
new
Token
(
"c"
)));
Arrays
.
asList
(
new
Token
(
"a"
),
new
Token
(
"b"
),
new
Token
(
"c"
)));
}
}
...
...
src/test/java/markov/PrefixTests.java
View file @
6613e459
...
@@ -32,9 +32,32 @@ public class PrefixTests {
...
@@ -32,9 +32,32 @@ public class PrefixTests {
@Test
@Test
public
void
testFancySlide
()
{
public
void
testFancySlide
()
{
prefix
=
new
Prefix
(
Arrays
.
asList
(
new
Token
(
"a"
)));
prefix
=
new
Prefix
(
Arrays
.
asList
(
new
Token
(
"a"
)));
Prefix
p2
=
prefix
.
slide
(
new
Token
(
"b"
),
2
);
Prefix
p2
=
prefix
.
slide
(
new
Token
(
"b"
),
2
);
assertEquals
(
new
Prefix
(
Arrays
.
asList
(
new
Token
(
"a"
),
new
Token
(
"b"
))),
p2
);
assertEquals
(
new
Prefix
(
Arrays
.
asList
(
new
Token
(
"a"
),
new
Token
(
"b"
))),
p2
);
}
}
@Test
void
testExplicitGrow
()
{
prefix
=
new
Prefix
(
Arrays
.
asList
(
new
Token
(
"a"
),
new
Token
(
"b"
)));
Prefix
p2
=
prefix
.
slide
(
new
Token
(
"c"
),
3
);
assertEquals
(
new
Prefix
(
Arrays
.
asList
(
new
Token
(
"a"
),
new
Token
(
"b"
),
new
Token
(
"c"
))
),
p2
);
}
@Test
void
testToLargeGrow
()
{
prefix
=
new
Prefix
(
Arrays
.
asList
(
new
Token
(
"a"
),
new
Token
(
"b"
)));
Prefix
p2
=
prefix
.
slide
(
new
Token
(
"c"
),
4
);
assertEquals
(
new
Prefix
(
Arrays
.
asList
(
new
Token
(
"a"
),
new
Token
(
"b"
),
new
Token
(
"c"
))
),
p2
);
}
@Test
public
void
testShrink
()
{
prefix
=
new
Prefix
(
Arrays
.
asList
(
new
Token
(
"a"
),
new
Token
(
"b"
),
new
Token
(
"c"
)));
Prefix
p2
=
prefix
.
slide
(
new
Token
(
"d"
),
2
);
assertEquals
(
new
Prefix
(
Arrays
.
asList
(
new
Token
(
"c"
),
new
Token
(
"d"
))),
p2
);
}
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment