Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
markov
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Commits
Open sidebar
Patrick Friedel
markov
Commits
218bdd37
Commit
218bdd37
authored
Mar 25, 2018
by
Hut
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
some cleanup
parent
0d976c4d
Show whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
489 additions
and
486 deletions
+489
-486
pom.xml
pom.xml
+11
-1
Data.java
src/main/java/markov/Data.java
+0
-1
Lookup.java
src/main/java/markov/Lookup.java
+17
-17
Mail.java
src/main/java/markov/Mail.java
+102
-78
ShortenerByteHuffmanImpl.java
src/main/java/markov/ShortenerByteHuffmanImpl.java
+3
-5
ShortenerIntHuffmanImpl.java
src/main/java/markov/ShortenerIntHuffmanImpl.java
+3
-5
Tokenizer.java
src/main/java/markov/Tokenizer.java
+35
-31
BitConverter.java
src/main/java/markov/huffman/BitConverter.java
+63
-0
ByteHuffmanCodeBuilder.java
src/main/java/markov/huffman/ByteHuffmanCodeBuilder.java
+0
-26
FrequenceType.java
src/main/java/markov/huffman/FrequenceType.java
+0
-6
HuffmanCode.java
src/main/java/markov/huffman/HuffmanCode.java
+0
-52
HuffmanCodeBuilder.java
src/main/java/markov/huffman/HuffmanCodeBuilder.java
+0
-90
HuffmanLeaf.java
src/main/java/markov/huffman/HuffmanLeaf.java
+0
-15
HuffmanNode.java
src/main/java/markov/huffman/HuffmanNode.java
+0
-23
HuffmanTree.java
src/main/java/markov/huffman/HuffmanTree.java
+0
-29
SimpleCountMap.java
src/main/java/markov/stuff/SimpleCountMap.java
+43
-35
BitConverterTest.java
src/test/java/markov/stuff/BitConverterTest.java
+212
-72
No files found.
pom.xml
View file @
218bdd37
...
...
@@ -17,7 +17,11 @@
</properties>
<dependencies>
<dependency>
<groupId>
directory.passive
</groupId>
<artifactId>
huffman
</artifactId>
<version>
1.0
</version>
</dependency>
<dependency>
<groupId>
com.tomgibara.bits
</groupId>
<artifactId>
bits
</artifactId>
...
...
@@ -35,6 +39,12 @@
<version>
${junit.jupiter.version}
</version>
<scope>
test
</scope>
</dependency>
<dependency>
<groupId>
org.junit.jupiter
</groupId>
<artifactId>
junit-jupiter-params
</artifactId>
<version>
${junit.jupiter.version}
</version>
<scope>
test
</scope>
</dependency>
<!-- To avoid compiler warnings about @API annotations in JUnit code -->
<dependency>
<groupId>
org.apiguardian
</groupId>
...
...
src/main/java/markov/Data.java
View file @
218bdd37
...
...
@@ -23,7 +23,6 @@ public class Data implements Serializable{
public
void
add
(
Prefix
p
,
Token
t
)
{
Lookup
l
=
data
.
getOrDefault
(
p
,
new
Lookup
());
// System.out.println(String.format("adding %s - %s", p, t, l));
l
.
add
(
t
);
data
.
put
(
p
,
l
);
}
...
...
src/main/java/markov/Lookup.java
View file @
218bdd37
...
...
@@ -4,6 +4,7 @@ import java.io.Serializable;
import
java.util.Arrays
;
import
java.util.Comparator
;
import
java.util.LinkedHashMap
;
import
java.util.List
;
import
java.util.Map.Entry
;
import
java.util.Set
;
import
java.util.stream.Collectors
;
...
...
@@ -57,8 +58,9 @@ public class Lookup implements Serializable {
throw
new
IllegalStateException
(
"not jet finished"
);
}
if
(
id
<
0
||
id
>
finishedSums
.
length
)
{
throw
new
IllegalArgumentException
(
String
.
format
(
"got %d but expected id [0; %d)"
,
id
,
finishedSums
.
length
));
throw
new
IllegalArgumentException
(
String
.
format
(
"got %d but expected id [0; %d)"
,
id
,
finishedSums
.
length
));
}
return
finalData
[
id
];
}
...
...
@@ -79,20 +81,18 @@ public class Lookup implements Serializable {
int
size
=
tokens
.
size
();
finishedSums
=
new
long
[
size
];
finalData
=
new
Decission
[
size
];
final
int
[]
i
=
new
int
[
1
];
i
[
0
]
=
0
;
final
long
[]
sum
=
new
long
[
1
];
sum
[
0
]
=
0
;
tokens
.
entrySet
()
List
<
Entry
<
Token
,
Integer
>>
orderedEntries
=
tokens
.
entrySet
()
.
stream
()
.
sequential
()
.
sorted
(
Comparator
.
comparingInt
(
Entry:
:
getValue
))
.
forEach
(
e
->
{
sum
[
0
]
+=
e
.
getValue
();
finishedSums
[
i
[
0
]]
=
sum
[
0
];
finalData
[
i
[
0
]]
=
new
Decission
(
e
.
getKey
(),
i
[
0
],
this
);
i
[
0
]++;
});
.
collect
(
Collectors
.
toList
());
int
sum
=
0
;
for
(
int
i
=
0
;
i
<
orderedEntries
.
size
();
i
++)
{
Entry
<
Token
,
Integer
>
entry
=
orderedEntries
.
get
(
i
);
sum
+=
entry
.
getValue
();
finishedSums
[
i
]
=
sum
;
finalData
[
i
]
=
new
Decission
(
entry
.
getKey
(),
i
,
this
);
}
this
.
tokens
.
clear
();
isFinishedCollecting
=
true
;
}
...
...
@@ -100,10 +100,10 @@ public class Lookup implements Serializable {
@Override
public
String
toString
()
{
return
"Lookup [tokens= "
+
tokens
.
entrySet
().
stream
().
sorted
(
(
e1
,
e2
)
->
Integer
.
compare
(
e2
.
getValue
(),
e1
.
getValue
())).
map
(
e
->
String
.
format
(
"%d*%s"
,
e
.
getValue
(),
e
.
getKey
())).
collect
(
Collectors
.
joining
(
", "
))
+
"]"
;
return
"Lookup [tokens= "
+
tokens
.
entrySet
().
stream
().
sorted
(
(
e1
,
e2
)
->
Integer
.
compare
(
e2
.
getValue
(),
e1
.
getValue
())).
map
(
e
->
String
.
format
(
"%d*%s"
,
e
.
getValue
(),
e
.
getKey
())).
collect
(
Collectors
.
joining
(
", "
))
+
"]"
;
}
/**
...
...
src/main/java/markov/Mail.java
View file @
218bdd37
package
markov
;
import
com.google.common.base.Stopwatch
;
import
markov
.huffman.ByteHuffmanCodeBuilder
;
import
markov
.huffman.HuffmanCode
;
import
directory.passive
.huffman.ByteHuffmanCodeBuilder
;
import
directory.passive
.huffman.HuffmanCode
;
import
markov.huffman.HuffmanStore
;
import
markov.stuff.CountMap
;
import
markov.stuff.Inspector
;
...
...
@@ -26,10 +26,12 @@ import java.util.stream.IntStream;
public
class
Mail
{
private
static
final
String
intsFileLocation
=
System
.
getProperties
().
getProperty
(
"huffmanints.file"
,
"classpath:huffmanInts"
);
private
static
final
String
bytesFileLocation
=
System
.
getProperties
().
getProperty
(
"huffmanbytes.file"
,
"classpath:huffmanBytes"
);
private
static
final
String
intsFileLocation
=
System
.
getProperties
().
getProperty
(
"huffmanints.file"
,
"classpath:huffmanInts"
);
private
static
final
String
bytesFileLocation
=
System
.
getProperties
().
getProperty
(
"huffmanbytes.file"
,
"classpath:huffmanBytes"
);
private
static
final
int
iterations
=
1_000_000
;
// private static final int iterations = 10_000;
...
...
@@ -79,8 +81,9 @@ public class Mail {
Stopwatch
s
=
Stopwatch
.
createStarted
();
for
(
int
i
=
0
;
i
<
iterations
;
i
++)
{
Utils
.
maybePrintPercentages
(
i
,
iterations
);
IntStream
.
range
(
1
,
maxPrefix
+
1
).
parallel
().
forEach
(
j
->
trainer
[
j
].
getId
(
builder
[
j
].
random
()));
IntStream
.
range
(
1
,
maxPrefix
+
1
)
.
parallel
()
.
forEach
(
j
->
trainer
[
j
].
getId
(
builder
[
j
].
random
()));
// for (int j = 1; j <= maxPrefix; j++) {
// trainer[j].getId(builder[j].random());
// }
...
...
@@ -99,31 +102,6 @@ public class Mail {
}
@SuppressWarnings
(
"unchecked"
)
private
Map
<
Integer
,
CountMap
<
Integer
>>
readIntMap
()
{
HuffmanStore
<
Integer
>
store
=
readHUffmanMap
(
intsFileLocation
);
return
store
.
getInternal
();
}
@SuppressWarnings
(
"unchecked"
)
private
Map
<
Integer
,
CountMap
<
Byte
>>
readByteMap
()
{
HuffmanStore
<
Byte
>
store
=
readHUffmanMap
(
bytesFileLocation
);
return
store
.
getInternal
();
}
// we control what's in the file
@SuppressWarnings
(
"unchecked"
)
private
<
X
extends
Serializable
>
HuffmanStore
<
X
>
readHUffmanMap
(
String
location
)
{
File
file
=
Utils
.
getFile
(
location
);
System
.
out
.
println
(
"reading from: "
+
file
.
getAbsolutePath
());
try
(
ObjectInputStream
in
=
new
ObjectInputStream
(
new
FileInputStream
(
file
)))
{
return
(
HuffmanStore
<
X
>)
in
.
readObject
();
}
catch
(
IOException
|
ClassNotFoundException
e
)
{
throw
new
RuntimeException
(
e
);
}
}
private
void
saveHuffmanMaps
(
Map
<
Integer
,
CountMap
<
Integer
>>
intCounts
,
Map
<
Integer
,
CountMap
<
Byte
>>
byteCounts
)
{
...
...
@@ -143,69 +121,61 @@ public class Mail {
private
void
printPrefixes
()
throws
NoSuchFieldException
,
IllegalAccessException
{
Map
<
Integer
,
Data
>
data
=
Utils
.
createDataMap
(
4
);
System
.
out
.
println
(
"\n\n\n1:"
);
Inspector
.
extractMap
(
data
.
get
(
1
)).
keySet
().
stream
().
sorted
().
limit
(
20
).
forEach
(
System
.
out
::
println
);
Inspector
.
extractMap
(
data
.
get
(
1
))
.
keySet
()
.
stream
()
.
sorted
()
.
limit
(
20
)
.
forEach
(
System
.
out
::
println
);
System
.
out
.
println
(
"\n\n\n2:"
);
Inspector
.
extractMap
(
data
.
get
(
2
)).
keySet
().
stream
().
sorted
().
limit
(
20
).
forEach
(
System
.
out
::
println
);
Inspector
.
extractMap
(
data
.
get
(
2
))
.
keySet
()
.
stream
()
.
sorted
()
.
limit
(
20
)
.
forEach
(
System
.
out
::
println
);
System
.
out
.
println
(
"\n\n\n3:"
);
Inspector
.
extractMap
(
data
.
get
(
3
)).
keySet
().
stream
().
sorted
().
limit
(
20
).
forEach
(
System
.
out
::
println
);
Inspector
.
extractMap
(
data
.
get
(
3
))
.
keySet
()
.
stream
()
.
sorted
()
.
limit
(
20
)
.
forEach
(
System
.
out
::
println
);
System
.
out
.
println
(
"\n\n\n4:"
);
Inspector
.
extractMap
(
data
.
get
(
4
)).
keySet
().
stream
().
sorted
().
limit
(
20
).
forEach
(
System
.
out
::
println
);
Inspector
.
extractMap
(
data
.
get
(
4
))
.
keySet
()
.
stream
()
.
sorted
()
.
limit
(
20
)
.
forEach
(
System
.
out
::
println
);
}
private
void
printDataHashStats
()
throws
NoSuchFieldException
,
IllegalAccessException
{
Map
<
Integer
,
Data
>
data
=
Utils
.
createDataMap
(
7
);
Inspector
.
printDataHashStats
(
data
);
}
private
static
class
ShortenerStats
{
private
final
Shortener
shortener
;
private
final
List
<
Integer
>
stats
=
new
ArrayList
<>(
iterations
);
private
ShortenerStats
(
Shortener
shortener
)
{
this
.
shortener
=
shortener
;
}
public
void
record
(
Sentence
sentence
)
{
stats
.
add
(
shortener
.
getId
(
sentence
).
length
());
}
public
void
printStats
()
{
System
.
out
.
println
(
shortener
.
getClass
().
getName
());
System
.
out
.
println
(
"distinct: "
+
stats
.
stream
().
distinct
().
count
());
IntSummaryStatistics
statistics
=
stats
.
stream
()
.
mapToInt
(
Integer:
:
intValue
)
.
summaryStatistics
();
System
.
out
.
println
(
String
.
format
(
"avg, min, high: %f, %d, %d"
,
statistics
.
getAverage
(),
statistics
.
getMin
(),
statistics
.
getMax
()));
System
.
out
.
println
(
"SD: "
+
Math
.
sqrt
(
stats
.
stream
()
.
mapToDouble
(
e
->
(
double
)
(
Math
.
pow
(
e
-
statistics
.
getAverage
(),
2
)))
.
sum
()
/
iterations
));
}
}
public
void
printShorterStats
()
throws
IOException
,
ClassNotFoundException
{
int
prefixLength
=
4
;
HuffmanCode
<
Integer
,
List
<
Boolean
>,
Boolean
,
SimpleCountMap
.
MutableInt
>
intCode
=
new
ByteHuffmanCodeBuilder
<
Integer
,
SimpleCountMap
.
MutableInt
>().
generateCode
(
readIntMap
().
get
(
prefixLength
).
asMap
());
HuffmanCode
<
Byte
,
List
<
Boolean
>,
Boolean
,
SimpleCountMap
.
MutableInt
>
byteCode
=
new
ByteHuffmanCodeBuilder
<
Byte
,
SimpleCountMap
.
MutableInt
>().
generateCode
(
readByteMap
().
get
(
prefixLength
).
asMap
());
HuffmanCode
<
Integer
,
List
<
Boolean
>>
intCode
=
ByteHuffmanCodeBuilder
.<
Integer
,
SimpleCountMap
.
MutableInt
>
createFactory
()
.
generateCode
(
readIntMap
().
get
(
prefixLength
).
asMap
(),
new
SimpleCountMap
.
MutableInt
.
FrequencySupport
());
HuffmanCode
<
Byte
,
List
<
Boolean
>>
byteCode
=
ByteHuffmanCodeBuilder
.<
Byte
,
SimpleCountMap
.
MutableInt
>
createFactory
()
.
generateCode
(
readByteMap
().
get
(
prefixLength
).
asMap
(),
new
SimpleCountMap
.
MutableInt
.
FrequencySupport
());
Map
<
Integer
,
Data
>
dataMap
=
Utils
.
createDataMap
(
prefixLength
);
Builder
b
=
new
Builder
(
prefixLength
,
dataMap
.
get
(
prefixLength
));
List
<
ShortenerStats
>
shorter
=
Arrays
.
asList
(
new
ShortenerStats
(
new
ShortenerSimpleImpl
(
dataMap
.
get
(
prefixLength
))),
new
ShortenerStats
(
new
ShortenerIntHuffmanImpl
(
dataMap
.
get
(
prefixLength
),
intCode
)),
new
ShortenerStats
(
new
ShortenerByteHuffmanImpl
(
dataMap
.
get
(
prefixLength
),
byteCode
)));
new
ShortenerStats
(
new
ShortenerByteHuffmanImpl
(
dataMap
.
get
(
prefixLength
),
byteCode
)));
System
.
out
.
println
(
"creating..."
);
for
(
int
i
=
0
;
i
<
iterations
;
i
++)
{
...
...
@@ -221,5 +191,59 @@ public class Mail {
}
}
@SuppressWarnings
(
"unchecked"
)
private
Map
<
Integer
,
CountMap
<
Integer
>>
readIntMap
()
{
HuffmanStore
<
Integer
>
store
=
readHUffmanMap
(
intsFileLocation
);
return
store
.
getInternal
();
}
@SuppressWarnings
(
"unchecked"
)
private
Map
<
Integer
,
CountMap
<
Byte
>>
readByteMap
()
{
HuffmanStore
<
Byte
>
store
=
readHUffmanMap
(
bytesFileLocation
);
return
store
.
getInternal
();
}
// we control what's in the file
@SuppressWarnings
(
"unchecked"
)
private
<
X
extends
Serializable
>
HuffmanStore
<
X
>
readHUffmanMap
(
String
location
)
{
File
file
=
Utils
.
getFile
(
location
);
System
.
out
.
println
(
"reading from: "
+
file
.
getAbsolutePath
());
try
(
ObjectInputStream
in
=
new
ObjectInputStream
(
new
FileInputStream
(
file
)))
{
return
(
HuffmanStore
<
X
>)
in
.
readObject
();
}
catch
(
IOException
|
ClassNotFoundException
e
)
{
throw
new
RuntimeException
(
e
);
}
}
private
static
class
ShortenerStats
{
private
final
Shortener
shortener
;
private
final
List
<
Integer
>
stats
=
new
ArrayList
<>(
iterations
);
private
ShortenerStats
(
Shortener
shortener
)
{
this
.
shortener
=
shortener
;
}
public
void
record
(
Sentence
sentence
)
{
stats
.
add
(
shortener
.
getId
(
sentence
).
length
());
}
public
void
printStats
()
{
System
.
out
.
println
(
shortener
.
getClass
().
getName
());
System
.
out
.
println
(
"distinct: "
+
stats
.
stream
().
distinct
().
count
());
IntSummaryStatistics
statistics
=
stats
.
stream
().
mapToInt
(
Integer:
:
intValue
).
summaryStatistics
();
System
.
out
.
println
(
String
.
format
(
"avg, min, high: %f, %d, %d"
,
statistics
.
getAverage
(),
statistics
.
getMin
(),
statistics
.
getMax
()));
System
.
out
.
println
(
"SD: "
+
Math
.
sqrt
(
stats
.
stream
()
.
mapToDouble
(
e
->
(
double
)
(
Math
.
pow
(
e
-
statistics
.
getAverage
(),
2
)))
.
sum
()
/
iterations
));
}
}
}
src/main/java/markov/ShortenerByteHuffmanImpl.java
View file @
218bdd37
package
markov
;
import
markov
.huffman.HuffmanCode
;
import
directory.passive
.huffman.HuffmanCode
;
import
markov.stuff.BitConverter
;
import
markov.stuff.SimpleCountMap
;
import
markov.stuff.Utils
;
import
java.util.Base64
;
...
...
@@ -14,12 +13,11 @@ import java.util.stream.Stream;
public
class
ShortenerByteHuffmanImpl
extends
ShortenerSimpleImpl
{
private
final
HuffmanCode
<
Byte
,
List
<
Boolean
>
,
Boolean
,
SimpleCountMap
.
MutableInt
>
byteCode
;
private
final
HuffmanCode
<
Byte
,
List
<
Boolean
>>
byteCode
;
public
ShortenerByteHuffmanImpl
(
Data
data
,
HuffmanCode
<
Byte
,
List
<
Boolean
>,
Boolean
,
SimpleCountMap
.
MutableInt
>
byteCode
)
{
Data
data
,
HuffmanCode
<
Byte
,
List
<
Boolean
>>
byteCode
)
{
super
(
data
);
this
.
byteCode
=
byteCode
;
}
...
...
src/main/java/markov/ShortenerIntHuffmanImpl.java
View file @
218bdd37
package
markov
;
import
markov
.huffman.HuffmanCode
;
import
directory.passive
.huffman.HuffmanCode
;
import
markov.stuff.BitConverter
;
import
markov.stuff.SimpleCountMap
;
import
java.util.Base64
;
import
java.util.List
;
...
...
@@ -12,12 +11,11 @@ import java.util.stream.Stream;
public
class
ShortenerIntHuffmanImpl
extends
ShortenerSimpleImpl
{
private
final
HuffmanCode
<
Integer
,
List
<
Boolean
>
,
Boolean
,
SimpleCountMap
.
MutableInt
>
intCode
;
private
final
HuffmanCode
<
Integer
,
List
<
Boolean
>>
intCode
;
public
ShortenerIntHuffmanImpl
(
Data
data
,
HuffmanCode
<
Integer
,
List
<
Boolean
>,
Boolean
,
SimpleCountMap
.
MutableInt
>
byteCode
)
{
Data
data
,
HuffmanCode
<
Integer
,
List
<
Boolean
>>
byteCode
)
{
super
(
data
);
this
.
intCode
=
byteCode
;
}
...
...
src/main/java/markov/Tokenizer.java
View file @
218bdd37
...
...
@@ -10,29 +10,10 @@ public class Tokenizer {
public
Stream
<
Stream
<
Token
>>
tokenizeCombined
(
Stream
<
Stream
<
String
>>
input
)
{
return
input
.
map
(
stringStream
->
putMarkers
(
combineToTokens
(
stringStream
.
flatMap
(
s
->
s
.
codePoints
().
boxed
().
map
(
this
::
glyphFromCodePoint
)))));
}
public
Stream
<
Stream
<
Token
>>
tokenize
(
Stream
<
String
>
input
)
{
return
input
.
map
(
s
->
putMarkers
(
combineToTokens
(
s
.
codePoints
().
boxed
().
map
(
this
::
glyphFromCodePoint
))));
}
private
Glyph
glyphFromCodePoint
(
int
codePoint
)
{
Glyph
.
Type
type
=
Glyph
.
Type
.
other
;
if
(
Character
.
isWhitespace
(
codePoint
))
{
type
=
Glyph
.
Type
.
whitespace
;
}
else
if
(
Character
.
isAlphabetic
(
codePoint
))
{
type
=
Glyph
.
Type
.
word
;
}
else
if
(
Arrays
.
asList
(
Character
.
START_PUNCTUATION
,
Character
.
END_PUNCTUATION
,
Character
.
INITIAL_QUOTE_PUNCTUATION
,
Character
.
FINAL_QUOTE_PUNCTUATION
,
Character
.
OTHER_PUNCTUATION
).
contains
(
Character
.
getType
(
codePoint
)))
{
type
=
Glyph
.
Type
.
punctuation
;
}
String
value
=
new
String
(
Character
.
toChars
(
codePoint
));
return
new
Glyph
(
type
,
value
);
return
input
.
map
(
stringStream
->
putMarkers
(
combineToTokens
(
stringStream
.
flatMap
(
s
->
s
.
codePoints
()
.
boxed
()
.
map
(
this
::
glyphFromCodePoint
)))));
}
private
Stream
<
Token
>
putMarkers
(
Stream
<
Token
>
input
)
{
...
...
@@ -41,21 +22,20 @@ public class Tokenizer {
public
Stream
<
Token
>
combineToTokens
(
Stream
<
Glyph
>
glyphs
)
{
final
Container
[]
previous
=
new
Container
[]{
null
};
return
Stream
.
concat
(
glyphs
,
Stream
.
of
(
new
Glyph
(
Glyph
.
Type
.
empty
,
""
)))
.
map
(
g
->
{
return
Stream
.
concat
(
glyphs
,
Stream
.
of
(
new
Glyph
(
Glyph
.
Type
.
empty
,
""
))).
map
(
g
->
{
Container
o
=
new
Container
(
g
);
o
.
p
=
previous
[
0
];
previous
[
0
]
=
o
;
return
o
;
}).
flatMap
(
go
->
{
if
(
go
.
p
!=
null
&&
go
.
p
.
self
!=
null
&&
!
go
.
self
.
getType
().
equals
(
go
.
p
.
self
.
getType
()))
{
if
(
go
.
p
!=
null
&&
go
.
p
.
self
!=
null
&&
!
go
.
self
.
getType
().
equals
(
go
.
p
.
self
.
getType
()))
{
List
<
Glyph
>
tokenGlyphs
=
new
ArrayList
<>();
Container
c
=
go
;
do
{
c
=
c
.
p
;
tokenGlyphs
.
add
(
0
,
c
.
self
);
}
while
(
c
!=
null
&&
c
.
p
!=
null
&&
c
.
p
.
self
!=
null
&&
}
while
(
c
!=
null
&&
c
.
p
!=
null
&&
c
.
p
.
self
!=
null
&&
c
.
self
.
getType
().
equals
(
c
.
p
.
self
.
getType
()));
go
.
p
=
null
;
// memory optimization
return
Stream
.<
List
<
Glyph
>>
builder
().
add
(
tokenGlyphs
).
build
();
...
...
@@ -65,6 +45,30 @@ public class Tokenizer {
}).
map
(
this
::
getTokenFromGlyphs
);
}
public
Stream
<
Stream
<
Token
>>
tokenize
(
Stream
<
String
>
input
)
{
return
input
.
map
(
s
->
putMarkers
(
combineToTokens
(
s
.
codePoints
()
.
boxed
()
.
map
(
this
::
glyphFromCodePoint
))));
}
private
Glyph
glyphFromCodePoint
(
int
codePoint
)
{
Glyph
.
Type
type
=
Glyph
.
Type
.
other
;
if
(
Character
.
isWhitespace
(
codePoint
))
{
type
=
Glyph
.
Type
.
whitespace
;
}
else
if
(
Character
.
isAlphabetic
(
codePoint
))
{
type
=
Glyph
.
Type
.
word
;
}
else
if
(
Arrays
.
asList
(
Character
.
START_PUNCTUATION
,
Character
.
END_PUNCTUATION
,
Character
.
INITIAL_QUOTE_PUNCTUATION
,
Character
.
FINAL_QUOTE_PUNCTUATION
,
Character
.
OTHER_PUNCTUATION
).
contains
(
Character
.
getType
(
codePoint
)))
{
type
=
Glyph
.
Type
.
punctuation
;
}
String
value
=
new
String
(
Character
.
toChars
(
codePoint
));
return
new
Glyph
(
type
,
value
);
}
private
Token
getTokenFromGlyphs
(
List
<
Glyph
>
l
)
{
String
content
=
l
.
stream
().
map
(
Glyph:
:
getContent
).
collect
(
Collectors
.
joining
());
Glyph
.
Type
type
=
l
.
get
(
0
).
getType
();
...
...
@@ -72,11 +76,11 @@ public class Tokenizer {
}
private
static
class
Container
{
final
Glyph
self
;
Container
p
;
Container
(
Glyph
self
)
{
this
.
self
=
self
;
}
final
Glyph
self
;
Container
p
;
}
}
src/main/java/markov/huffman/BitConverter.java
0 → 100644
View file @
218bdd37
package
markov
.
huffman
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.List
;
/**
 * Packs a list of bits into a byte array and unpacks such an array again.
 *
 * <p>Layout: the three lowest bits of the first byte hold {@code (bitCount + 3) % 8}, i.e. the
 * number of bits in use in the last byte (0 meaning the last byte is fully used). The payload
 * bits follow, least-significant bit first, starting at bit 3 of the first byte.
 */
public class BitConverter {

    /** Number of bits at the start of the first byte reserved for the length header. */
    private static final int HEADER_BITS = 3;

    /**
     * Encodes the given bits into a byte array using the layout described on the class.
     *
     * @param list the bits to encode; elements must not be {@code null}
     * @return a byte array of at least one byte containing header and payload
     */
    public byte[] toBytes(List<Boolean> list) {
        int lengthInBit = list.size() + HEADER_BITS;
        // Integer ceiling division; lengthInBit >= 3, so the result is always >= 1.
        byte[] bytes = new byte[(lengthInBit + 7) / 8];
        // Header: how many bits of the last byte are in use (0 == all eight).
        bytes[0] = (byte) (lengthInBit % 8);

        int position = HEADER_BITS;
        // Iterate instead of list.get(i) so linked lists are not traversed quadratically.
        for (Boolean bit : list) {
            if (bit) {
                bytes[position / 8] |= (byte) (1 << (position % 8));
            }
            position++;
        }
        return bytes;
    }

    /**
     * Decodes a byte array produced by {@link #toBytes(List)} back into its bits.
     *
     * @param array the encoded bytes; an empty array decodes to an empty list
     * @return the decoded bits in their original order
     * @throws IllegalArgumentException if the header describes a negative payload length
     */
    public List<Boolean> toBits(byte[] array) {
        if (array.length == 0) {
            return Collections.emptyList();
        }
        int lastByteNotEmpty = array[0] & 0b00000111;
        // A header of 0 means the last byte is full; otherwise only that many bits of it count.
        int usedBits = 8 * array.length - (lastByteNotEmpty == 0 ? 0 : 8 - lastByteNotEmpty);
        int listSize = usedBits - HEADER_BITS;
        // Only header values 1 and 2 in a single byte yield a negative size. A header of 3
        // is the valid encoding of an empty list and must round-trip.
        if (listSize < 0) {
            throw new IllegalArgumentException("corrupted data");
        }
        return toListPrimitive(listSize, array);
    }

    /** Reads {@code listSize} payload bits from {@code array}, skipping the header bits. */
    private static List<Boolean> toListPrimitive(int listSize, byte[] array) {
        List<Boolean> list = new ArrayList<>(listSize);
        for (int i = HEADER_BITS; i < listSize + HEADER_BITS; i++) {
            list.add(((array[i / 8] >> (i % 8)) & 1) == 1);
        }
        return list;
    }
}
src/main/java/markov/huffman/ByteHuffmanCodeBuilder.java
deleted
100644 → 0
View file @
0d976c4d
package
markov
.
huffman
;
import
java.util.ArrayList
;
import
java.util.Iterator
;
import
java.util.List
;
import
java.util.function.BiFunction
;
import
java.util.function.Function
;
import
java.util.function.Supplier
;
public
class
ByteHuffmanCodeBuilder
<
ContentType
,
FrequencyType
extends
FrequenceType
<
FrequencyType
>>
extends
HuffmanCodeBuilder
<
ContentType
,
List
<
Boolean
>,
Boolean
,
FrequencyType
>
{
private
static
final
Supplier
<
List
<
Boolean
>>
rootCodeSupplier
=
ArrayList:
:
new
;
private
static
final
Supplier
<
Boolean
>
leftGlyph
=
()
->
false
;
private
static
final
Supplier
<
Boolean
>
rightGlyph
=
()
->
true
;
private
static
final
BiFunction
<
List
<
Boolean
>,
Boolean
,
List
<
Boolean
>>
combiner
=
(
l
,
g
)
->
{
l
=
new
ArrayList
<>(
l
);
l
.
add
(
g
);
return
l
;
};
private
static
final
Function
<
List
<
Boolean
>,
Iterator
<
Boolean
>>
splitter
=
List:
:
iterator
;
public
ByteHuffmanCodeBuilder
()
{
super
(
rootCodeSupplier
,
leftGlyph
,
rightGlyph
,
combiner
,
splitter
,
(
n
,
b
)
->
b
?
n
.
getRight
()
:
n
.
getLeft
());
}
}
src/main/java/markov/huffman/FrequenceType.java
deleted
100644 → 0
View file @
0d976c4d
package
markov
.
huffman
;
/**
 * A self-typed, comparable frequency value used to weight Huffman tree nodes.
 *
 * @param <X> the implementing type itself, so that {@link #add} and {@code compareTo} operate
 *     on values of the same kind
 */
public interface FrequenceType<X extends FrequenceType<X>> extends Comparable<X> {

    /**
     * Combines this frequency with another one.
     *
     * @param b the frequency to combine with
     * @return the combined frequency
     */
    X add(X b);

    /**
     * @return {@code true} if this frequency is considered greater than zero
     */
    boolean isGreaterZero();
}
src/main/java/markov/huffman/HuffmanCode.java
deleted
100644 → 0
View file @
0d976c4d
package
markov
.
huffman
;
import
java.io.Serializable
;
import
java.util.ArrayList
;
import
java.util.Iterator
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.function.BiFunction
;
import
java.util.function.Function
;
/**
 * A generated Huffman code: a lookup table for encoding single symbols plus the tree and helper
 * functions needed to decode a sequence of glyphs back into symbols.
 *
 * @param <ContentType> type of the symbols being encoded
 * @param <SequenceType> type of a code word
 * @param <GlyphType> type of a single element of a code word
 * @param <FrequencyType> type used to express symbol frequencies
 */
public class HuffmanCode<
                ContentType,
                SequenceType,
                GlyphType,
                FrequencyType extends FrequenceType<FrequencyType>>
        implements Serializable {

    /** Symbol to code word table used by {@link #encode}. */
    private final Map<ContentType, SequenceType> codes;

    /** Root of the tree walked by {@link #decode}. */
    private final HuffmanTree<ContentType, SequenceType, FrequencyType> tree;

    /** Turns a sequence into an iterator over its glyphs. */
    private final Function<SequenceType, Iterator<GlyphType>> splitter;

    /** Picks the child of an inner node that a glyph leads to. */
    private final BiFunction<
                    HuffmanNode<ContentType, SequenceType, FrequencyType>,
                    GlyphType,
                    HuffmanTree<ContentType, SequenceType, FrequencyType>>
            leftRightDecider;

    public HuffmanCode(
            Map<ContentType, SequenceType> codes,
            HuffmanTree<ContentType, SequenceType, FrequencyType> tree,
            Function<SequenceType, Iterator<GlyphType>> splitter,
            BiFunction<
                            HuffmanNode<ContentType, SequenceType, FrequencyType>,
                            GlyphType,
                            HuffmanTree<ContentType, SequenceType, FrequencyType>>
                    leftRightDecider) {
        this.leftRightDecider = leftRightDecider;
        this.splitter = splitter;
        this.tree = tree;
        this.codes = codes;
    }

    /**
     * Looks up the code word of a single symbol.
     *
     * @param t the symbol to encode
     * @return the code word stored for {@code t}, or {@code null} if the table has no entry
     */
    public SequenceType encode(ContentType t) {
        return codes.get(t);
    }

    /**
     * Decodes a sequence by walking the tree glyph by glyph; each time a leaf is reached its
     * symbol is emitted and the walk restarts at the root. Glyphs left over after the last
     * leaf are dropped.
     *
     * @param v the sequence to decode
     * @return the decoded symbols in order
     */
    public List<ContentType> decode(SequenceType v) {
        final List<ContentType> decoded = new ArrayList<>();
        HuffmanTree<ContentType, SequenceType, FrequencyType> position = tree;
        for (Iterator<GlyphType> glyphs = splitter.apply(v); glyphs.hasNext(); ) {
            final GlyphType glyph = glyphs.next();
            // The current position is always an inner node here: leaves reset to the root.
            final HuffmanNode<ContentType, SequenceType, FrequencyType> inner =
                    (HuffmanNode<ContentType, SequenceType, FrequencyType>) position;
            position = leftRightDecider.apply(inner, glyph);
            if (position instanceof HuffmanLeaf) {
                final HuffmanLeaf<ContentType, SequenceType, FrequencyType> leaf =
                        (HuffmanLeaf<ContentType, SequenceType, FrequencyType>) position;
                decoded.add(leaf.getValue());
                position = tree;
            }
        }
        return decoded;
    }
}
\ No newline at end of file
src/main/java/markov/huffman/HuffmanCodeBuilder.java
deleted
100644 → 0
View file @
0d976c4d
package
markov
.
huffman
;
import
java.util.HashMap
;
import
java.util.Iterator
;
import
java.util.Map
;
import
java.util.PriorityQueue
;
import
java.util.function.BiFunction
;
import
java.util.function.Function
;
import
java.util.function.Supplier
;
/**
 * Builds a {@link HuffmanCode} that encodes a {@code ContentType} symbol into a
 * {@code SequenceType} code word and decodes a sequence of glyphs back into symbols, using the
 * given frequency type to determine the distribution.
 *
 * <p>Every call to {@link #generateCode} works on its own tree and code table, so codes
 * returned by earlier calls are not affected by later ones.
 *
 * @param <ContentType> type of the symbols being encoded
 * @param <SequenceType> type of a code word
 * @param <GlyphType> type of a single element of a code word
 * @param <FrequencyType> type used to express symbol frequencies
 */
public class HuffmanCodeBuilder<
        ContentType,
        SequenceType,
        GlyphType,
        FrequencyType extends FrequenceType<FrequencyType>> {

    /** Supplies the code word assigned to the root of the tree. */
    private final Supplier<SequenceType> rootCodeSupplier;

    /** Supplies the glyph appended when descending to a left child. */
    private final Supplier<GlyphType> leftGlyph;

    /** Supplies the glyph appended when descending to a right child. */
    private final Supplier<GlyphType> rightGlyph;

    /** Produces a child's code word from its parent's code word and a glyph. */
    private final BiFunction<SequenceType, GlyphType, SequenceType> combiner;

    /** Turns a sequence into an iterator over its glyphs; handed on to the generated code. */
    private final Function<SequenceType, Iterator<GlyphType>> splitter;

    /** Picks the child a glyph leads to; handed on to the generated code. */
    private final BiFunction<
                    HuffmanNode<ContentType, SequenceType, FrequencyType>,
                    GlyphType,
                    HuffmanTree<ContentType, SequenceType, FrequencyType>>
            leftRightDecider;

    public HuffmanCodeBuilder(
            Supplier<SequenceType> rootCodeSupplier,
            Supplier<GlyphType> leftGlyph,
            Supplier<GlyphType> rightGlyph,
            BiFunction<SequenceType, GlyphType, SequenceType> combiner,
            Function<SequenceType, Iterator<GlyphType>> splitter,
            BiFunction<
                            HuffmanNode<ContentType, SequenceType, FrequencyType>,
                            GlyphType,
                            HuffmanTree<ContentType, SequenceType, FrequencyType>>
                    leftRightDecider) {
        this.rootCodeSupplier = rootCodeSupplier;
        this.leftGlyph = leftGlyph;
        this.rightGlyph = rightGlyph;
        this.combiner = combiner;
        this.splitter = splitter;
        this.leftRightDecider = leftRightDecider;
    }

    /**
     * Generates a Huffman code for the given symbol frequencies. Entries whose frequency is
     * not greater than zero are ignored.
     *
     * @param frequencies symbol to frequency table
     * @return the code built from {@code frequencies}
     * @throws IllegalArgumentException if fewer than two entries have a frequency greater than
     *     zero, because no tree with an inner root node can be built from them
     */
    public HuffmanCode<ContentType, SequenceType, GlyphType, FrequencyType> generateCode(
            Map<ContentType, FrequencyType> frequencies) {
        HuffmanNode<ContentType, SequenceType, FrequencyType> root = buildTree(frequencies);
        root.setCode(rootCodeSupplier.get());
        // Fresh table per call: previously returned codes must not see these entries.
        Map<ContentType, SequenceType> codes = new HashMap<>();
        generateCodes(root, codes);
        return new HuffmanCode<>(codes, root, splitter, leftRightDecider);
    }

    /** Repeatedly merges the two least frequent trees until a single root remains. */
    private HuffmanNode<ContentType, SequenceType, FrequencyType> buildTree(
            Map<ContentType, FrequencyType> frequencies) {
        PriorityQueue<HuffmanTree<ContentType, SequenceType, FrequencyType>> trees =
                new PriorityQueue<>();
        for (Map.Entry<ContentType, FrequencyType> e : frequencies.entrySet()) {
            if (e.getValue().isGreaterZero()) {
                trees.offer(new HuffmanLeaf<>(e.getValue(), e.getKey()));
            }
        }
        if (trees.size() < 2) {
            throw new IllegalArgumentException(
                    "need at least two entries with a frequency greater than zero but got "
                            + trees.size());
        }

        HuffmanNode<ContentType, SequenceType, FrequencyType> merged;
        do {
            HuffmanTree<ContentType, SequenceType, FrequencyType> a = trees.poll();
            HuffmanTree<ContentType, SequenceType, FrequencyType> b = trees.poll();
            merged = new HuffmanNode<>(a, b);
            trees.offer(merged);
        } while (trees.size() > 1);
        // The node created by the last merge is the only element left in the queue.
        return merged;
    }

    /** Assigns code words to all descendants of {@code child} and records the leaf codes. */
    private void generateCodes(
            HuffmanTree<ContentType, SequenceType, FrequencyType> child,
            Map<ContentType, SequenceType> codes) {
        if (child == null) {
            throw new IllegalArgumentException(new NullPointerException());
        }
        if (child instanceof HuffmanLeaf) {
            HuffmanLeaf<ContentType, SequenceType, FrequencyType> leaf =
                    (HuffmanLeaf<ContentType, SequenceType, FrequencyType>) child;
            codes.put(leaf.getValue(), leaf.getCode());
        } else if (child instanceof HuffmanNode) {
            HuffmanNode<ContentType, SequenceType, FrequencyType> node =
                    (HuffmanNode<ContentType, SequenceType, FrequencyType>) child;

            // traverse left
            HuffmanTree<ContentType, SequenceType, FrequencyType> left = node.getLeft();
            left.setCode(combiner.apply(child.getCode(), leftGlyph.get()));
            generateCodes(left, codes);

            // traverse right
            HuffmanTree<ContentType, SequenceType, FrequencyType> right = node.getRight();
            right.setCode(combiner.apply(child.getCode(), rightGlyph.get()));
            generateCodes(right, codes);
        }
    }
}
src/main/java/markov/huffman/HuffmanLeaf.java
deleted
100644 → 0
View file @
0d976c4d
package
markov
.
huffman
;
/**
 * Leaf of a Huffman tree: carries the symbol that the path from the root encodes.
 *
 * @param <ContentType> type of the symbols being encoded
 * @param <SequenceType> type of a code word
 * @param <FrequencyType> type used to express symbol frequencies
 */
class HuffmanLeaf<ContentType, SequenceType, FrequencyType extends FrequenceType<FrequencyType>>
        extends HuffmanTree<ContentType, SequenceType, FrequencyType> {

    /** The symbol this leaf represents. */
    private final ContentType value;

    HuffmanLeaf(FrequencyType freq, ContentType val) {
        super(freq);
        this.value = val;
    }

    /** @return the symbol this leaf represents */
    ContentType getValue() {
        return this.value;
    }
}
src/main/java/markov/huffman/HuffmanNode.java
deleted
100644 → 0
View file @
0d976c4d
package
markov
.
huffman
;
/**
 * Inner node of a Huffman tree with exactly two children; its frequency is the left child's
 * frequency combined with the right child's via {@code add}.
 *
 * @param <ContentType> type of the symbols being encoded
 * @param <SequenceType> type of a code word
 * @param <FrequencyType> type used to express symbol frequencies
 */
class HuffmanNode<ContentType, SequenceType, FrequencyType extends FrequenceType<FrequencyType>>
        extends HuffmanTree<ContentType, SequenceType, FrequencyType> {

    private final HuffmanTree<ContentType, SequenceType, FrequencyType> left;
    private final HuffmanTree<ContentType, SequenceType, FrequencyType> right;

    HuffmanNode(
            HuffmanTree<ContentType, SequenceType, FrequencyType> l,
            HuffmanTree<ContentType, SequenceType, FrequencyType> r) {
        super(l.getFrequency().add(r.getFrequency()));
        this.left = l;
        this.right = r;
    }

    /** @return the left subtree */
    HuffmanTree<ContentType, SequenceType, FrequencyType> getLeft() {
        return this.left;
    }

    /** @return the right subtree */
    HuffmanTree<ContentType, SequenceType, FrequencyType> getRight() {
        return this.right;
    }
}
src/main/java/markov/huffman/HuffmanTree.java
deleted
100644 → 0
View file @
0d976c4d
package
markov
.
huffman
;
/**
 * Base class of all Huffman tree elements. Holds the element's frequency and the code word
 * assigned to it; elements are ordered by frequency.
 *
 * @param <ContentType> type of the symbols being encoded
 * @param <SequenceType> type of a code word
 * @param <FrequencyType> type used to express symbol frequencies
 */
abstract class HuffmanTree<
                ContentType, SequenceType, FrequencyType extends FrequenceType<FrequencyType>>
        implements Comparable<HuffmanTree<ContentType, SequenceType, FrequencyType>> {

    /** Frequency of this subtree, fixed at construction. */
    private final FrequencyType frequency;

    /** Code word of this element; stays {@code null} until {@link #setCode} is called. */
    private SequenceType code;

    HuffmanTree(FrequencyType freq) {
        this.frequency = freq;
    }

    /** Orders tree elements by their frequency. */
    @Override
    public int compareTo(HuffmanTree<ContentType, SequenceType, FrequencyType> o) {
        return frequency.compareTo(o.frequency);
    }

    FrequencyType getFrequency() {
        return this.frequency;
    }

    public SequenceType getCode() {
        return this.code;
    }

    public void setCode(SequenceType code) {
        this.code = code;
    }
}
src/main/java/markov/stuff/SimpleCountMap.java
View file @
218bdd37
package
markov
.
stuff
;
import
markov.huffman.FrequenceType
;
import
java.io.Serializable
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.stream.Stream
;
public
class
SimpleCountMap
<
T
>
extends
HashMap
<
T
,
SimpleCountMap
.
MutableInt
>
implements
CountMap
<
T
>
{
/**
 * Mutable {@code int} holder that also acts as a {@code FrequenceType}: it can be
 * added to another instance, tested for being positive, and compared by value.
 */
public static class MutableInt implements FrequenceType<MutableInt>, Serializable {

    /** Current value. */
    private int value = 1;

    /**
     * @param value the initial value
     */
    public MutableInt(int value) {
        this.value = value;
    }

    /** Raises the stored value by one. */
    public void increment() {
        value += 1;
    }

    /**
     * @return the current value
     */
    public int get() {
        return this.value;
    }

    /**
     * Builds a new instance holding the sum of both values; neither operand is modified.
     *
     * @param b the other summand
     * @return a fresh {@code MutableInt} with the combined value
     */
    @Override
    public MutableInt add(MutableInt b) {
        final int sum = get() + b.get();
        return new MutableInt(sum);
    }

    /**
     * @return {@code true} when the current value is strictly positive
     */
    @Override
    public boolean isGreaterZero() {
        return 0 < get();
    }

    /**
     * Compares by numeric value.
     *
     * @param o the instance to compare against
     * @return the result of {@link Integer#compare(int, int)} on both values
     */
    @Override
    public int compareTo(MutableInt o) {
        final int mine = get();
        final int theirs = o.get();
        return Integer.compare(mine, theirs);
    }
}
public
class
SimpleCountMap
<
T
>
extends
HashMap
<
T
,
SimpleCountMap
.
MutableInt
>
implements
CountMap
<
T
>
{
/**
 * Creates an empty count map, forwarding both arguments to the superclass constructor.
 *
 * @param initialCapacity initial capacity handed to the superclass
 * @param loadFactor      load factor handed to the superclass
 */
public SimpleCountMap(int initialCapacity, float loadFactor) {
    super(initialCapacity, loadFactor);
}
/**
 * Creates an empty count map, forwarding the capacity to the superclass constructor.
 *
 * @param initialCapacity initial capacity handed to the superclass
 */
public SimpleCountMap(int initialCapacity) {
    super(initialCapacity);
}
...
...
@@ -66,11 +35,49 @@ public class SimpleCountMap<T> extends HashMap<T, SimpleCountMap.MutableInt> imp
@Override
public
Stream
<
Map
.
Entry
<
T
,
MutableInt
>>
result
()
{
return
entrySet
().
stream
().
sorted
((
e1
,
e2
)
->
Integer
.
compare
(
e2
.
getValue
().
get
(),
e1
.
getValue
().
get
()));
return
entrySet
().
stream
().
sorted
((
e1
,
e2
)
->
Integer
.
compare
(
e2
.
getValue
().
get
(),
e1
.
getValue
().
get
()));
}
/**
 * Exposes this count map as a plain {@link HashMap}.
 *
 * @return this very instance, not a copy
 */
@Override
public HashMap<T, MutableInt> asMap() {
    return this;
}
/**
 * Mutable, serializable {@code int} holder used as the counter value of the map.
 */
public static class MutableInt implements Serializable {

    /** Current value. */
    private int value = 1;

    /**
     * @param value the initial value
     */
    public MutableInt(int value) {
        this.value = value;
    }

    /** Raises the stored value by one. */
    public void increment() {
        value += 1;
    }

    /**
     * @return the current value
     */
    public int get() {
        return this.value;
    }

    /**
     * Adapter that implements the {@code FrequencySupport} operations
     * (add, compare, positivity test) for {@link MutableInt} values.
     */
    public static class FrequencySupport implements directory.passive.huffman.FrequencySupport<MutableInt> {

        /**
         * Builds a new instance holding the sum of both values; neither argument is modified.
         *
         * @return a fresh {@code MutableInt} with the combined value
         */
        @Override
        public MutableInt add(MutableInt mutableInt, MutableInt x1) {
            final int sum = mutableInt.get() + x1.get();
            return new MutableInt(sum);
        }

        /**
         * Compares both arguments by numeric value.
         *
         * @return the result of {@link Integer#compare(int, int)} on both values
         */
        @Override
        public int compare(MutableInt mutableInt, MutableInt x1) {
            final int first = mutableInt.get();
            final int second = x1.get();
            return Integer.compare(first, second);
        }

        /**
         * @return {@code true} when the argument's value is strictly positive
         */
        @Override
        public boolean isGreaterZero(MutableInt mutableInt) {
            return 0 < mutableInt.get();
        }
    }
}
}
\ No newline at end of file
src/test/java/markov/stuff/BitConverterTest.java
View file @
218bdd37
...
...
@@ -4,13 +4,15 @@ import org.junit.jupiter.api.Disabled;
import
org.junit.jupiter.api.Nested
;
import
org.junit.jupiter.api.Tag
;
import
org.junit.jupiter.api.Test
;
import
org.junit.jupiter.params.ParameterizedTest
;
import
org.junit.jupiter.params.provider.ValueSource
;
import
java.io.IOException
;
import
java.util.Arrays
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.SplittableRandom
;
import
java.util.stream.Collectors
;
import
java.util.stream.IntStream
;
import
static
org
.
junit
.
jupiter
.
api
.
Assertions
.
assertEquals
;
import
static
org
.
junit
.
jupiter
.
api
.
Assertions
.
assertIterableEquals
;
...
...
@@ -20,8 +22,43 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
class
BitConverterTest
{
/**
 * Test helper: packs the given bits into bytes using a fresh {@code BitConverter}.
 *
 * @param booleans the bits to convert, in order
 * @return whatever {@code BitConverter.toBytes} produces for these bits
 */
private byte[] convert(Boolean... booleans) {
    return new BitConverter().toBytes(Arrays.asList(booleans));
}
/**
 * Test helper: unpacks the given bytes into bits using a fresh {@code BitConverter}.
 *
 * @param bytes the bytes to convert
 * @return whatever {@code BitConverter.toBits} produces for these bytes
 */
private List<Boolean> convert(byte... bytes) {
    return new BitConverter().toBits(bytes);
}
/** Round-trip tests that exercise {@code toBytes} and {@code toBits} together. */
@Nested
class Both {

    /**
     * For every length from 1 up to (but excluding) {@code max}, draws a random bit list,
     * sends it through bytes and back twice, and expects an equal but distinct list.
     *
     * @param max exclusive upper bound for the tested list lengths
     */
    @Disabled
    @Tag("long")
    @Tag("smoke")
    @ParameterizedTest
    @ValueSource(ints = 10_000)
    void firstN(int max) throws IOException, InterruptedException {
        BitConverter converter = new BitConverter();
        SplittableRandom random = new SplittableRandom();
        for (int length = 1; length < max; length++) {
            // `length` random values, each either 0 or 1, mapped to FALSE / TRUE.
            List<Boolean> original = random.ints(length, 0, 2)
                    .mapToObj(bit -> bit != 0 ? Boolean.TRUE : Boolean.FALSE)
                    .collect(Collectors.toList());

            byte[] firstBytes = converter.toBytes(original);
            List<Boolean> firstBits = converter.toBits(firstBytes);
            byte[] secondBytes = converter.toBytes(firstBits);
            List<Boolean> roundTripped = converter.toBits(secondBytes);

            assertNotSame(original, roundTripped);
            assertEquals(original, roundTripped);
        }
    }
}
@Nested
public
class
toBytes
{
class
toBytes
{
@Test
void
convertEmpty
()
{
...
...
@@ -33,7 +70,6 @@ class BitConverterTest {
@Test
void
convert0
()
{
Utils
.
toByteArray
(
1
);
byte
[]
convertedEmpty
=
convert
(
Boolean
.
FALSE
);
assertEquals
(
1
,
convertedEmpty
.
length
);
assertEquals
(
0b00000100
,
convertedEmpty
[
0
]);
...
...
@@ -41,7 +77,6 @@ class BitConverterTest {
@Test
void
convert1
()
{
Utils
.
toByteArray
(
1
);
byte
[]
convertedEmpty
=
convert
(
Boolean
.
TRUE
);
assertEquals
(
1
,
convertedEmpty
.
length
);
assertEquals
(
0b00001100
,
convertedEmpty
[
0
]);
...
...
@@ -49,14 +84,24 @@ class BitConverterTest {
@Test
void
convert16
()
{
byte
[]
convertedEmpty
=
convert
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
TRUE
);
byte
[]
convertedEmpty
=
convert
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
TRUE
);
assertEquals
(
1
,
convertedEmpty
.
length
);
assertEquals
((
byte
)
0b10000000
,
convertedEmpty
[
0
]);
}
@Test
void
convert31
()
{
byte
[]
convertedEmpty
=
convert
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
);
byte
[]
convertedEmpty
=
convert
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
);
assertEquals
(
1
,
convertedEmpty
.
length
);
assertEquals
((
byte
)
0b11111000
,
convertedEmpty
[
0
]);
}
...
...
@@ -64,7 +109,12 @@ class BitConverterTest {
@Test
void
convert32
()
{
byte
[]
convertedEmpty
=
convert
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
TRUE
);
byte
[]
convertedEmpty
=
convert
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
TRUE
);
assertEquals
(
2
,
convertedEmpty
.
length
);
assertEquals
((
byte
)
0b00000001
,
convertedEmpty
[
0
]);
assertEquals
((
byte
)
0b00000001
,
convertedEmpty
[
1
]);
...
...
@@ -72,7 +122,12 @@ class BitConverterTest {
@Test
void
convert63
()
{
byte
[]
convertedEmpty
=
convert
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
);
byte
[]
convertedEmpty
=
convert
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
);
assertEquals
(
2
,
convertedEmpty
.
length
);
assertEquals
((
byte
)
0b11111001
,
convertedEmpty
[
0
]);
assertEquals
((
byte
)
0b00000001
,
convertedEmpty
[
1
]);
...
...
@@ -80,7 +135,13 @@ class BitConverterTest {
@Test
void
convert127
()
{
byte
[]
convertedEmpty
=
convert
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
);
byte
[]
convertedEmpty
=
convert
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
);
assertEquals
(
2
,
convertedEmpty
.
length
);
assertEquals
((
byte
)
0b11111010
,
convertedEmpty
[
0
]);
assertEquals
((
byte
)
0b00000011
,
convertedEmpty
[
1
]);
...
...
@@ -88,7 +149,13 @@ class BitConverterTest {
@Test
void
convert128
()
{
byte
[]
convertedEmpty
=
convert
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
TRUE
);
byte
[]
convertedEmpty
=
convert
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
TRUE
);
assertEquals
(
2
,
convertedEmpty
.
length
);
assertEquals
((
byte
)
0b00000010
,
convertedEmpty
[
0
]);
assertEquals
((
byte
)
0b00000010
,
convertedEmpty
[
1
]);
...
...
@@ -96,7 +163,19 @@ class BitConverterTest {
@Test
void
convert4k
()
{
byte
[]
convertedEmpty
=
convert
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
TRUE
);
byte
[]
convertedEmpty
=
convert
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
TRUE
);
assertEquals
(
2
,
convertedEmpty
.
length
);
assertEquals
((
byte
)
0b00000000
,
convertedEmpty
[
0
]);
assertEquals
((
byte
)
0b10000000
,
convertedEmpty
[
1
]);
...
...
@@ -104,7 +183,19 @@ class BitConverterTest {
@Test
void
convert8k_1
()
{
byte
[]
convertedEmpty
=
convert
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
);
byte
[]
convertedEmpty
=
convert
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
);
assertEquals
(
2
,
convertedEmpty
.
length
);
assertEquals
((
byte
)
0b11111000
,
convertedEmpty
[
0
]);
assertEquals
((
byte
)
0b11111111
,
convertedEmpty
[
1
]);
...
...
@@ -112,7 +203,20 @@ class BitConverterTest {
@Test
void
convert16k_1
()
{
byte
[]
convertedEmpty
=
convert
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
);
byte
[]
convertedEmpty
=
convert
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
);
assertEquals
(
3
,
convertedEmpty
.
length
);
assertEquals
((
byte
)
0b11111001
,
convertedEmpty
[
0
]);
assertEquals
((
byte
)
0b11111111
,
convertedEmpty
[
1
]);
...
...
@@ -121,7 +225,7 @@ class BitConverterTest {
}
@Nested
public
class
toBits
{
class
toBits
{
@Test
void
convertEmpty
()
{
byte
[]
bytes
=
new
byte
[
0
];
...
...
@@ -139,33 +243,43 @@ class BitConverterTest {
@Test
void
convertOneByteWithNotEmptyWorks
()
{
List
<
Boolean
>
result
=
convert
((
byte
)
0b100
);
assertIterableEquals
(
Arrays
.
as
List
(
Boolean
.
FALSE
),
result
);
assertIterableEquals
(
Collections
.
singleton
List
(
Boolean
.
FALSE
),
result
);
result
=
convert
((
byte
)
0b101
);
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
FALSE
,
Boolean
.
FALSE
),
result
);
result
=
convert
((
byte
)
0b110
);
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
),
result
);
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
),
result
);
result
=
convert
((
byte
)
0b111
);
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
),
result
);
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
),
result
);
result
=
convert
((
byte
)
0b0
);
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
),
result
);
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
),
result
);
}
@Test
void
convert0
()
{
List
<
Boolean
>
result
=
convert
((
byte
)
0b00000100
);
assertEquals
(
1
,
result
.
size
());
assertIterableEquals
(
Arrays
.
as
List
(
Boolean
.
FALSE
),
result
);
assertIterableEquals
(
Collections
.
singleton
List
(
Boolean
.
FALSE
),
result
);
}
@Test
void
convert1
()
{
List
<
Boolean
>
result
=
convert
((
byte
)
0b00001100
);
assertEquals
(
1
,
result
.
size
());
assertIterableEquals
(
Arrays
.
as
List
(
Boolean
.
TRUE
),
result
);
assertIterableEquals
(
Collections
.
singleton
List
(
Boolean
.
TRUE
),
result
);
}
@Test
...
...
@@ -173,7 +287,13 @@ class BitConverterTest {
List
<
Boolean
>
result
=
convert
((
byte
)
0b10000000
);
assertEquals
(
5
,
result
.
size
());
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
TRUE
),
result
);
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
TRUE
),
result
);
}
...
...
@@ -182,7 +302,11 @@ class BitConverterTest {
List
<
Boolean
>
result
=
convert
((
byte
)
0b11111000
);
assertEquals
(
5
,
result
.
size
());
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
),
result
);
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
),
result
);
}
...
...
@@ -190,14 +314,24 @@ class BitConverterTest {
void
convert32
()
{
List
<
Boolean
>
result
=
convert
((
byte
)
0b00000001
,
(
byte
)
0b00000001
);
assertEquals
(
6
,
result
.
size
());
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
TRUE
),
result
);
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
TRUE
),
result
);
}
@Test
void
convert63
()
{
List
<
Boolean
>
result
=
convert
((
byte
)
0b11111001
,
(
byte
)
0b00000001
);
assertEquals
(
6
,
result
.
size
());
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
),
result
);
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
),
result
);
}
...
...
@@ -205,7 +339,13 @@ class BitConverterTest {
void
convert127
()
{
List
<
Boolean
>
result
=
convert
((
byte
)
0b11111010
,
(
byte
)
0b00000011
);
assertEquals
(
7
,
result
.
size
());
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
),
result
);
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
),
result
);
}
...
...
@@ -213,7 +353,13 @@ class BitConverterTest {
void
convert128
()
{
List
<
Boolean
>
result
=
convert
((
byte
)
0b00000010
,
(
byte
)
0b00000010
);
assertEquals
(
7
,
result
.
size
());
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
TRUE
),
result
);
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
TRUE
),
result
);
}
...
...
@@ -221,14 +367,38 @@ class BitConverterTest {
void
convert4k
()
{
List
<
Boolean
>
result
=
convert
((
byte
)
0b00000000
,
(
byte
)
0b10000000
);
assertEquals
(
13
,
result
.
size
());
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
TRUE
),
result
);
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
FALSE
,
Boolean
.
TRUE
),
result
);
}
@Test
void
convert8k_1
()
{
List
<
Boolean
>
result
=
convert
((
byte
)
0b11111000
,
(
byte
)
0b11111111
);
assertEquals
(
13
,
result
.
size
());
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
),
result
);
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
),
result
);
}
...
...
@@ -236,51 +406,22 @@ class BitConverterTest {
void
convert16k_1
()
{
List
<
Boolean
>
result
=
convert
((
byte
)
0b11111001
,
(
byte
)
0b11111111
,
(
byte
)
0b00000001
);
assertEquals
(
14
,
result
.
size
());
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
),
result
);
}
assertIterableEquals
(
Arrays
.
asList
(
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
,
Boolean
.
TRUE
),
result
);
}
/**
 * Manual entry point: blocks until one byte can be read from standard input,
 * prints a marker line, then runs {@code Both.firstN()} directly.
 *
 * @param args unused
 */
public static void main(String[] args) throws IOException, InterruptedException {
    System.in.read();
    System.out.println("here we go");
    Both roundTrip = new Both();
    roundTrip.firstN();
}
/** Round-trip test that exercises {@code toBytes} and {@code toBits} together. */
@Nested
public static class Both {

    /**
     * Repeatedly sends one fixed 50-element alternating bit list (TRUE at even indices)
     * through bytes and back twice, and expects an equal but distinct list each time.
     */
    @Test
    @Disabled
    @Tag("long")
    void firstN() throws IOException, InterruptedException {
        BitConverter converter = new BitConverter();
        SplittableRandom random = new SplittableRandom();
        final int rounds = 10_000_000;

        // The input is built once; earlier variants that rebuilt it per iteration
        // (randomly or with a growing length) were disabled by the author.
        List<Boolean> original = IntStream.range(0, 50)
                .mapToObj(index -> (index % 2 != 0) ? Boolean.FALSE : Boolean.TRUE)
                .collect(Collectors.toList());

        for (int round = 1; round < rounds; round++) {
            byte[] firstBytes = converter.toBytes(original);
            List<Boolean> firstBits = converter.toBits(firstBytes);
            byte[] secondBytes = converter.toBytes(firstBits);
            List<Boolean> roundTripped = converter.toBits(secondBytes);

            assertNotSame(original, roundTripped);
            assertEquals(original, roundTripped);
        }
    }
}
/**
 * Test helper: packs the given bits into bytes using a fresh {@code BitConverter}.
 *
 * @param booleans the bits to convert, in order
 * @return whatever {@code BitConverter.toBytes} produces for these bits
 */
private byte[] convert(Boolean... booleans) {
    BitConverter bitConverter = new BitConverter();
    return bitConverter.toBytes(Arrays.asList(booleans));
}
/**
 * Test helper: unpacks the given bytes into bits using a fresh {@code BitConverter}.
 *
 * @param bytes the bytes to convert
 * @return whatever {@code BitConverter.toBits} produces for these bytes
 */
private List<Boolean> convert(byte... bytes) {
    return new BitConverter().toBits(bytes);
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment