Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
markov
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Commits
Open sidebar
Patrick Friedel
markov
Commits
218bdd37
Commit
218bdd37
authored
Mar 25, 2018
by
Hut
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
some cleanup
parent
0d976c4d
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
288 additions
and
425 deletions
+288
-425
pom.xml
pom.xml
+11
-1
Data.java
src/main/java/markov/Data.java
+0
-1
Lookup.java
src/main/java/markov/Lookup.java
+20
-20
Mail.java
src/main/java/markov/Mail.java
+103
-79
ShortenerByteHuffmanImpl.java
src/main/java/markov/ShortenerByteHuffmanImpl.java
+3
-5
ShortenerIntHuffmanImpl.java
src/main/java/markov/ShortenerIntHuffmanImpl.java
+3
-5
Tokenizer.java
src/main/java/markov/Tokenizer.java
+42
-38
BitConverter.java
src/main/java/markov/huffman/BitConverter.java
+63
-0
ByteHuffmanCodeBuilder.java
src/main/java/markov/huffman/ByteHuffmanCodeBuilder.java
+0
-26
FrequenceType.java
src/main/java/markov/huffman/FrequenceType.java
+0
-6
HuffmanCode.java
src/main/java/markov/huffman/HuffmanCode.java
+0
-52
HuffmanCodeBuilder.java
src/main/java/markov/huffman/HuffmanCodeBuilder.java
+0
-90
HuffmanLeaf.java
src/main/java/markov/huffman/HuffmanLeaf.java
+0
-15
HuffmanNode.java
src/main/java/markov/huffman/HuffmanNode.java
+0
-23
HuffmanTree.java
src/main/java/markov/huffman/HuffmanTree.java
+0
-29
SimpleCountMap.java
src/main/java/markov/stuff/SimpleCountMap.java
+43
-35
BitConverterTest.java
src/test/java/markov/stuff/BitConverterTest.java
+0
-0
No files found.
pom.xml
View file @
218bdd37
...
...
@@ -17,7 +17,11 @@
</properties>
<dependencies>
<dependency>
<groupId>
directory.passive
</groupId>
<artifactId>
huffman
</artifactId>
<version>
1.0
</version>
</dependency>
<dependency>
<groupId>
com.tomgibara.bits
</groupId>
<artifactId>
bits
</artifactId>
...
...
@@ -35,6 +39,12 @@
<version>
${junit.jupiter.version}
</version>
<scope>
test
</scope>
</dependency>
<dependency>
<groupId>
org.junit.jupiter
</groupId>
<artifactId>
junit-jupiter-params
</artifactId>
<version>
${junit.jupiter.version}
</version>
<scope>
test
</scope>
</dependency>
<!-- To avoid compiler warnings about @API annotations in JUnit code -->
<dependency>
<groupId>
org.apiguardian
</groupId>
...
...
src/main/java/markov/Data.java
View file @
218bdd37
...
...
@@ -23,7 +23,6 @@ public class Data implements Serializable{
public
void
add
(
Prefix
p
,
Token
t
)
{
Lookup
l
=
data
.
getOrDefault
(
p
,
new
Lookup
());
// System.out.println(String.format("adding %s - %s", p, t, l));
l
.
add
(
t
);
data
.
put
(
p
,
l
);
}
...
...
src/main/java/markov/Lookup.java
View file @
218bdd37
...
...
@@ -4,6 +4,7 @@ import java.io.Serializable;
import
java.util.Arrays
;
import
java.util.Comparator
;
import
java.util.LinkedHashMap
;
import
java.util.List
;
import
java.util.Map.Entry
;
import
java.util.Set
;
import
java.util.stream.Collectors
;
...
...
@@ -57,8 +58,9 @@ public class Lookup implements Serializable {
throw
new
IllegalStateException
(
"not jet finished"
);
}
if
(
id
<
0
||
id
>
finishedSums
.
length
)
{
throw
new
IllegalArgumentException
(
String
.
format
(
"got %d but expected id [0; %d)"
,
id
,
finishedSums
.
length
));
throw
new
IllegalArgumentException
(
String
.
format
(
"got %d but expected id [0; %d)"
,
id
,
finishedSums
.
length
));
}
return
finalData
[
id
];
}
...
...
@@ -79,20 +81,18 @@ public class Lookup implements Serializable {
int
size
=
tokens
.
size
();
finishedSums
=
new
long
[
size
];
finalData
=
new
Decission
[
size
];
final
int
[]
i
=
new
int
[
1
];
i
[
0
]
=
0
;
final
long
[]
sum
=
new
long
[
1
];
sum
[
0
]
=
0
;
tokens
.
entrySet
()
.
stream
()
.
sequential
()
.
sorted
(
Comparator
.
comparingInt
(
Entry:
:
getValue
))
.
forEach
(
e
->
{
sum
[
0
]
+=
e
.
getValue
();
finishedSums
[
i
[
0
]]
=
sum
[
0
];
finalData
[
i
[
0
]]
=
new
Decission
(
e
.
getKey
(),
i
[
0
],
this
);
i
[
0
]++;
});
List
<
Entry
<
Token
,
Integer
>>
orderedEntries
=
tokens
.
entrySet
()
.
stream
()
.
sequential
()
.
sorted
(
Comparator
.
comparingInt
(
Entry:
:
getValue
))
.
collect
(
Collectors
.
toList
());
int
sum
=
0
;
for
(
int
i
=
0
;
i
<
orderedEntries
.
size
();
i
++)
{
Entry
<
Token
,
Integer
>
entry
=
orderedEntries
.
get
(
i
);
sum
+=
entry
.
getValue
();
finishedSums
[
i
]
=
sum
;
finalData
[
i
]
=
new
Decission
(
entry
.
getKey
(),
i
,
this
);
}
this
.
tokens
.
clear
();
isFinishedCollecting
=
true
;
}
...
...
@@ -100,10 +100,10 @@ public class Lookup implements Serializable {
@Override
public
String
toString
()
{
return
"Lookup [tokens= "
+
tokens
.
entrySet
().
stream
().
sorted
(
(
e1
,
e2
)
->
Integer
.
compare
(
e2
.
getValue
(),
e1
.
getValue
())).
map
(
e
->
String
.
format
(
"%d*%s"
,
e
.
getValue
(),
e
.
getKey
())).
collect
(
Collectors
.
joining
(
", "
))
+
"]"
;
return
"Lookup [tokens= "
+
tokens
.
entrySet
().
stream
().
sorted
(
(
e1
,
e2
)
->
Integer
.
compare
(
e2
.
getValue
(),
e1
.
getValue
())).
map
(
e
->
String
.
format
(
"%d*%s"
,
e
.
getValue
(),
e
.
getKey
())).
collect
(
Collectors
.
joining
(
", "
))
+
"]"
;
}
/**
...
...
src/main/java/markov/Mail.java
View file @
218bdd37
This diff is collapsed.
Click to expand it.
src/main/java/markov/ShortenerByteHuffmanImpl.java
View file @
218bdd37
package
markov
;
import
markov
.huffman.HuffmanCode
;
import
directory.passive
.huffman.HuffmanCode
;
import
markov.stuff.BitConverter
;
import
markov.stuff.SimpleCountMap
;
import
markov.stuff.Utils
;
import
java.util.Base64
;
...
...
@@ -14,12 +13,11 @@ import java.util.stream.Stream;
public
class
ShortenerByteHuffmanImpl
extends
ShortenerSimpleImpl
{
private
final
HuffmanCode
<
Byte
,
List
<
Boolean
>
,
Boolean
,
SimpleCountMap
.
MutableInt
>
byteCode
;
private
final
HuffmanCode
<
Byte
,
List
<
Boolean
>>
byteCode
;
public
ShortenerByteHuffmanImpl
(
Data
data
,
HuffmanCode
<
Byte
,
List
<
Boolean
>,
Boolean
,
SimpleCountMap
.
MutableInt
>
byteCode
)
{
Data
data
,
HuffmanCode
<
Byte
,
List
<
Boolean
>>
byteCode
)
{
super
(
data
);
this
.
byteCode
=
byteCode
;
}
...
...
src/main/java/markov/ShortenerIntHuffmanImpl.java
View file @
218bdd37
package
markov
;
import
markov
.huffman.HuffmanCode
;
import
directory.passive
.huffman.HuffmanCode
;
import
markov.stuff.BitConverter
;
import
markov.stuff.SimpleCountMap
;
import
java.util.Base64
;
import
java.util.List
;
...
...
@@ -12,12 +11,11 @@ import java.util.stream.Stream;
public
class
ShortenerIntHuffmanImpl
extends
ShortenerSimpleImpl
{
private
final
HuffmanCode
<
Integer
,
List
<
Boolean
>
,
Boolean
,
SimpleCountMap
.
MutableInt
>
intCode
;
private
final
HuffmanCode
<
Integer
,
List
<
Boolean
>>
intCode
;
public
ShortenerIntHuffmanImpl
(
Data
data
,
HuffmanCode
<
Integer
,
List
<
Boolean
>,
Boolean
,
SimpleCountMap
.
MutableInt
>
byteCode
)
{
Data
data
,
HuffmanCode
<
Integer
,
List
<
Boolean
>>
byteCode
)
{
super
(
data
);
this
.
intCode
=
byteCode
;
}
...
...
src/main/java/markov/Tokenizer.java
View file @
218bdd37
...
...
@@ -10,15 +10,45 @@ public class Tokenizer {
public
Stream
<
Stream
<
Token
>>
tokenizeCombined
(
Stream
<
Stream
<
String
>>
input
)
{
return
input
.
map
(
stringStream
->
putMarkers
(
combineToTokens
(
stringStream
.
flatMap
(
s
->
s
.
codePoints
().
boxed
().
map
(
return
input
.
map
(
stringStream
->
putMarkers
(
combineToTokens
(
stringStream
.
flatMap
(
s
->
s
.
codePoints
()
.
boxed
()
.
map
(
this
::
glyphFromCodePoint
)))));
}
private
Stream
<
Token
>
putMarkers
(
Stream
<
Token
>
input
)
{
return
Stream
.
concat
(
Stream
.
of
(
Token
.
START
),
Stream
.
concat
(
input
,
Stream
.
of
(
Token
.
END
)));
}
this
::
glyphFromCodePoint
)))));
public
Stream
<
Token
>
combineToTokens
(
Stream
<
Glyph
>
glyphs
)
{
final
Container
[]
previous
=
new
Container
[]{
null
};
return
Stream
.
concat
(
glyphs
,
Stream
.
of
(
new
Glyph
(
Glyph
.
Type
.
empty
,
""
))).
map
(
g
->
{
Container
o
=
new
Container
(
g
);
o
.
p
=
previous
[
0
];
previous
[
0
]
=
o
;
return
o
;
}).
flatMap
(
go
->
{
if
(
go
.
p
!=
null
&&
go
.
p
.
self
!=
null
&&
!
go
.
self
.
getType
().
equals
(
go
.
p
.
self
.
getType
()))
{
List
<
Glyph
>
tokenGlyphs
=
new
ArrayList
<>();
Container
c
=
go
;
do
{
c
=
c
.
p
;
tokenGlyphs
.
add
(
0
,
c
.
self
);
}
while
(
c
!=
null
&&
c
.
p
!=
null
&&
c
.
p
.
self
!=
null
&&
c
.
self
.
getType
().
equals
(
c
.
p
.
self
.
getType
()));
go
.
p
=
null
;
// memory optimization
return
Stream
.<
List
<
Glyph
>>
builder
().
add
(
tokenGlyphs
).
build
();
}
else
{
return
Stream
.
empty
();
}
}).
map
(
this
::
getTokenFromGlyphs
);
}
public
Stream
<
Stream
<
Token
>>
tokenize
(
Stream
<
String
>
input
)
{
return
input
.
map
(
s
->
putMarkers
(
combineToTokens
(
s
.
codePoints
().
boxed
().
map
(
this
::
glyphFromCodePoint
))));
return
input
.
map
(
s
->
putMarkers
(
combineToTokens
(
s
.
codePoints
()
.
boxed
()
.
map
(
this
::
glyphFromCodePoint
))));
}
private
Glyph
glyphFromCodePoint
(
int
codePoint
)
{
...
...
@@ -27,7 +57,11 @@ public class Tokenizer {
type
=
Glyph
.
Type
.
whitespace
;
}
else
if
(
Character
.
isAlphabetic
(
codePoint
))
{
type
=
Glyph
.
Type
.
word
;
}
else
if
(
Arrays
.
asList
(
Character
.
START_PUNCTUATION
,
Character
.
END_PUNCTUATION
,
Character
.
INITIAL_QUOTE_PUNCTUATION
,
Character
.
FINAL_QUOTE_PUNCTUATION
,
}
else
if
(
Arrays
.
asList
(
Character
.
START_PUNCTUATION
,
Character
.
END_PUNCTUATION
,
Character
.
INITIAL_QUOTE_PUNCTUATION
,
Character
.
FINAL_QUOTE_PUNCTUATION
,
Character
.
OTHER_PUNCTUATION
).
contains
(
Character
.
getType
(
codePoint
)))
{
type
=
Glyph
.
Type
.
punctuation
;
}
...
...
@@ -35,36 +69,6 @@ public class Tokenizer {
return
new
Glyph
(
type
,
value
);
}
private
Stream
<
Token
>
putMarkers
(
Stream
<
Token
>
input
)
{
return
Stream
.
concat
(
Stream
.
of
(
Token
.
START
),
Stream
.
concat
(
input
,
Stream
.
of
(
Token
.
END
)));
}
public
Stream
<
Token
>
combineToTokens
(
Stream
<
Glyph
>
glyphs
)
{
final
Container
[]
previous
=
new
Container
[]{
null
};
return
Stream
.
concat
(
glyphs
,
Stream
.
of
(
new
Glyph
(
Glyph
.
Type
.
empty
,
""
)))
.
map
(
g
->
{
Container
o
=
new
Container
(
g
);
o
.
p
=
previous
[
0
];
previous
[
0
]
=
o
;
return
o
;
}).
flatMap
(
go
->
{
if
(
go
.
p
!=
null
&&
go
.
p
.
self
!=
null
&&
!
go
.
self
.
getType
().
equals
(
go
.
p
.
self
.
getType
()))
{
List
<
Glyph
>
tokenGlyphs
=
new
ArrayList
<>();
Container
c
=
go
;
do
{
c
=
c
.
p
;
tokenGlyphs
.
add
(
0
,
c
.
self
);
}
while
(
c
!=
null
&&
c
.
p
!=
null
&&
c
.
p
.
self
!=
null
&&
c
.
self
.
getType
().
equals
(
c
.
p
.
self
.
getType
()));
go
.
p
=
null
;
// memory optimization
return
Stream
.<
List
<
Glyph
>>
builder
().
add
(
tokenGlyphs
).
build
();
}
else
{
return
Stream
.
empty
();
}
}).
map
(
this
::
getTokenFromGlyphs
);
}
private
Token
getTokenFromGlyphs
(
List
<
Glyph
>
l
)
{
String
content
=
l
.
stream
().
map
(
Glyph:
:
getContent
).
collect
(
Collectors
.
joining
());
Glyph
.
Type
type
=
l
.
get
(
0
).
getType
();
...
...
@@ -72,11 +76,11 @@ public class Tokenizer {
}
private
static
class
Container
{
final
Glyph
self
;
Container
p
;
Container
(
Glyph
self
)
{
this
.
self
=
self
;
}
final
Glyph
self
;
Container
p
;
}
}
src/main/java/markov/huffman/BitConverter.java
0 → 100644
View file @
218bdd37
package
markov
.
huffman
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.List
;
/**
 * Converts between a list of bits and a compact byte array.
 *
 * <p>Encoding produced by {@link #toBytes(List)}: the three lowest bits of the first byte store
 * {@code (list.size() + 3) % 8}, i.e. how many bits of the last byte are in use (0 meaning the
 * last byte is completely used). The payload follows from bit 3 of the first byte onwards, each
 * byte being filled from its least significant bit upwards.
 */
public class BitConverter {

    /** Number of bits at the start of the first byte that hold the length marker. */
    private static final int HEADER_BITS = 3;

    /** Mask selecting the length marker from the first byte. */
    private static final int HEADER_MASK = 0b00000111;

    /**
     * Packs the given bits into a byte array.
     *
     * @param list the bits to pack; must not contain {@code null} elements
     * @return an array of at least one byte containing the length marker followed by the bits
     */
    public byte[] toBytes(List<Boolean> list) {
        int lengthInBit = list.size() + HEADER_BITS;
        // Integer ceiling division; lengthInBit is at least 3, so the array has at least one byte.
        byte[] bytes = new byte[(lengthInBit + Byte.SIZE - 1) / Byte.SIZE];

        // The marker occupies bits 0-2 of the first byte.
        bytes[0] = (byte) (lengthInBit % Byte.SIZE);

        int position = HEADER_BITS;
        for (boolean bit : list) {
            if (bit) {
                bytes[position / Byte.SIZE] |= (byte) (1 << (position % Byte.SIZE));
            }
            position++;
        }
        return bytes;
    }

    /**
     * Unpacks an array created by {@link #toBytes(List)}.
     *
     * @param array the packed bits; an empty array yields an empty list
     * @return the decoded bits in their original order
     * @throws IllegalArgumentException if the length marker is impossible for the array length
     */
    public List<Boolean> toBits(byte[] array) {
        if (array.length == 0) {
            return Collections.emptyList();
        }

        int marker = array[0] & HEADER_MASK;
        int usedBitsInLastByte = marker == 0 ? Byte.SIZE : marker;
        int listSize = Byte.SIZE * (array.length - 1) + usedBitsInLastByte - HEADER_BITS;

        // Only a single byte with marker 1 or 2 gives a negative size; marker 3 is a valid
        // encoding of the empty list and must be accepted.
        if (listSize < 0) {
            throw new IllegalArgumentException("corrupted data");
        }
        return toListPrimitive(listSize, array);
    }

    /** Reads {@code listSize} bits from {@code array}, starting right after the length marker. */
    private static List<Boolean> toListPrimitive(int listSize, byte[] array) {
        List<Boolean> list = new ArrayList<>(listSize);
        for (int i = HEADER_BITS; i < listSize + HEADER_BITS; i++) {
            list.add(((array[i / Byte.SIZE] >> (i % Byte.SIZE)) & 1) == 1);
        }
        return list;
    }
}
src/main/java/markov/huffman/ByteHuffmanCodeBuilder.java
deleted
100644 → 0
View file @
0d976c4d
package
markov
.
huffman
;
import
java.util.ArrayList
;
import
java.util.Iterator
;
import
java.util.List
;
import
java.util.function.BiFunction
;
import
java.util.function.Function
;
import
java.util.function.Supplier
;
public
class
ByteHuffmanCodeBuilder
<
ContentType
,
FrequencyType
extends
FrequenceType
<
FrequencyType
>>
extends
HuffmanCodeBuilder
<
ContentType
,
List
<
Boolean
>,
Boolean
,
FrequencyType
>
{
private
static
final
Supplier
<
List
<
Boolean
>>
rootCodeSupplier
=
ArrayList:
:
new
;
private
static
final
Supplier
<
Boolean
>
leftGlyph
=
()
->
false
;
private
static
final
Supplier
<
Boolean
>
rightGlyph
=
()
->
true
;
private
static
final
BiFunction
<
List
<
Boolean
>,
Boolean
,
List
<
Boolean
>>
combiner
=
(
l
,
g
)
->
{
l
=
new
ArrayList
<>(
l
);
l
.
add
(
g
);
return
l
;
};
private
static
final
Function
<
List
<
Boolean
>,
Iterator
<
Boolean
>>
splitter
=
List:
:
iterator
;
public
ByteHuffmanCodeBuilder
()
{
super
(
rootCodeSupplier
,
leftGlyph
,
rightGlyph
,
combiner
,
splitter
,
(
n
,
b
)
->
b
?
n
.
getRight
()
:
n
.
getLeft
());
}
}
src/main/java/markov/huffman/FrequenceType.java
deleted
100644 → 0
View file @
0d976c4d
package
markov
.
huffman
;
/**
 * A frequency value that can be compared with, and added to, other values of the same type.
 *
 * @param <X> the implementing type itself, so that {@link #add} and {@code compareTo} operate
 *            on the concrete frequency type
 */
public interface FrequenceType<X extends FrequenceType<X>> extends Comparable<X> {

    /**
     * Combines this frequency with another one.
     *
     * @param b the frequency to add
     * @return the combined frequency
     */
    X add(X b);

    /**
     * @return {@code true} if this frequency is greater than zero
     */
    boolean isGreaterZero();
}
src/main/java/markov/huffman/HuffmanCode.java
deleted
100644 → 0
View file @
0d976c4d
package
markov
.
huffman
;
import
java.io.Serializable
;
import
java.util.ArrayList
;
import
java.util.Iterator
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.function.BiFunction
;
import
java.util.function.Function
;
/**
 * A finished Huffman code: a lookup table for encoding single values and the tree that is
 * walked to decode a sequence back into values.
 *
 * @param <ContentType>   type of the values being encoded
 * @param <SequenceType>  type of a code
 * @param <GlyphType>     type of one step inside a code
 * @param <FrequencyType> frequency type carried by the tree
 */
public class HuffmanCode<ContentType, SequenceType, GlyphType, FrequencyType extends FrequenceType<FrequencyType>>
        implements Serializable {

    private final Map<ContentType, SequenceType> codes;
    private final HuffmanTree<ContentType, SequenceType, FrequencyType> tree;
    private final Function<SequenceType, Iterator<GlyphType>> splitter;
    private final BiFunction<HuffmanNode<ContentType, SequenceType, FrequencyType>, GlyphType, HuffmanTree<ContentType, SequenceType, FrequencyType>> leftRightDecider;

    /**
     * @param codes            code assigned to each value
     * @param tree             root of the tree used for decoding
     * @param splitter         turns a sequence into an iterator over its glyphs
     * @param leftRightDecider picks the child of a node that a glyph leads to
     */
    public HuffmanCode(
            Map<ContentType, SequenceType> codes,
            HuffmanTree<ContentType, SequenceType, FrequencyType> tree,
            Function<SequenceType, Iterator<GlyphType>> splitter,
            BiFunction<HuffmanNode<ContentType, SequenceType, FrequencyType>, GlyphType, HuffmanTree<ContentType, SequenceType, FrequencyType>> leftRightDecider) {
        this.leftRightDecider = leftRightDecider;
        this.splitter = splitter;
        this.tree = tree;
        this.codes = codes;
    }

    /**
     * @param t the value to encode
     * @return the code stored for {@code t}, as returned by the underlying map's {@code get}
     */
    public SequenceType encode(ContentType t) {
        return codes.get(t);
    }

    /**
     * Decodes a sequence by following its glyphs down the tree; every time a leaf is reached
     * its value is emitted and the walk restarts at the root.
     *
     * @param v the sequence to decode
     * @return the decoded values in order; glyphs after the last reached leaf produce no value
     */
    public List<ContentType> decode(SequenceType v) {
        List<ContentType> decoded = new ArrayList<>();
        HuffmanTree<ContentType, SequenceType, FrequencyType> current = tree;
        for (Iterator<GlyphType> glyphs = splitter.apply(v); glyphs.hasNext(); ) {
            GlyphType step = glyphs.next();
            // The current position is always an inner node here: leaves reset to the root below.
            current = leftRightDecider.apply((HuffmanNode<ContentType, SequenceType, FrequencyType>) current, step);
            if (current instanceof HuffmanLeaf) {
                HuffmanLeaf<ContentType, SequenceType, FrequencyType> leaf =
                        (HuffmanLeaf<ContentType, SequenceType, FrequencyType>) current;
                decoded.add(leaf.getValue());
                current = tree;
            }
        }
        return decoded;
    }
}
\ No newline at end of file
src/main/java/markov/huffman/HuffmanCodeBuilder.java
deleted
100644 → 0
View file @
0d976c4d
package
markov
.
huffman
;
import
java.util.HashMap
;
import
java.util.Iterator
;
import
java.util.Map
;
import
java.util.PriorityQueue
;
import
java.util.function.BiFunction
;
import
java.util.function.Function
;
import
java.util.function.Supplier
;
/**
 * Builds {@link HuffmanCode} instances that map a ContentType value to a SequenceType code and
 * decode a SequenceType back into a list of ContentType values, using the given FrequencyType
 * to determine the distribution.
 *
 * @param <ContentType>   type of the values that get a code assigned
 * @param <SequenceType>  type of a code
 * @param <GlyphType>     type of one step (left or right) inside a code
 * @param <FrequencyType> type expressing how often a value occurs
 */
public class HuffmanCodeBuilder<ContentType, SequenceType, GlyphType, FrequencyType extends FrequenceType<FrequencyType>> {

    private final Supplier<SequenceType> rootCodeSupplier;
    private final Supplier<GlyphType> leftGlyph;
    private final Supplier<GlyphType> rightGlyph;
    private final BiFunction<SequenceType, GlyphType, SequenceType> combiner;
    private final Function<SequenceType, Iterator<GlyphType>> splitter;
    private final BiFunction<HuffmanNode<ContentType, SequenceType, FrequencyType>, GlyphType, HuffmanTree<ContentType, SequenceType, FrequencyType>> leftRightDecider;

    /**
     * @param rootCodeSupplier supplies the code assigned to the root of the tree
     * @param leftGlyph        supplies the glyph appended when descending to a left child
     * @param rightGlyph       supplies the glyph appended when descending to a right child
     * @param combiner         produces a child's code from its parent's code and a glyph
     * @param splitter         turns a code into an iterator over its glyphs (used for decoding)
     * @param leftRightDecider picks the child of a node that a glyph leads to (used for decoding)
     */
    public HuffmanCodeBuilder(
            Supplier<SequenceType> rootCodeSupplier,
            Supplier<GlyphType> leftGlyph,
            Supplier<GlyphType> rightGlyph,
            BiFunction<SequenceType, GlyphType, SequenceType> combiner,
            Function<SequenceType, Iterator<GlyphType>> splitter,
            BiFunction<HuffmanNode<ContentType, SequenceType, FrequencyType>, GlyphType, HuffmanTree<ContentType, SequenceType, FrequencyType>> leftRightDecider) {
        this.rootCodeSupplier = rootCodeSupplier;
        this.leftGlyph = leftGlyph;
        this.rightGlyph = rightGlyph;
        this.combiner = combiner;
        this.splitter = splitter;
        this.leftRightDecider = leftRightDecider;
    }

    /**
     * Builds a code for the given frequency table. Entries whose frequency is not greater than
     * zero are ignored.
     *
     * <p>Every call works on its own tree and its own code table, so codes returned by earlier
     * calls are not affected by later ones.
     *
     * @param frequencies frequency of each value
     * @return the code for the given distribution
     * @throws NullPointerException if no entry has a frequency greater than zero
     * @throws ClassCastException   if exactly one entry has a frequency greater than zero,
     *                              because the resulting tree is a single leaf
     */
    public HuffmanCode<ContentType, SequenceType, GlyphType, FrequencyType> generateCode(Map<ContentType, FrequencyType> frequencies) {
        HuffmanNode<ContentType, SequenceType, FrequencyType> root = buildTree(frequencies);
        root.setCode(rootCodeSupplier.get());
        // Fresh table per call: the map is handed to the returned HuffmanCode by reference.
        Map<ContentType, SequenceType> codes = new HashMap<>();
        generateCodes(root, codes);
        return new HuffmanCode<>(codes, root, splitter, leftRightDecider);
    }

    /** Repeatedly merges the two least frequent trees until a single tree remains. */
    private HuffmanNode<ContentType, SequenceType, FrequencyType> buildTree(Map<ContentType, FrequencyType> frequencies) {
        PriorityQueue<HuffmanTree<ContentType, SequenceType, FrequencyType>> trees = new PriorityQueue<>();
        for (Map.Entry<ContentType, FrequencyType> e : frequencies.entrySet()) {
            if (e.getValue().isGreaterZero()) {
                trees.offer(new HuffmanLeaf<>(e.getValue(), e.getKey()));
            }
        }
        while (trees.size() > 1) {
            HuffmanTree<ContentType, SequenceType, FrequencyType> a = trees.poll();
            HuffmanTree<ContentType, SequenceType, FrequencyType> b = trees.poll();
            trees.offer(new HuffmanNode<>(a, b));
        }
        return (HuffmanNode<ContentType, SequenceType, FrequencyType>) trees.poll();
    }

    /**
     * Assigns codes to all descendants of {@code child} (whose own code must already be set)
     * and records the code of every leaf in {@code codes}.
     */
    private void generateCodes(HuffmanTree<ContentType, SequenceType, FrequencyType> child, Map<ContentType, SequenceType> codes) {
        if (child == null) {
            throw new IllegalArgumentException(new NullPointerException());
        }
        if (child instanceof HuffmanLeaf) {
            HuffmanLeaf<ContentType, SequenceType, FrequencyType> leaf = (HuffmanLeaf<ContentType, SequenceType, FrequencyType>) child;
            codes.put(leaf.getValue(), leaf.getCode());
        } else if (child instanceof HuffmanNode) {
            HuffmanNode<ContentType, SequenceType, FrequencyType> node = (HuffmanNode<ContentType, SequenceType, FrequencyType>) child;

            // traverse left
            HuffmanTree<ContentType, SequenceType, FrequencyType> left = node.getLeft();
            left.setCode(combiner.apply(child.getCode(), leftGlyph.get()));
            generateCodes(left, codes);

            // traverse right
            HuffmanTree<ContentType, SequenceType, FrequencyType> right = node.getRight();
            right.setCode(combiner.apply(child.getCode(), rightGlyph.get()));
            generateCodes(right, codes);
        }
    }
}
src/main/java/markov/huffman/HuffmanLeaf.java
deleted
100644 → 0
View file @
0d976c4d
package
markov
.
huffman
;
/**
 * Leaf of a Huffman tree: a frequency together with the content value it belongs to.
 */
class HuffmanLeaf<ContentType, SequenceType, FrequencyType extends FrequenceType<FrequencyType>>
        extends HuffmanTree<ContentType, SequenceType, FrequencyType> {

    /** The content value this leaf represents. */
    private final ContentType value;

    /**
     * @param frequency frequency of the value
     * @param content   the value this leaf represents
     */
    HuffmanLeaf(FrequencyType frequency, ContentType content) {
        super(frequency);
        this.value = content;
    }

    /** @return the content value this leaf represents */
    ContentType getValue() {
        return this.value;
    }
}
src/main/java/markov/huffman/HuffmanNode.java
deleted
100644 → 0
View file @
0d976c4d
package
markov
.
huffman
;
/**
 * Inner node of a Huffman tree with exactly two children; its frequency is the sum of the
 * children's frequencies.
 */
class HuffmanNode<ContentType, SequenceType, FrequencyType extends FrequenceType<FrequencyType>>
        extends HuffmanTree<ContentType, SequenceType, FrequencyType> {

    private final HuffmanTree<ContentType, SequenceType, FrequencyType> left;
    private final HuffmanTree<ContentType, SequenceType, FrequencyType> right;

    /**
     * @param leftChild  subtree stored as the left child
     * @param rightChild subtree stored as the right child
     */
    HuffmanNode(
            HuffmanTree<ContentType, SequenceType, FrequencyType> leftChild,
            HuffmanTree<ContentType, SequenceType, FrequencyType> rightChild) {
        super(leftChild.getFrequency().add(rightChild.getFrequency()));
        this.left = leftChild;
        this.right = rightChild;
    }

    /** @return the left child */
    HuffmanTree<ContentType, SequenceType, FrequencyType> getLeft() {
        return this.left;
    }

    /** @return the right child */
    HuffmanTree<ContentType, SequenceType, FrequencyType> getRight() {
        return this.right;
    }
}
src/main/java/markov/huffman/HuffmanTree.java
deleted
100644 → 0
View file @
0d976c4d
package
markov
.
huffman
;
/**
 * Common base of Huffman tree elements: holds a frequency and the code assigned to the
 * element. Trees are ordered by their frequency.
 */
abstract class HuffmanTree<ContentType, SequenceType, FrequencyType extends FrequenceType<FrequencyType>>
        implements Comparable<HuffmanTree<ContentType, SequenceType, FrequencyType>> {

    /** Frequency of this (sub)tree; fixed at construction. */
    private final FrequencyType frequency;

    /** Code of this element; {@code null} until {@link #setCode} is called. */
    private SequenceType code;

    /** @param frequency frequency of this (sub)tree */
    HuffmanTree(FrequencyType frequency) {
        this.frequency = frequency;
    }

    /** @return the frequency given at construction */
    FrequencyType getFrequency() {
        return this.frequency;
    }

    /** @return the code last set via {@link #setCode}, or {@code null} if none was set */
    public SequenceType getCode() {
        return this.code;
    }

    /** @param code the code to assign to this element */
    public void setCode(SequenceType code) {
        this.code = code;
    }

    /** Compares two trees by their frequencies only. */
    @Override
    public int compareTo(HuffmanTree<ContentType, SequenceType, FrequencyType> other) {
        return this.frequency.compareTo(other.frequency);
    }
}
src/main/java/markov/stuff/SimpleCountMap.java
View file @
218bdd37
package
markov
.
stuff
;
import
markov.huffman.FrequenceType
;
import
java.io.Serializable
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.stream.Stream
;
public
class
SimpleCountMap
<
T
>
extends
HashMap
<
T
,
SimpleCountMap
.
MutableInt
>
implements
CountMap
<
T
>
{
public
static
class
MutableInt
implements
FrequenceType
<
MutableInt
>,
Serializable
{
private
int
value
=
1
;
public
MutableInt
(
int
value
)
{
this
.
value
=
value
;
}
public
void
increment
()
{
++
value
;
}
public
int
get
()
{
return
value
;
}
@Override
public
MutableInt
add
(
MutableInt
b
)
{
return
new
MutableInt
(
get
()
+
b
.
get
());
}
@Override
public
boolean
isGreaterZero
()
{
return
get
()
>
0
;
}
@Override
public
int
compareTo
(
MutableInt
o
)
{
return
Integer
.
compare
(
get
(),
o
.
get
());
}
}
public
class
SimpleCountMap
<
T
>
extends
HashMap
<
T
,
SimpleCountMap
.
MutableInt
>
implements
CountMap
<
T
>
{
public
SimpleCountMap
(
int
initialCapacity
,
float
loadFactor
)
{
super
(
initialCapacity
,
loadFactor
);
}
public
SimpleCountMap
(
int
initialCapacity
)
{
super
(
initialCapacity
);
}
...
...
@@ -66,11 +35,49 @@ public class SimpleCountMap<T> extends HashMap<T, SimpleCountMap.MutableInt> imp
@Override
public
Stream
<
Map
.
Entry
<
T
,
MutableInt
>>
result
()
{
return
entrySet
().
stream
().
sorted
((
e1
,
e2
)
->
Integer
.
compare
(
e2
.
getValue
().
get
(),
e1
.
getValue
().
get
()));
return
entrySet
().
stream
().
sorted
((
e1
,
e2
)
->
Integer
.
compare
(
e2
.
getValue
().
get
(),
e1
.
getValue
().
get
()));
}
@Override
public
HashMap
<
T
,
MutableInt
>
asMap
()
{
return
this
;
}
public
static
class
MutableInt
implements
Serializable
{
private
int
value
=
1
;
public
MutableInt
(
int
value
)
{
this
.
value
=
value
;
}
public
void
increment
()
{
++
value
;
}
public
int
get
()
{
return
value
;
}
public
static
class
FrequencySupport
implements
directory
.
passive
.
huffman
.
FrequencySupport
<
MutableInt
>
{
@Override
public
MutableInt
add
(
MutableInt
mutableInt
,
MutableInt
x1
)
{
return
new
MutableInt
(
mutableInt
.
get
()
+
x1
.
get
());
}
@Override
public
int
compare
(
MutableInt
mutableInt
,
MutableInt
x1
)
{
return
Integer
.
compare
(
mutableInt
.
get
(),
x1
.
get
());
}
@Override
public
boolean
isGreaterZero
(
MutableInt
mutableInt
)
{
return
mutableInt
.
get
()
>
0
;
}
}
}
}
\ No newline at end of file
src/test/java/markov/stuff/BitConverterTest.java
View file @
218bdd37
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment