1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
|
package parallelai.spyglass.hbase;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.Serializable;
import java.util.TreeSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapred.InputSplit;
import com.sun.tools.javac.resources.version;
import parallelai.spyglass.hbase.HBaseConstants.SourceMode;
/**
 * Input split describing the part of an HBase table a single task should read.
 *
 * <p>Depending on the {@link SourceMode} the split carries either a start/end row
 * pair (scan modes) or an explicit set of row keys plus a version count
 * ({@code GET_LIST}). The split is transported with {@link #write(DataOutput)} /
 * {@link #readFields(DataInput)}; only the fields relevant to the current source
 * mode are put on the wire.
 */
public class HBaseTableSplit implements InputSplit, Comparable<HBaseTableSplit>, Serializable {

  // Static so the logger is neither looked up per split nor dragged into the
  // Java-serialized form of this Serializable class.
  private static final Log LOG = LogFactory.getLog(HBaseTableSplit.class);

  private byte [] m_tableName = null;
  private byte [] m_startRow = null;
  private byte [] m_endRow = null;
  private String m_regionLocation = null;
  private TreeSet<String> m_keyList = null;
  private SourceMode m_sourceMode = SourceMode.EMPTY;
  private boolean m_endRowInclusive = true;
  private int m_versions = 1;
  private boolean m_useSalt = false;

  /** Default constructor: empty table name and rows, empty location, {@code EMPTY} mode, no salt. */
  public HBaseTableSplit() {
    this(HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY,
        HConstants.EMPTY_BYTE_ARRAY, "", SourceMode.EMPTY, false);
  }

  /**
   * Creates a split described by a row range.
   *
   * @param tableName  name of the table
   * @param startRow   starting row key
   * @param endRow     end row key
   * @param location   region location reported by {@link #getLocations()}
   * @param sourceMode source mode of this split
   * @param useSalt    value returned by {@link #getUseSalt()}
   */
  public HBaseTableSplit(final byte [] tableName, final byte [] startRow, final byte [] endRow,
      final String location, final SourceMode sourceMode, final boolean useSalt) {
    this.m_tableName = tableName;
    this.m_startRow = startRow;
    this.m_endRow = endRow;
    this.m_regionLocation = location;
    this.m_sourceMode = sourceMode;
    this.m_useSalt = useSalt;
  }

  /**
   * Creates a split described by an explicit set of row keys.
   * Start and end row are left {@code null} by this constructor.
   *
   * @param tableName  name of the table
   * @param keyList    keys to get
   * @param versions   value returned by {@link #getVersions()}
   * @param location   region location reported by {@link #getLocations()}
   * @param sourceMode source mode of this split
   * @param useSalt    value returned by {@link #getUseSalt()}
   */
  public HBaseTableSplit(final byte [] tableName, final TreeSet<String> keyList, int versions,
      final String location, final SourceMode sourceMode, final boolean useSalt) {
    this.m_tableName = tableName;
    this.m_keyList = keyList;
    this.m_versions = versions;
    this.m_sourceMode = sourceMode;
    this.m_regionLocation = location;
    this.m_useSalt = useSalt;
  }

  /** @return table name */
  public byte [] getTableName() {
    return this.m_tableName;
  }

  /** @return starting row key */
  public byte [] getStartRow() {
    return this.m_startRow;
  }

  /** @return end row key */
  public byte [] getEndRow() {
    return this.m_endRow;
  }

  /** @return whether the end row is flagged as inclusive ({@code true} unless changed) */
  public boolean getEndRowInclusive() {
    return m_endRowInclusive;
  }

  /** @param isInclusive new value of the end-row-inclusive flag */
  public void setEndRowInclusive(boolean isInclusive) {
    m_endRowInclusive = isInclusive;
  }

  /** @return list of keys to get; the internal set itself, may be {@code null} */
  public TreeSet<String> getKeyList() {
    return m_keyList;
  }

  /** @return the version count carried by this split (1 unless set via constructor or read) */
  public int getVersions() {
    return m_versions;
  }

  /** @return get the source mode */
  public SourceMode getSourceMode() {
    return m_sourceMode;
  }

  /** @return the use-salt flag carried by this split */
  public boolean getUseSalt() {
    return m_useSalt;
  }

  /** @return the region's hostname */
  public String getRegionLocation() {
    if (LOG.isDebugEnabled()) {
      LOG.debug("REGION GETTER : " + m_regionLocation);
    }
    return this.m_regionLocation;
  }

  /** @return a one-element array holding the region location */
  @Override
  public String[] getLocations() {
    if (LOG.isDebugEnabled()) {
      LOG.debug("REGION ARRAY : " + m_regionLocation);
    }
    return new String[] {this.m_regionLocation};
  }

  /** @return always 0; the size of the split is not known */
  @Override
  public long getLength() {
    // Not clear how to obtain this... seems to be used only for sorting splits
    return 0;
  }

  /**
   * Restores this split from the format produced by {@link #write(DataOutput)}.
   *
   * @param in source to read from
   * @throws IOException if reading from {@code in} fails
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("READ ME : " + in.toString());
    }

    // Common header: table name, region location, source mode name, salt flag.
    this.m_tableName = Bytes.readByteArray(in);
    this.m_regionLocation = Bytes.toString(Bytes.readByteArray(in));
    this.m_sourceMode = SourceMode.valueOf(Bytes.toString(Bytes.readByteArray(in)));
    this.m_useSalt = Bytes.toBoolean(Bytes.readByteArray(in));

    switch (this.m_sourceMode) {
      case SCAN_RANGE:
        this.m_startRow = Bytes.readByteArray(in);
        this.m_endRow = Bytes.readByteArray(in);
        this.m_endRowInclusive = Bytes.toBoolean(Bytes.readByteArray(in));
        break;

      case GET_LIST:
        this.m_versions = Bytes.toInt(Bytes.readByteArray(in));
        this.m_keyList = new TreeSet<String>();
        final int keyCount = Bytes.toInt(Bytes.readByteArray(in));
        for (int i = 0; i < keyCount; i++) {
          this.m_keyList.add(Bytes.toString(Bytes.readByteArray(in)));
        }
        break;

      default:
        // Other modes carry no payload beyond the header; row and key fields
        // keep whatever values this instance already held.
        break;
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("READ and CREATED : " + this);
    }
  }

  /**
   * Writes this split as a sequence of length-prefixed byte arrays: table name,
   * region location, source mode name, salt flag, followed by the row range
   * ({@code SCAN_RANGE}) or the version count and key list ({@code GET_LIST}).
   *
   * @param out sink to write to
   * @throws IOException if writing to {@code out} fails
   */
  @Override
  public void write(DataOutput out) throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("WRITE : " + this);
    }

    Bytes.writeByteArray(out, this.m_tableName);
    Bytes.writeByteArray(out, Bytes.toBytes(this.m_regionLocation));
    Bytes.writeByteArray(out, Bytes.toBytes(this.m_sourceMode.name()));
    Bytes.writeByteArray(out, Bytes.toBytes(this.m_useSalt));

    switch (this.m_sourceMode) {
      case SCAN_RANGE:
        Bytes.writeByteArray(out, this.m_startRow);
        Bytes.writeByteArray(out, this.m_endRow);
        Bytes.writeByteArray(out, Bytes.toBytes(this.m_endRowInclusive));
        break;

      case GET_LIST:
        Bytes.writeByteArray(out, Bytes.toBytes(m_versions));
        Bytes.writeByteArray(out, Bytes.toBytes(this.m_keyList.size()));
        for (String key : this.m_keyList) {
          Bytes.writeByteArray(out, Bytes.toBytes(key));
        }
        break;

      default:
        // Nothing beyond the common header is written for other modes.
        break;
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("WROTE : " + out.toString());
    }
  }

  @Override
  public String toString() {
    return String.format("Table Name (%s) Region (%s) Source Mode (%s) Start Key (%s) Stop Key (%s) Key List Size (%s) Versions (%s) Use Salt (%s)",
        Bytes.toString(m_tableName), m_regionLocation, m_sourceMode, Bytes.toString(m_startRow), Bytes.toString(m_endRow),
        (m_keyList != null) ? m_keyList.size() : "EMPTY", m_versions, m_useSalt);
  }

  /**
   * Orders splits according to this split's source mode: scan splits by start row,
   * {@code GET_LIST} splits by their key lists (element by element, shorter list
   * first on a common prefix, a missing list last).
   *
   * <p>For any other mode two splits of the same mode compare as equal. Comparing
   * against a split of a different mode in that case still yields -1, as before;
   * NOTE(review): callers presumably only compare splits of one mode - confirm.
   *
   * @param o split to compare against
   * @return negative, zero or positive as this split sorts before, with or after {@code o}
   */
  @Override
  public int compareTo(HBaseTableSplit o) {
    switch (m_sourceMode) {
      case SCAN_ALL:
      case SCAN_RANGE:
        return Bytes.compareTo(getStartRow(), o.getStartRow());

      case GET_LIST:
        // A plain equals()-or-minus-one answer is not antisymmetric and breaks sorting.
        return compareKeyLists(m_keyList, o.getKeyList());

      default:
        // Must be 0 for x.compareTo(x) to honour the Comparable contract.
        return (m_sourceMode == o.getSourceMode()) ? 0 : -1;
    }
  }

  /** Lexicographically compares two sorted key sets; a {@code null} set sorts last. */
  private static int compareKeyLists(final TreeSet<String> left, final TreeSet<String> right) {
    if (left == right) {
      return 0;
    }
    if (left == null) {
      return 1;
    }
    if (right == null) {
      return -1;
    }

    final java.util.Iterator<String> leftKeys = left.iterator();
    final java.util.Iterator<String> rightKeys = right.iterator();
    while (leftKeys.hasNext() && rightKeys.hasNext()) {
      final int byKey = leftKeys.next().compareTo(rightKeys.next());
      if (byKey != 0) {
        return byKey;
      }
    }

    // One list is a prefix of the other: the shorter one sorts first.
    if (leftKeys.hasNext()) {
      return 1;
    }
    return rightKeys.hasNext() ? -1 : 0;
  }
}
|