1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 package org.apache.omid.committable.hbase;
19
20 import org.apache.phoenix.thirdparty.com.google.common.base.Preconditions;
21
22 import org.apache.commons.lang.ArrayUtils;
23 import org.apache.hadoop.conf.Configuration;
24 import org.apache.hadoop.hbase.util.Bytes;
25
26 import java.io.IOException;
27 import java.util.Arrays;
28
29 /**
30 * This class contains only the required behavior of the original
31 * org.apache.hadoop.hbase.util.RegionSplitter class to avoid
32 * having a reference to hbase-testing-util, which transitively
33 * imports hbase-server causing dependency conflicts for this module.
34 */
35 public class RegionSplitter {
36
37 /**
38 * A generic interface for the RegionSplitter code to use for all it's functionality. Note that the original authors
39 * of this code use see org.apache.hadoop.hbase.util.HexStringSplit to partition their table and set it as default, but provided this for
40 * your custom algorithm. To use, create a new derived class from this interface and call
41 * see RegionSplitter#createPresplitTable or
42 * see RegionSplitter#rollingSplit(String, SplitAlgorithm, Configuration)} with the argument splitClassName
43 * giving the name of your class.
44 */
45 public interface SplitAlgorithm {
46
47 /**
48 * Split a pre-existing region into 2 regions.
49 *
50 * @param start
51 * first row (inclusive)
52 * @param end
53 * last row (exclusive)
54 * @return the split row to use
55 */
56 byte[] split(byte[] start, byte[] end);
57
58 /**
59 * Split an entire table.
60 *
61 * @param numRegions
62 * number of regions to split the table into
63 *
64 * @throws RuntimeException
65 * user input is validated at this time. may throw a runtime exception in response to a parse
66 * failure
67 * @return array of split keys for the initial regions of the table. The length of the returned array should be
68 * numRegions-1.
69 */
70 byte[][] split(int numRegions);
71
72 /**
73 * In HBase, the first row is represented by an empty byte array. This might cause problems with your split
74 * algorithm or row printing. All your APIs will be passed firstRow() instead of empty array.
75 *
76 * @return your representation of your first row
77 */
78 byte[] firstRow();
79
80 /**
81 * In HBase, the last row is represented by an empty byte array. This might cause problems with your split
82 * algorithm or row printing. All your APIs will be passed firstRow() instead of empty array.
83 *
84 * @return your representation of your last row
85 */
86 byte[] lastRow();
87
88 /**
89 * In HBase, the last row is represented by an empty byte array. Set this value to help the split code
90 * understand how to evenly divide the first region.
91 *
92 * @param userInput
93 * raw user input (may throw RuntimeException on parse failure)
94 */
95 void setFirstRow(String userInput);
96
97 /**
98 * In HBase, the last row is represented by an empty byte array. Set this value to help the split code
99 * understand how to evenly divide the last region. Note that this last row is inclusive for all rows sharing
100 * the same prefix.
101 *
102 * @param userInput raw user input (may throw RuntimeException on parse failure)
103 */
104 void setLastRow(String userInput);
105
106 /**
107 * @param input
108 * user or file input for row
109 * @return byte array representation of this row for HBase
110 */
111 byte[] strToRow(String input);
112
113 /**
114 * @param row byte array representing a row in HBase
115 * @return String to use for debug and file printing
116 */
117 String rowToStr(byte[] row);
118
119 /**
120 * @return the separator character to use when storing / printing the row
121 */
122 String separator();
123
124 /**
125 * Set the first row
126 *
127 * @param userInput
128 * byte array of the row key.
129 */
130 void setFirstRow(byte[] userInput);
131
132 /**
133 * Set the last row
134 *
135 * @param userInput
136 * byte array of the row key.
137 */
138 void setLastRow(byte[] userInput);
139 }
140
141 /**
142 * @param conf Hbase conf
143 * @param splitClassName split class name to be used
144 * @return an instance of SplitAlgorithm
145 * @throws IOException if the specified SplitAlgorithm class couldn't be instantiated
146 */
147 public static SplitAlgorithm newSplitAlgoInstance(Configuration conf,
148 String splitClassName) throws IOException {
149 Class<?> splitClass;
150
151 // For split algorithms builtin to RegionSplitter, the user can specify
152 // their simple class name instead of a fully qualified class name.
153 if (splitClassName.equals(UniformSplit.class.getSimpleName())) {
154 splitClass = UniformSplit.class;
155 } else {
156 try {
157 splitClass = conf.getClassByName(splitClassName);
158 } catch (ClassNotFoundException e) {
159 throw new IOException("Couldn't load split class " + splitClassName, e);
160 }
161 if (splitClass == null) {
162 throw new IOException("Failed loading split class " + splitClassName);
163 }
164 if (!SplitAlgorithm.class.isAssignableFrom(splitClass)) {
165 throw new IOException(
166 "Specified split class doesn't implement SplitAlgorithm");
167 }
168 }
169 try {
170 return splitClass.asSubclass(SplitAlgorithm.class).newInstance();
171 } catch (Exception e) {
172 throw new IOException("Problem loading split algorithm: ", e);
173 }
174 }
175
176 /**
177 * A SplitAlgorithm that divides the space of possible keys evenly. Useful when the keys are approximately uniform
178 * random bytes (e.g. hashes). Rows are raw byte values in the range [00..FF] and are right-padded with zeros
179 * to keep the same memcmp() order. This is the natural algorithm to use for a byte[] environment and saves space,
180 * but is not necessarily the easiest for readability.
181 */
182 public static class UniformSplit implements SplitAlgorithm {
183
184 static final byte xFF = (byte) 0xFF;
185 byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
186 byte[] lastRowBytes =
187 new byte[]{xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF};
188
189 public byte[] split(byte[] start, byte[] end) {
190 return Bytes.split(start, end, 1)[1];
191 }
192
193 @Override
194 public byte[][] split(int numRegions) {
195 Preconditions.checkArgument(
196 Bytes.compareTo(lastRowBytes, firstRowBytes) > 0,
197 "last row (%s) is configured less than first row (%s)",
198 Bytes.toStringBinary(lastRowBytes),
199 Bytes.toStringBinary(firstRowBytes));
200
201 byte[][] splits = Bytes.split(firstRowBytes, lastRowBytes, true,
202 numRegions - 1);
203 Preconditions.checkState(splits != null,
204 "Could not split region with given user input: " + this);
205
206 // remove endpoints, which are included in the splits list
207 return Arrays.copyOfRange(splits, 1, splits.length - 1);
208 }
209
210 @Override
211 public byte[] firstRow() {
212 return firstRowBytes;
213 }
214
215 @Override
216 public byte[] lastRow() {
217 return lastRowBytes;
218 }
219
220 @Override
221 public void setFirstRow(String userInput) {
222 firstRowBytes = Bytes.toBytesBinary(userInput);
223 }
224
225 @Override
226 public void setLastRow(String userInput) {
227 lastRowBytes = Bytes.toBytesBinary(userInput);
228 }
229
230 @Override
231 public void setFirstRow(byte[] userInput) {
232 firstRowBytes = userInput;
233 }
234
235 @Override
236 public void setLastRow(byte[] userInput) {
237 lastRowBytes = userInput;
238 }
239
240 @Override
241 public byte[] strToRow(String input) {
242 return Bytes.toBytesBinary(input);
243 }
244
245 @Override
246 public String rowToStr(byte[] row) {
247 return Bytes.toStringBinary(row);
248 }
249
250 @Override
251 public String separator() {
252 return ",";
253 }
254
255 @Override
256 public String toString() {
257 return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
258 + "," + rowToStr(lastRow()) + "]";
259 }
260 }
261 }