Quadcap Embeddable Database

com/quadcap/sql/index/Bnode.java

Go to the documentation of this file.
00001 package com.quadcap.sql.index; 00002 00003 /* Copyright 1997 - 2003 Quadcap Software. All rights reserved. 00004 * 00005 * This software is distributed under the Quadcap Free Software License. 00006 * This software may be used or modified for any purpose, personal or 00007 * commercial. Open Source redistributions are permitted. Commercial 00008 * redistribution of larger works derived from, or works which bundle 00009 * this software requires a "Commercial Redistribution License"; see 00010 * http://www.quadcap.com/purchase. 00011 * 00012 * Redistributions qualify as "Open Source" under one of the following terms: 00013 * 00014 * Redistributions are made at no charge beyond the reasonable cost of 00015 * materials and delivery. 00016 * 00017 * Redistributions are accompanied by a copy of the Source Code or by an 00018 * irrevocable offer to provide a copy of the Source Code for up to three 00019 * years at the cost of materials and delivery. Such redistributions 00020 * must allow further use, modification, and redistribution of the Source 00021 * Code under substantially the same terms as this license. 00022 * 00023 * Redistributions of source code must retain the copyright notices as they 00024 * appear in each source code file, these license terms, and the 00025 * disclaimer/limitation of liability set forth as paragraph 6 below. 00026 * 00027 * Redistributions in binary form must reproduce this Copyright Notice, 00028 * these license terms, and the disclaimer/limitation of liability set 00029 * forth as paragraph 6 below, in the documentation and/or other materials 00030 * provided with the distribution. 00031 * 00032 * The Software is provided on an "AS IS" basis. No warranty is 00033 * provided that the Software is free of defects, or fit for a 00034 * particular purpose. 00035 * 00036 * Limitation of Liability. Quadcap Software shall not be liable 00037 * for any damages suffered by the Licensee or any third party resulting 00038 * from use of the Software. 00039 */ 00040 00041 import java.io.IOException; 00042 import java.io.PrintStream; 00043 00044 import com.quadcap.sql.file.Block; 00045 import com.quadcap.sql.file.BlockFile; 00046 import com.quadcap.sql.file.ByteUtil; 00047 import com.quadcap.sql.file.SubPageManager; 00048 00049 import com.quadcap.util.Debug; 00050 import com.quadcap.util.Util; 00051 00052 /** 00053 * This class represents a node in the btree. 00054 * <p> 00055 * 00056 * <b>Things to think about:</b> 00057 * <p> 00058 * <UL> 00059 * <LI>Key Compression using dictionary? With a large enough node, say 00060 * 4 or 8 k, you could perform a block compression 00061 * <LI>Key Endianness (X.500 keys are LSB, most others are MSB) 00062 * <LI>Large data stored in overflow areas (blockstream) 00063 * <LI>Thread-safeness 00064 * <LI>Improve block garbage collection (it appears that some deleted 00065 * keys aren't properly counted as garbage!) 00066 * </UL> 00067 * 00068 * @author Stan Bailes 00069 */ 00070 00071 // struct node { 00072 // int count; /* number of items in this node */ 00073 // int dataBottom; /* data area -- grows downward */ 00074 // int flags; /* isRoot, */ 00075 // int[] keyIndex; /* keyrefs, grows up */ 00076 // byte[] empty; /* ... empty in the middle ... */ 00077 // byte[] data; /* key + data, grows down */ 00078 // } 00079 00080 public class Bnode { 00081 //#ifdef DEBUG 00082 static final boolean paranoid = false; 00083 //#endif 00084 00085 Btree tree; 00086 BlockFile file; 00087 long blockRef; 00088 00089 /** The number of octets in the representation of a reference to 00090 * a byte offset in the data area of this block. Currently, 00091 * bnode blocks can't be larger than 65536 because of the 00092 * two byte ref size. 00093 */ 00094 public static final int REF_SIZE = 2; 00095 00096 /** Number of key/data pairs in this node. */ 00097 static final int fCount = 0; 00098 00099 /** Bottom of data area -- grows downward. */ 00100 static final int fDataBOS = 4; 00101 00102 /** Flags */ 00103 static final int fFlags = 8; 00104 static final int IS_LEAF = 0x0001; 00105 static final int BNODE_MAGICF = 0xff00; 00106 static final int BNODE_MAGICV = 0xbd00; 00107 00108 /** 00109 * Amount of space that could be reclaimed by repacking the 00110 * node. When keys are deleted, we don't immediately remove 00111 * the key/data pair in the data area. Instead, we remove the 00112 * pointer the pair from the key index list, and record (here) 00113 * the size of the key/data pair. Later, when the block becomes 00114 * full, we reclaim this space by repacking the data area to 00115 * eliminate the holes. 00116 */ 00117 static final int fGarbage = 12; 00118 00119 static final int fParent = 16; 00120 00121 /** Start of key indices. */ 00122 static final int fIndices = 24; 00123 00124 00125 /** 00126 * Construct a node from a buffer. 00127 * @param tree the Btree containing this node 00128 * @param blockRef the block number in <b>file</b> of this node. 00129 */ 00130 public Bnode(Btree tree, long blockRef) { 00131 //#ifdef DEBUG 00132 if (Trace.bit(2)) { 00133 Debug.println("Bnode(" + blockRef + ")"); 00134 } 00135 //#endif 00136 this.tree = tree; 00137 this.file = tree.file; 00138 this.blockRef = blockRef; 00139 } 00140 00141 /** <hr><font size =+1>Index Interface</font> <!----------------------!> */ 00142 00143 /** 00144 * Return the number of keys in this subtree. This implicitly only 00145 * counts entries in the leaf nodes. 00146 */ 00147 public int size() throws IOException { 00148 Block b = getBlock(); 00149 try { 00150 return size(b); 00151 } finally { 00152 b.decrRefCount(); 00153 } 00154 } 00155 00156 public int size(Block b) throws IOException { 00157 int tot = 0; 00158 for (int i = 0; i < getCount(b); i++) { 00159 Block c = getBlock(blockRef(b, i)); 00160 try { 00161 if (isLeaf(c)) { 00162 tot += getCount(c); 00163 } else { 00164 tot += size(c); 00165 } 00166 } finally { 00167 c.decrRefCount(); 00168 } 00169 } 00170 return tot; 00171 } 00172 00173 /** 00174 * Implement the get operation. 00175 * @param key the search key 00176 * @return if the get succeeds, the data is returned as a byte array; 00177 * if the get fails, <b>null</b> is returned. 00178 */ 00179 public final byte[] get(byte[] key, int klen) throws IOException { 00180 //#ifdef DEBUG 00181 if (Trace.bit(2)) { 00182 Debug.println("Bnode[" + blockRef + "] get(" + 00183 Util.hexBytes(key) + ")"); 00184 } 00185 //#endif 00186 Block b = getBlock(); 00187 byte[] ret = null; 00188 try { 00189 ret = get(b, key, klen); 00190 } finally { 00191 b.decrRefCount(); 00192 } 00193 return ret; 00194 } 00195 00196 // /** 00197 // * Leaf level get operation 00198 // */ 00199 // public final int get(byte[] key, int koff, 00200 // byte[] data, int off, int len) throws IOException { 00201 // Block b = getBlock(); 00202 // try { 00203 // return get(b, key, koff, data, off, len); 00204 // } finally { 00205 // b.decrRefCount(); 00206 // } 00207 // } 00208 00209 /** 00210 * Implement the set operation. 00211 * @param key the key 00212 * @param data the data 00213 */ 00214 public final boolean set(byte[] key, int klen, 00215 byte[] data, int doff, int dlen, 00216 boolean insOk, boolean updOk) throws IOException { 00217 //#ifdef DEBUG 00218 if (Trace.bit(2)) { 00219 Debug.println("[" + blockRef + "] set(" + 00220 Util.hexBytes(key) + ", " + 00221 Util.hexBytes(data) + ")"); 00222 } 00223 //#endif 00224 Block b = getBlock(); 00225 try { 00226 return set(b, key, klen, data, doff, dlen, insOk, updOk); 00227 } finally { 00228 b.decrRefCount(); 00229 } 00230 } 00231 00232 /** 00233 * Implement the delete operation. 00234 * @param key the key 00235 */ 00236 public final boolean delete(byte[] key) throws IOException { 00237 Block b = getBlock(); 00238 try { 00239 return delete(b, key); 00240 } finally { 00241 b.decrRefCount(); 00242 } 00243 } 00244 00245 /** 00246 * A helper function to perform a single-level of the recursive search. 00247 * @param b the block to be searched. 00248 * @param key the search key 00249 * @return if found, the data, else null 00250 */ 00251 final byte[] get(Block b, byte[] key, int klen) throws IOException { 00252 byte[] ret = null; 00253 int bs = bsearch(b, key, klen); 00254 if (!isLeaf(b)) { 00255 Block c = getSearchBlock(b, bs); 00256 try { 00257 ret = get(c, key, klen); 00258 } finally { 00259 c.decrRefCount(); 00260 } 00261 } else if (bs >= 0) { 00262 ret = dataAtPos(b, bs); 00263 } 00264 //#ifdef DEBUG 00265 if (paranoid) { checkBlock(b); } 00266 //#endif 00267 return ret; 00268 } 00269 00270 final Block getSearchBlock(Block b, int bs) throws IOException { 00271 int index = bs < 0 ? (-1-bs) : bs; 00272 if (bs < 0) { 00273 index = index - 1; 00274 if (index < 0) { 00275 index = 0; 00276 } 00277 } 00278 Block c = getBlock(blockRef(b, index)); 00279 return c; 00280 } 00281 00282 // final int get(Block b, byte[] key, int koff, 00283 // byte[] data, int off, int len) 00284 // throws IOException 00285 // { 00286 // int ret = -1; 00287 // int bs = bsearch(b, tree.compare, key, 0, getCount(b)-1); 00288 // if (!isLeaf(b)) { 00289 // Block c = getSearchBlock(b, bs); 00290 // try { 00291 // ret = get(c, key, koff, data, off, len); 00292 // } finally { 00293 // c.decrRefCount(); 00294 // } 00295 // } else { 00296 // if (bs >= 0) { 00297 // ret = dataAtPos(b, bs, koff, data, off, len); 00298 // } else { 00299 // ret = -1; 00300 // } 00301 // } 00302 // return ret; 00303 // } 00304 00305 /** 00306 * Initialize an empty <b>BNode</b>. 00307 * <p><b>PRECONDITION:</b>Hold lock on <code>b</code>. 00308 * @param b the block to be initialized. 00309 */ 00310 final void init(Block b, boolean isLeaf) throws IOException { 00311 setCount(b, 0); 00312 setFlags(b, (isLeaf ? IS_LEAF : 0) | BNODE_MAGICV); 00313 setBos(b, file.getPageSize()); 00314 setGarbage(b, 0); 00315 setParent(b, 0); 00316 } 00317 00318 final void init(boolean f) throws IOException { 00319 Block b = getBlock(); 00320 try { 00321 init(b, f); 00322 } finally { 00323 b.decrRefCount(); 00324 } 00325 } 00326 00327 final void checkMagic() throws IOException { 00328 Block b = getBlock(); 00329 try { 00330 if ((getFlags(b) & BNODE_MAGICF) != BNODE_MAGICV) { 00331 throw new IOException("Not a valid index node: " + 00332 SubPageManager.toString(b.getPageNum())); 00333 } 00334 } finally { 00335 b.decrRefCount(); 00336 } 00337 } 00338 00339 /** 00340 * Free the resources associated with this block. If the block is a 00341 * non-leaf node, free the entire subtree. 00342 */ 00343 public final void free() throws IOException { 00344 free(blockRef); 00345 } 00346 00347 /** 00348 * Free the resources associated with the specified block. If the block is a 00349 * non-leaf node, free the entire subtree. 00350 */ 00351 private final void free(long blockNum) throws IOException { 00352 //#ifdef DEBUG 00353 if (blockNum == 0) { 00354 Debug.println("************************** Trying to free block 0!"); 00355 return; 00356 } 00357 //#endif 00358 Block b = getBlock(blockNum); 00359 //#ifdef DEBUG 00360 if (Trace.bit(2)) { 00361 Debug.println("free(" + blockNum + ")"); 00362 display(b, Debug.debugStream, false); 00363 } 00364 //#endif 00365 try { 00366 if (!isLeaf(b)) { 00367 for (int i = 0; i < getCount(b); i++) { 00368 free(blockRef(b, i)); 00369 } 00370 } 00371 file.freePage(b.getPageNum()); 00372 } finally { 00373 b.decrRefCount(); 00374 } 00375 } 00376 00377 /** 00378 * Perform a binary search of the keys in the specified block, searching 00379 * for the given key. If the key is found in the block, return the 00380 * index of the matching key. If the key isn't 00381 * found, return < 0, and for the key index which is the smallest 00382 * existing key greater than the given key, return 00383 * <code>0 - (index+1)</code>. 00384 * @param b the block to be searched. 00385 * @param key the search key 00386 * @param lo index of smallest element in search region 00387 * @param hi index of largest element in search region 00388 * @return if found, the index of the matching key/data pair. If not 00389 * found, return the index where the data would be found 00390 * as an expression of the form:<p> 00391 * <code>0 - (index + 1)</code> 00392 */ 00393 final static int bsearch(Block b, Comparator c, byte[] key, 00394 int klen, int lo, int hi) 00395 throws IOException 00396 { 00397 while (hi >= lo) { 00398 int mid = (hi + lo) / 2; 00399 int ret = keyCompareAtPos(c, key, klen, b, mid); 00400 if (ret < 0) hi = mid-1; 00401 else if (ret > 0) lo = mid+1; 00402 else return mid; 00403 } 00404 return 0 - (hi+2); 00405 } 00406 00407 /** 00408 * Perform a binary search of the keys in the specified block, searching 00409 * for the given key. If the key is found in the block, return the 00410 * index of the matching key. If the key isn't 00411 * found, return < 0, and for the key index which is the smallest 00412 * existing key greater than the given key, return 00413 * <code>0 - (index+1)</code>. 00414 * @param b the block to be searched. 00415 * @param key the search key 00416 * @return if found, the index of the matching key/data pair. If not 00417 * found, return the index where the data would be found 00418 * as an expression of the form:<p> 00419 * <code>0 - (index + 1)</code> 00420 */ 00421 final int bsearch(Block b, byte[] key, int klen) throws IOException { 00422 return bsearch(b, tree.compare, key, klen, 0, getCount(b)-1); 00423 } 00424 00425 /** 00426 * Return the data for a specified key as an integer 00427 */ 00428 final static long blockRef(Block b, int index) throws IOException { 00429 return longDataAtPos(b, index); 00430 } 00431 00432 /** 00433 * Recursive implementation of <code>set(key, data)</code>.<p> 00434 * <b>Precondition</b>: The caller must already have a write-lock on 00435 * <code>b</code>. 00436 * @param b the block to be searched 00437 * @param key the search key 00438 * @param data the data associated with the key 00439 * @return true if the key already existed before the set, false otherwise. 00440 */ 00441 final boolean set(Block b, byte[] key, int klen, 00442 byte[] data, int doff, int dlen, 00443 boolean insOk, boolean updOk) throws IOException { 00444 //#ifdef DEBUG 00445 byte[] save = null; 00446 if (paranoid) { 00447 byte[] d = (byte[])b.getData(); 00448 save = new byte[d.length]; 00449 System.arraycopy(d, 0, save, 0, d.length); 00450 } 00451 //#endif 00452 int bs = bsearch(b, key, klen); 00453 if (bs < 0 && !insOk) { 00454 throw new IOException("Key not found"); 00455 } else if (bs >= 0 && !updOk) { 00456 throw new IOException("Key not unique"); 00457 } 00458 int index = bs < 0 ? (-1-bs) : bs; 00459 boolean ret = false; 00460 if (isLeaf(b)) { 00461 boolean replace = (bs >= 0); 00462 setKey(b, index, key, klen, data, doff, dlen, replace); 00463 ret = replace; 00464 } else { 00465 if (bs < 0) { 00466 index = index - 1; 00467 if (index < 0) index = 0; 00468 } 00469 Block c = getBlock(blockRef(b, index)); 00470 try { 00471 ret = set(c, key, klen, data, doff, dlen, insOk, updOk); 00472 } finally { 00473 c.decrRefCount(); 00474 } 00475 } 00476 //#ifdef DEBUG 00477 if (paranoid) { 00478 try { 00479 checkBlock(b); 00480 } catch (RuntimeException e) { 00481 b.setData(save); 00482 Debug.println("bs = " + bs); 00483 Debug.println("index = " + index); 00484 Debug.println("isLeaf() = " + isLeaf(b)); 00485 Debug.println("key = " + Util.hexBytes(key)); 00486 Debug.println("data = " + Util.hexBytes(data)); 00487 Debug.println("checkSpace = " + 00488 debugcheckSpace(b, index, klen, dlen, bs>=0)); 00489 Debug.println("old block:"); 00490 display(b, Debug.debugStream, false); 00491 } 00492 } 00493 //#endif 00494 return ret; 00495 } 00496 00497 /** 00498 * Recursive implementation of <code>delete(key)</code>. 00499 * @param b the block to be searched 00500 * @param key the search key 00501 * @return true if the key already existed before the delete, 00502 * false otherwise. 00503 */ 00504 final boolean delete(Block b, byte[] key) throws IOException { 00505 boolean ret = false; 00506 boolean del = false; 00507 int bs = bsearch(b, key, key.length); 00508 int index = bs < 0 ? (-1-bs) : bs; 00509 // Debug.println(2, "delete([" + b.getPageNum() + "] " + 00510 // keybytes(key) + 00511 // "): bs = " + bs + ", index = " + index); 00512 if (isLeaf(b)) { 00513 if (bs >= 0) { 00514 del = deleteKeyAtPos(b, index, false); 00515 ret = true; 00516 } 00517 } else { 00518 if (bs < 0) { 00519 index = index - 1; 00520 if (index < 0) index = 0; 00521 } 00522 Block c = getBlock(blockRef(b, index)); 00523 try { 00524 ret = delete(c, key); 00525 } finally { 00526 c.decrRefCount(); 00527 } 00528 } 00529 //#ifdef DEBUG 00530 if (paranoid && !del) checkBlock(b); 00531 //#endif 00532 return ret; 00533 } 00534 00535 //#ifdef DEBUG 00536 final void checkBlock(Block b) throws IOException { 00537 int g1 = getGarbage(b); 00538 int tk = 0; 00539 for (int i = 0; i < getCount(b); i++) { 00540 tk += existingKeyLength(b, i); 00541 if (i > 0) { 00542 byte[] k1 = keyAtPos(b, i-1); 00543 byte[] k2 = keyAtPos(b, i); 00544 try { 00545 if (tree.compare.compare(k1, 0, k1.length, 00546 k2, 0, k2.length) >= 0) { 00547 try { 00548 display(b, System.out, false); 00549 } catch (Exception e) {} 00550 throw new RuntimeException("Bad key ordering"); 00551 } 00552 } catch (RuntimeException e) { 00553 try { 00554 display(b, System.out, false); 00555 } catch (Exception ee) { 00556 } 00557 throw e; 00558 } 00559 } 00560 } 00561 int sk = file.getPageSize() - getBos(b); 00562 if (tk + g1 != sk) { 00563 try { 00564 display(b, System.out, false); 00565 } catch (Exception e) {} 00566 throw new RuntimeException("checkBlock: failure, " + (tk+g1) + " != " + 00567 sk); 00568 } 00569 00570 } 00571 //#endif 00572 00573 /** 00574 * Given a block and a key index, return the data for that index as a new 00575 * byte array. 00576 * @param b the block on which to operate. 00577 * @param index the index of the item for which the data is to be retrieved 00578 * @return the data for the key at the specified position in the node. 00579 */ 00580 final static byte[] dataAtPos(Block b, int index) { 00581 int keyIndex = getKeyIndex(b, index); 00582 00583 int[] start = new int[1]; 00584 int keylen = getKeyLen(b, keyIndex, start); 00585 int keystart = start[0]; 00586 00587 int datastart = keystart + keylen; 00588 int datalen = getKeyLen(b, datastart, start); 00589 datastart = start[0]; 00590 byte[] data = new byte[datalen]; 00591 b.read(datastart, data, 0, data.length); 00592 return data; 00593 } 00594 00595 final static int dataAtPos(Block b, int index, int koff, 00596 byte[] data, int off, int len) 00597 { 00598 int keyIndex = getKeyIndex(b, index); 00599 00600 int[] start = new int[1]; 00601 int keylen = getKeyLen(b, keyIndex, start); 00602 int keystart = start[0]; 00603 00604 int datastart = keystart + keylen; 00605 datastart = start[0]; 00606 00607 return b.read(datastart + koff, data, off, len); 00608 } 00609 00610 /** 00611 * Given a block and a key index, return the data for that index as 00612 * a long. 00613 * @param b the block on which to operate. 00614 * @param index the index of the item for which the data is to be retrieved 00615 * @return the data for the key at the specified position in the node. 00616 */ 00617 final static long longDataAtPos(Block b, int index) { 00618 int keyIndex = getKeyIndex(b, index); 00619 00620 keyIndex = getKeyEnd(b, keyIndex); // skip to data 00621 if (b.readByte(keyIndex) != 8) { 00622 throw new RuntimeException("not a long: " + keyIndex + ": " + b.readByte(keyIndex)); 00623 } 00624 return b.readLong(keyIndex + 1); 00625 } 00626 00627 /** 00628 * Compute the total length of an existing key data pair, including 00629 * both key and data length fields. 00630 * @param b the block on which to operate. 00631 * @param index the index of the item for which the data is to be retrieved 00632 * @return the total number of data area bytes consumed by this item. 00633 */ 00634 final static int existingKeyLength(Block b, int index) { 00635 int keyIndex = getKeyIndex(b, index); 00636 00637 int[] start = new int[1]; 00638 int keylen = getKeyLen(b, keyIndex, start); 00639 int keystart = start[0]; 00640 00641 int datastart = keystart + keylen; 00642 int datalen = getKeyLen(b, datastart, start); 00643 datastart = start[0]; 00644 int end = datastart + datalen; 00645 return end - keyIndex; 00646 } 00647 00648 /** 00649 * Given a block and a key index, return the key for that index as a new 00650 * byte array. 00651 * @param b the block on which to operate. 00652 * @param index the index of the item for which the key is to be retrieved 00653 */ 00654 final static byte[] keyAtPos(Block b, int index) { 00655 int keyIndex = getKeyIndex(b, index); 00656 00657 int[] start = new int[1]; 00658 int keylen = getKeyLen(b, keyIndex, start); 00659 int keystart = start[0]; 00660 00661 byte[] key = new byte[keylen]; 00662 b.read(keystart, key, 0, key.length); 00663 return key; 00664 } 00665 00666 final static int getBytes(Block b, int pos, byte[] buf, int[] lenret) { 00667 int len = b.readByte(pos++) & 0xff; 00668 if ((len & 0x80) != 0) { 00669 int cnt = len & 0x7f; 00670 len = 0; 00671 while (cnt-- > 0) { 00672 len <<= 8; 00673 len += (b.readByte(pos++) & 0xff); 00674 } 00675 } 00676 lenret[0] = len; 00677 if (len > buf.length) return 0 - len; 00678 b.read(pos, buf, 0, len); 00679 return pos + len; 00680 } 00681 00682 /** 00683 * Given a block and a key index, return the key for that index as a new 00684 * byte array. 00685 * @param b the block on which to operate. 00686 * @param index the index of the item for which the key is to be retrieved 00687 * @param buf the buffer into which the key bytes should be places 00688 * @param off the offset into the buffer 00689 * @param len the maximum number of bytes to write to the buffer 00690 * 00691 * @return the number of bytes returned, if sucessful. If the buffer 00692 * isn't big enough, we return a negative number '0 - k', where 00693 * 'k' is the actual key length. 00694 */ 00695 public final static boolean getKeyAndData(Block b, int index, 00696 byte[] k, byte[] d, 00697 int[] lengths) { 00698 int pos = getKeyIndex(b, index); 00699 pos = getBytes(b, pos, k, lengths); 00700 int savelen = lengths[0]; 00701 if (pos < 0) return false; 00702 pos = getBytes(b, pos, d, lengths); 00703 lengths[1] = lengths[0]; 00704 lengths[0] = savelen; 00705 if (pos < 0) return false; 00706 return true; 00707 } 00708 00709 /** 00710 * Perform a key comparison with the specified key in the block. 00711 * @param c the compare function 00712 * @param key the compare key 00713 * @param b the block to be compared 00714 * @param index the position of the key to be compared to the compare key 00715 * 00716 * @return the integer compare value (usu: -1, 0, 1) 00717 */ 00718 static final int keyCompareAtPos(Comparator c, byte[] key, int klen, 00719 Block b, int index) throws IOException { 00720 int keypos = getKeyIndex(b, index); 00721 int r = getKeyLen(b, keypos); 00722 int start = (r >> 16) & 0xffff; 00723 int len = r & 0xffff; 00724 byte[] buf = (byte[])b.getData(); 00725 int ret = c.compare(key, 0, klen, buf, start, len); 00726 return ret; 00727 } 00728 00729 /** 00730 * Find the length of the specified key, as well as the byte offset 00731 * to the start of the key. 00732 * 00733 * @param b the B-node 00734 * @param keypos the buffer offset where the key/data pair starts. 00735 * @param keystart an <b>out</b> parameter, used to return the buffer 00736 * offset where the actual key begins. 00737 */ 00738 static final int getKeyLen(Block b, int keypos, int[] keystart) { 00739 int len = b.readByte(keypos++) & 0xff; 00740 if ((len & 0x80) != 0) { 00741 int cnt = len & 0x7f; 00742 len = 0; 00743 while (cnt-- > 0) { 00744 len <<= 8; len += (b.readByte(keypos++) & 0xff); 00745 } 00746 } 00747 keystart[0] = keypos; 00748 return len; 00749 } 00750 00751 /** 00752 * Find the length of the specified key, as well as the byte offset 00753 * to the start of the key. 00754 * 00755 * @param b the B-node 00756 * @param keypos the buffer offset where the key/data pair starts. 00757 * @return the key length 00758 */ 00759 final static int getKeyLen(Block b, int keypos) { 00760 int len = b.readByte(keypos++) & 0xff; 00761 if ((len & 0x80) != 0) { 00762 int cnt = len & 0x7f; 00763 len = 0; 00764 while (cnt-- > 0) { 00765 len <<= 8; len += (b.readByte(keypos++) & 0xff); 00766 } 00767 } 00768 return ((keypos & 0xffff) << 16) | (len & 0xffff); 00769 } 00770 00771 /** 00772 * Find the length of the specified key, as well as the byte offset 00773 * to the start of the key. 00774 * 00775 * @param b the B-node 00776 * @param keypos the buffer offset where the key/data pair starts. 00777 * @return two 16-bit integers {start, len} encoded in a 32 bit int. 00778 */ 00779 static final int getKeyEnd(Block b, int keypos) { 00780 int len = b.readByte(keypos++) & 0xff; 00781 if ((len & 0x80) != 0) { 00782 int cnt = len & 0x7f; 00783 len = 0; 00784 while (cnt-- > 0) { 00785 len <<= 8; len += (b.readByte(keypos++) & 0xff); 00786 } 00787 } 00788 return keypos + len; 00789 } 00790 00791 /** 00792 * Compute the total number of bytes that will be required to store 00793 * the byte array 'b' plus its length code. 00794 * @param b the byte array 00795 * @return the length of the byte array plus the length of the length 00796 * code. 00797 */ 00798 final static int totalLength(int len) { 00799 int lenlen = 1; 00800 if (len > 0x7f) { 00801 lenlen++; 00802 if (len > 0xff) { 00803 lenlen++; 00804 if (len > 0xffff) { 00805 lenlen++; 00806 if (len > 0xffffff) { 00807 lenlen++; 00808 } 00809 } 00810 } 00811 } 00812 return len + lenlen; 00813 } 00814 00815 /** 00816 * Insert a key/data pair at the bottom of the data area and 00817 * return its offset. This method does <b>NOT</b> check the 00818 * capacity of the buffer -- it is assumed that the caller has 00819 * already ensured that enough space is available. 00820 * @param b the block to operate on 00821 * @param key the search key 00822 * @param data the data associated with the key 00823 */ 00824 final static int insertKeyData(Block b, byte[] key, int klen, 00825 byte[] data, int doff, int dlen) { 00826 // ---- insert the key/data pair at the bottom of the data area 00827 int bos = getBos(b) - dlen; 00828 b.write(bos, data, doff, dlen); 00829 bos = writeLenLen(b, bos, dlen) - klen; 00830 b.write(bos, key, 0, klen); 00831 bos = writeLenLen(b, bos, klen); 00832 setBos(b, bos); // record the new data BOS. 00833 return bos; 00834 } 00835 00836 /** 00837 * Insert or replace a key in the specified block, which may or may 00838 * not be a leaf block. Handle split operations: if a split is required, 00839 * determine which post-split block this key should be added to. 00840 * Also, if the new key happens to be key zero (i.e., the lowest valued 00841 * key in this block), then adjust the parent's reference to this block 00842 * accordingly. Although, maybe that can't happen? 00843 * 00844 * <p><b>PRECONDITION:</b> Lock/ref on block <code>b</code> 00845 * <p><b>POSTCONDITION:</b> Lock/ref on block <code>b</code> 00846 * 00847 * @return the block into which the key was actually stored. This 00848 * will be different from <code>b</code> if a split was 00849 * necessary. 00850 */ 00851 final Block setKey(Block b, int index, byte[] key, int klen, 00852 byte[] data, int doff, int dlen, 00853 boolean replace) 00854 throws IOException 00855 { 00856 Block r = b; 00857 if (index < 0) { 00858 index = bsearch(b, key, klen); 00859 if (index < 0) { 00860 replace = false; 00861 index = -1 - index; 00862 } 00863 } 00864 // if it looks like we're inserting a key lower than the current 00865 // low key, remember the old low key, we'll have to update the parent 00866 // node. 00867 byte[] okey = (index == 0 && getCount(b) > 0) ? keyAtPos(b, 0) : null; 00868 00869 if (!setKeyAtPos(b, index, key, klen, data, doff, dlen, replace)) { 00870 if (replace) { 00871 if (deleteKeyAtPos(b, index, true)) { 00872 //#ifdef DEBUG 00873 Debug.println("Just lost b!"); 00874 //#endif 00875 } 00876 } 00877 Block[] ba = new Block[2]; 00878 ba[0] = b; 00879 split(ba); 00880 b = ba[0]; 00881 Block nb = ba[1]; 00882 00883 try { 00884 // After split, the key we were trying to add goes 00885 // into one of the two split blocks. 00886 int cv = keyCompareAtPos(tree.compare, key, klen, b, 0); 00887 r = cv < 0 ? nb : b; 00888 00889 try { 00890 int ret = bsearch(r, key, klen); 00891 if (ret >= 0) { 00892 //#ifdef DEBUG 00893 Debug.println("After a split operation, the key was " + 00894 "found to be already present in the " + 00895 "target block at position " + ret); 00896 Debug.println("The key: " + Util.hexBytes(key)); 00897 Debug.println("cv = " + cv); 00898 Debug.println("split L: "); 00899 display(ba[0], Debug.debugStream, false); 00900 Debug.println("split R: "); 00901 display(ba[1], Debug.debugStream, false); 00902 //#endif 00903 throw new RuntimeException( 00904 "internal error in setKey: dup, key = " + 00905 Util.strBytes(key)); 00906 } 00907 ret = -1 - ret; 00908 if (!setKeyAtPos(r, ret, key, klen, data, doff, dlen, false)) { 00909 throw new RuntimeException( 00910 "com.quadcap.sql.index.Bnode: " + 00911 "internal error in setKey: no room. " + 00912 "key length = " + key.length + 00913 ", data.length = " + data.length); 00914 } 00915 if (ret == 0 && getCount(r) > 1) { 00916 newLow(r, keyAtPos(r, 1)); 00917 } 00918 } finally { 00919 } 00920 } finally { 00921 nb.decrRefCount(); 00922 } 00923 } else { 00924 if (okey != null && index == 0) { 00925 /* && (!replace || getCount(b) > 1)) */ 00926 newLow(b, okey); 00927 } 00928 } 00929 return r; 00930 } 00931 00932 /** 00933 * Split this block, by creaing a new block and moving half of this 00934 * block's keys into the new block. Then, add a new key to the parent 00935 * block to reflect the new block, and adjust the parent's old reference 00936 * to this block to reflect the new key distribution. 00937 * 00938 * <p><b>PRECONDITION:</b> We have a ref/lock on <code>ba[0]</code> 00939 * <p><b>POSTCONDITION:</b> We have a ref on <code>ba[1]</code> 00940 * 00941 * @param ba a two-element array, with <code>ba[0]</code> being 00942 * the pre-split block, and <code>ba[1]</code> the new block 00943 */ 00944 final void split(Block[] ba) throws IOException { 00945 Block b = ba[0]; 00946 long nbno = file.newPage(); 00947 //#ifdef DEBUG 00948 if (Trace.bit(5)) { 00949 Debug.println(0, "split: new block = " + nbno); 00950 } 00951 //#endif 00952 Bnode node = this.tree.getNode(nbno); 00953 node.init(isLeaf(b)); 00954 Block nb = null; 00955 boolean gotException = true; 00956 try { 00957 nb = node.getBlock(); 00958 ba[1] = nb; 00959 splitHelp(ba, nbno); 00960 gotException = false; 00961 } finally { 00962 if (gotException) { 00963 if (nb != null) { 00964 nb.decrRefCount(); 00965 ba[1] = null; 00966 } 00967 file.freePage(nbno); 00968 } 00969 } 00970 } 00971 00972 /** 00973 * Split helper: 00974 * <p><b>PRECONDITION</b>: Ref/lock on both <code>ba[0], ba[1]</code> 00975 */ 00976 final void splitHelp(Block[] ba, long nbno) throws IOException { 00977 Block b = ba[0]; 00978 Block nb = ba[1]; 00979 setParent(nb, getParent(b)); 00980 00981 //#ifdef DEBUG 00982 if (Trace.bit(5)) { 00983 Debug.println("BEFORE SPLIT: b"); 00984 display(b, Debug.debugStream, false); 00985 Debug.println("BEFORE SPLIT: nb"); 00986 display(nb, Debug.debugStream, false); 00987 } 00988 //#endif 00989 00990 int more = capacity(b); 00991 int ncnt = 0; 00992 for (int i = 0; capacity(nb) > more + getGarbage(b); i++) { 00993 byte[] key = keyAtPos(b, i); 00994 byte[] data = dataAtPos(b, i); 00995 setKeyAtPos(nb, i, key, key.length, data, 0, data.length, false); 00996 if (!isLeaf(nb)) { 00997 Bnode child = this.tree.getNode(Util.integer(data)); 00998 Block cb = child.getBlock(); 00999 try { 01000 Bnode.setParent(cb, nbno); 01001 } finally { 01002 cb.decrRefCount(); 01003 } 01004 } 01005 forgetKeyAtPos(b, i); 01006 more += REF_SIZE; 01007 ncnt++; 01008 } 01009 int cnt = getCount(b); 01010 int len = cnt - ncnt; 01011 moveKeys(b, ncnt, 0, len); 01012 setCount(b, len); 01013 gc(b); 01014 //#ifdef DEBUG 01015 if (paranoid) try { 01016 checkBlock(b); 01017 } catch (RuntimeException e) { 01018 Debug.print(e); 01019 } 01020 //#endif 01021 01022 propogateSplit(ba); 01023 } 01024 01025 /** 01026 * After the split operation, propagate information up the tree to 01027 * the parent block about the newly created block and the new key 01028 * distribution. 01029 * 01030 * @param ba a two-element array, with <code>ba[0]</code> being 01031 * the pre-split block, and <code>ba[1]</code> the new block 01032 */ 01033 final void propogateSplit(Block[] ba) throws IOException { 01034 Block b = ba[0]; 01035 Block nb = ba[1]; 01036 long b_blk = b.getPageNum(); 01037 long parentBlock = getParent(b); 01038 Bnode pnode = null; 01039 if (parentBlock == 0) { 01040 // ---- We're splitting the root block. 01041 // ---- We'd like for block 'b', which is currently the parent, 01042 // ---- to remain so. So we'll move the data in 'b' to 'b2', 01043 // ---- then rename 'b' to 'parent', and 'b2' to 'b'. 01044 // ---- I think. 01045 long b2 = file.newPage(); 01046 Block b2b = getBlock(b2); 01047 01048 // ---- no need to synchronize here, since we already hold the 01049 // ---- lock on the parent node, and nobody else knows about 01050 // ---- b2 yet. 01051 try { 01052 b2b.takeData(b); 01053 parentBlock = b.getPageNum(); 01054 setParent(b2b, parentBlock); 01055 pnode = this.tree.getNode(parentBlock); 01056 pnode.init(false); 01057 ba[0] = b = b2b; 01058 if (!isLeaf(b2b)) for (int i = 0; i < getCount(b2b); i++) { 01059 long cref = blockRef(b2b, i); 01060 Block c = getBlock(cref); 01061 try { 01062 setParent(c, b2); 01063 } finally { 01064 c.decrRefCount(); 01065 } 01066 } 01067 b_blk = b2; 01068 } finally { 01069 b2b.decrRefCount(); 01070 } 01071 01072 } else { 01073 pnode = this.tree.getNode(parentBlock); 01074 } 01075 01076 Block pb = pnode.getBlock(); 01077 try { 01078 // ---- the parent currently has an entry for the old block, 01079 // ---- but with the key that is now associated with the new 01080 // ---- block. So we replace the old key, with the new block 01081 // ---- value, and we add a new key, with the old block value. 01082 01083 // ---- replace the old key 01084 byte[] okey = keyAtPos(nb, 0); 01085 byte[] odat = Util.bytes(nb.getPageNum()); 01086 Block r = setKey(pb, -1, okey, okey.length, odat, 0, odat.length, 01087 true); 01088 try { 01089 setParent(nb, r.getPageNum()); 01090 01091 // ---- add the new key 01092 byte[] nkey = keyAtPos(b, 0); 01093 byte[] ndat = Util.bytes(b.getPageNum()); 01094 01095 if (r != pb) { 01096 // if the parent block was split as a result of 01097 // the previous setKey, then it is possible that 01098 // 'b' and 'nb' were on the boundary of that 01099 // split, and will now have different parent 01100 // blocks. So we need to figure out which parent 01101 // 'b' should get... 01102 01103 throw new RuntimeException("internal error"); 01104 } 01105 r = setKey(r, -1, nkey, nkey.length, ndat, 0, ndat.length, 01106 false); 01107 setParent(b, r.getPageNum()); 01108 } finally { 01109 } 01110 01111 } finally { 01112 pb.decrRefCount(); 01113 } 01114 01115 } 01116 01117 /** 01118 * Reclaim the data space occupied by deleted keys. 01119 */ 01120 final void gc(Block b) throws IOException { 01121 //#ifdef DEBUG 01122 byte[] saved = null; 01123 if (paranoid) { 01124 saved = new byte[file.getPageSize()]; 01125 System.arraycopy((byte[])b.getData(), 0, saved, 0, saved.length); 01126 } 01127 //#endif 01128 01129 int initcap = capacity(b); 01130 int amt = getGarbage(b); 01131 int cnt = getCount(b); 01132 if (amt > 0) { 01133 int size = 0; 01134 Object[][] pairs = new Object[2][cnt]; 01135 for (int i = 0; i < cnt; i++) { 01136 size += existingKeyLength(b, i); 01137 pairs[0][i] = keyAtPos(b, i); 01138 pairs[1][i] = dataAtPos(b, i); 01139 } 01140 setBos(b, file.getPageSize()); 01141 for (int i = 0; i < cnt; i++) { 01142 byte[] key = (byte[])pairs[0][i]; 01143 byte[] data = (byte[])pairs[1][i]; 01144 int pos = insertKeyData(b, key, key.length, 01145 data, 0, data.length); 01146 setKeyIndex(b, i, pos); 01147 } 01148 } 01149 setGarbage(b, 0); 01150 } 01151 01152 /** 01153 * When an operation results in a change in the smallest key in a block, 01154 * it is necessary to inform the parent block of the change, since the 01155 * parent block key for this block is required to be the key of the 01156 * smallest item in this block. 01157 * 01158 * @param b the block containing the new lowest key 01159 * @param prevLow the key which was previously the lowest key. 01160 * 01161 * @return true if block <code>b</code> is now empty, and has been 01162 * freed and unlinked from the tree. Don't touch this block 01163 * any more! 01164 */ 01165 final boolean newLow(Block b, byte[] prevLow) throws IOException { 01166 boolean ret = false; 01167 long parentBlock = getParent(b); 01168 if (parentBlock != 0) { 01169 Bnode parent = tree.getNode(parentBlock); 01170 Block pb = parent.getBlock(); 01171 try { 01172 int pos = bsearch(pb, prevLow, prevLow.length); 01173 if (pos < 0) { 01174 //#ifdef DEBUG 01175 System.out.print("b = "); 01176 display(b, System.out, false); 01177 System.out.print("pb = "); 01178 display(pb, System.out, false); 01179 Debug.println(0, "prevLow = " + keybytes(prevLow)); 01180 //#endif 01181 throw new RuntimeException( 01182 "deleteKeyAtPos: deleting " + 01183 " previous low key, not found!"); 01184 } 01185 if (getCount(b) == 0) { 01186 if (deleteKeyAtPos(pb, pos, false)) { 01187 //#ifdef DEBUG 01188 //Debug.println("Just deleted pb!"); 01189 //#endif 01190 } 01191 file.freePage(b.getPageNum()); 01192 ret = true; 01193 } else { 01194 byte[] k = keyAtPos(b, 0); 01195 byte[] d = Util.bytes(b.getPageNum()); 01196 setKey(pb, pos, k, k.length, d, 0, d.length, true); 01197 //#ifdef DEBUG 01198 if (paranoid) checkBlock(b); 01199 //#endif 01200 } 01201 } finally { 01202 pb.decrRefCount(); 01203 } 01204 } 01205 return ret; 01206 } 01207 01208 /** 01209 * Helper function to move key indices around when adding or deleting 01210 * keys. 01211 */ 01212 final static void moveKeys(Block b, int from, int to, int length) { 01213 if (length > 0) { 01214 byte[] buf = (byte[])b.getData(); 01215 length *= REF_SIZE; 01216 int f = index(from); 01217 int t = index(to); 01218 if (f < t) { 01219 b.setDirty(true); 01220 for (int i = length-1; i >= 0; i--) { 01221 buf[t+i] = buf[f+i]; 01222 } 01223 } else if (f > t) { 01224 b.setDirty(true); 01225 for (int i = 0; i < length; i++) { 01226 buf[t+i] = buf[f+i]; 01227 } 01228 } 01229 } 01230 } 01231 01232 /** 01233 * Determine if there is room in the block for a new key/data pair. 01234 * @param b the block to operate on 01235 * @param index the index of the key that is being replaced, 01236 * if <code>replace</code> is <b>true</b>. 01237 * @param klen the search key length 01238 * @param dlen the length of the data associated with the key 01239 * @param replace <b>true</b> if an existing key/data pair is to 01240 * be replaced by the new key, <b>false</b> otherwise. 01241 * 01242 * @return negative number if there is no room for the new data.<p> 01243 * zero if there is room for the new data.<p> 01244 * positive number if there would be room after a garbage 01245 * collection. Included in this calculation is the deletion 01246 * of the key being replaced, if <code>replace</code> is true.<p> 01247 */ 01248 final int checkSpace(Block b, int index, int klen, int dlen, boolean replace) { 01249 int need = REF_SIZE + totalLength(klen) + totalLength(dlen); 01250 int total = capacity(b) + getGarbage(b); 01251 if (replace) total += REF_SIZE + existingKeyLength(b, index); 01252 int ret = 0; // fits 01253 if (need > total) { 01254 ret = -1; // doesn't fit 01255 } else if (need > capacity(b)) { 01256 ret = 1; // will fit if gc 01257 } 01258 return ret; 01259 } 01260 01261 //#ifdef DEBUG 01262 final int debugcheckSpace(Block b, int index, int klen, int dlen, 01263 boolean replace) { 01264 int need = REF_SIZE + totalLength(klen) + totalLength(dlen); 01265 Debug.println(0, "keylen = " + totalLength(klen)); 01266 Debug.println(0, "datalen = " + totalLength(dlen)); 01267 Debug.println(0, "need = " + need); 01268 int total = capacity(b) + getGarbage(b); 01269 Debug.println(0, "capacity = " + capacity(b)); 01270 Debug.println(0, "garbage = " + getGarbage(b)); 01271 Debug.println(0, "total = " + total); 01272 if (replace) total += REF_SIZE + existingKeyLength(b, index); 01273 int ret = 0; // fits 01274 if (need > total) { 01275 ret = -1; // doesn't fit 01276 } else if (need > capacity(b)) { 01277 ret = 1; // will fit if gc 01278 } 01279 return ret; 01280 } 01281 //#endif 01282 01283 /** 01284 * This function is scary. It accounts for the fact that we're about 01285 * to delete a key, by including the size of the key/data in the 01286 * node's <code>garbage</code> field, but it doesn't actually delete 01287 * the key. Be careful. 01288 */ 01289 final void forgetKeyAtPos(Block b, int index) { 01290 setGarbage(b, getGarbage(b) + existingKeyLength(b, index)); 01291 } 01292 01293 /** 01294 * Delete the specified key. 01295 * 01296 * @return true if the block is now empty, has been freed, and we 01297 * should avoid touching it... 01298 */ 01299 final boolean deleteKeyAtPos(Block b, int index, boolean skipNewLow) 01300 throws IOException 01301 { 01302 boolean ret = false; 01303 int cnt = getCount(b); 01304 byte[] prevLow = index == 0 ? keyAtPos(b, 0) : null; 01305 forgetKeyAtPos(b, index); 01306 if (index < cnt-1) { 01307 moveKeys(b, index+1, index, cnt-index-1); 01308 } 01309 setCount(b, cnt-1); 01310 if (!skipNewLow && getCount(b) == 0 && getParent(b) == 0) { 01311 setLeaf(b, true); 01312 } else if (!skipNewLow && index == 0) { 01313 ret = newLow(b, prevLow); 01314 } 01315 return ret; 01316 } 01317 01318 /** 01319 * Given a block and a key position, insert a new key/data pair 01320 * immediately at the specified position, returning true if the 01321 * key/data fit in the block. If the incoming data won't fit, 01322 * don't modify the block and return false.