public final class Utilities
extends java.lang.Object
Modifier and Type | Class and Description
---|---
static class | Utilities.CollectionPersistenceDelegate
static class | Utilities.CommonTokenDelegate - Needed to serialize org.antlr.runtime.CommonToken.
static class | Utilities.DatePersistenceDelegate - DatePersistenceDelegate.
static class | Utilities.EnumDelegate - Java 1.5 workaround.
static class | Utilities.ListDelegate
static class | Utilities.MapDelegate
static class | Utilities.ReduceField - ReduceField: KEY (record key), VALUE (record value).
static class | Utilities.SetDelegate
static class | Utilities.SQLCommand<T>
static class | Utilities.StreamPrinter - StreamPrinter.
static class | Utilities.StreamStatus - StreamStatus.
static class | Utilities.TimestampPersistenceDelegate - TimestampPersistenceDelegate.
Modifier and Type | Field and Description
---|---
static int | carriageReturnCode
static int | ctrlaCode
static TableDesc | defaultTd
static java.lang.String | HADOOP_LOCAL_FS - The objects in the reducer are composed of these top-level fields.
static java.lang.String | INDENT
static java.lang.Object | INPUT_SUMMARY_LOCK
static java.lang.String | MAP_PLAN_NAME
static int | newLineCode
static java.lang.String | NSTR
static java.lang.String | nullStringOutput
static java.lang.String | nullStringStorage
static java.util.Random | randGen
static java.lang.String | REDUCE_PLAN_NAME
static java.util.List<java.lang.String> | reduceFieldNameList
static char | sqlEscapeChar
static java.lang.String | suffix
static int | tabCode
Modifier and Type | Method and Description
---|---
static java.lang.String | abbreviate(java.lang.String str, int max) - convert "From src insert blah blah" to "From src insert ..."
static java.lang.ClassLoader | addToClassPath(java.lang.ClassLoader cloader, java.lang.String[] newPaths) - Add new elements to the classpath.
static java.lang.String | checkJDOPushDown(Table tab, ExprNodeDesc expr, GenericUDF parent) - Check if the partition pruning expression can be pushed down to JDO filtering.
static void | clearWork(Configuration conf)
static <T> T | clonePlan(T plan) - Clones using the powers of XML.
static java.sql.Connection | connectWithRetry(java.lang.String connectionString, int waitWindow, int maxRetries) - Retry connecting to a database with random backoff (same as the one implemented in HDFS-767).
static boolean | contentsEqual(java.io.InputStream is1, java.io.InputStream is2, boolean ignoreWhitespace)
static void | copyTableJobPropertiesToConf(TableDesc tbl, JobConf job) - Copies the storage handler properties configured for a table descriptor to a runtime job configuration.
static java.io.OutputStream | createCompressedStream(JobConf jc, java.io.OutputStream out) - Convert an output stream to a compressed output stream based on codecs and compression options specified in the Job Configuration.
static java.io.OutputStream | createCompressedStream(JobConf jc, java.io.OutputStream out, boolean isCompressed) - Convert an output stream to a compressed output stream based on codecs in the Job Configuration.
static RCFile.Writer | createRCFileWriter(JobConf jc, FileSystem fs, Path file, boolean isCompressed) - Create an RCFile output stream based on job configuration. Uses the user-supplied compression flag (rather than obtaining it from the Job Configuration).
static SequenceFile.Writer | createSequenceWriter(JobConf jc, FileSystem fs, Path file, java.lang.Class<?> keyClass, java.lang.Class<?> valClass) - Create a sequencefile output stream based on job configuration.
static SequenceFile.Writer | createSequenceWriter(JobConf jc, FileSystem fs, Path file, java.lang.Class<?> keyClass, java.lang.Class<?> valClass, boolean isCompressed) - Create a sequencefile output stream based on job configuration. Uses the user-supplied compression flag (rather than obtaining it from the Job Configuration).
static void | createTmpDirs(Configuration conf, MapWork mWork) - Hive uses tmp directories to capture the output of each FileSinkOperator.
static void | createTmpDirs(Configuration conf, ReduceWork rWork) - Hive uses tmp directories to capture the output of each FileSinkOperator.
static ExprNodeDesc | deserializeExpression(java.lang.String s, Configuration conf)
static <T> T | deserializeObject(java.io.InputStream in) - De-serialize an object.
static <T> T | deserializePlan(java.io.InputStream in) - Deserializes the plan.
static java.lang.String | escapeSqlLike(java.lang.String key) - Escape '_', '%', and the escape characters inside the string key.
static int | estimateNumberOfReducers(HiveConf conf, ContentSummary inputSummary, MapWork work, boolean finalMapRed) - Estimate the number of reducers needed for this job, based on job input and configuration parameters.
static <T> T | executeWithRetry(Utilities.SQLCommand<T> cmd, java.sql.PreparedStatement stmt, int baseWindow, int maxRetries) - Retry SQL execution with random backoff (same as the one implemented in HDFS-767).
static java.lang.String | formatBinaryString(byte[] array, int start, int length)
static java.lang.String | formatMsecToStr(long msec) - Format a number of milliseconds as a human-readable string.
static java.lang.String | generateFileName(java.lang.Byte tag, java.lang.String bigBucketFileName)
static java.lang.String | generatePath(Path baseURI, java.lang.String filename)
static java.lang.String | generatePath(java.lang.String baseURI, java.lang.String dumpFilePrefix, java.lang.Byte tag, java.lang.String bigBucketFileName)
static java.lang.String | generateTarFileName(java.lang.String name)
static java.lang.String | generateTarURI(Path baseURI, java.lang.String filename)
static java.lang.String | generateTarURI(java.lang.String baseURI, java.lang.String filename)
static java.lang.String | generateTmpURI(java.lang.String baseURI, java.lang.String id)
static BaseWork | getBaseWork(Configuration conf, java.lang.String name)
static java.util.List<java.lang.String> | getColumnNames(java.util.Properties props)
static java.util.List<java.lang.String> | getColumnNamesFromFieldSchema(java.util.List<FieldSchema> partCols)
static java.util.List<java.lang.String> | getColumnNamesFromSortCols(java.util.List<Order> sortCols)
static java.util.List<java.lang.String> | getColumnTypes(java.util.Properties props)
static int | getDefaultNotificationInterval(Configuration hconf) - Gets the default notification interval to send progress updates to the tracker.
static java.util.List<java.lang.String> | getFieldSchemaString(java.util.List<FieldSchema> fl)
static java.lang.String | getFileExtension(JobConf jc, boolean isCompressed) - Deprecated.
static java.lang.String | getFileExtension(JobConf jc, boolean isCompressed, HiveOutputFormat<?,?> hiveOutputFormat) - Based on the compression option, output format, and configured output codec, get the extension for the output file.
static java.lang.String | getFileNameFromDirName(java.lang.String dirName)
static FileStatus[] | getFileStatusRecurse(Path path, int level, FileSystem fs) - Get all file statuses from a root path, recursing to the given directory level.
static java.util.List<java.util.LinkedHashMap<java.lang.String,java.lang.String>> | getFullDPSpecs(Configuration conf, DynamicPartitionCtx dpCtx) - Construct a list of full partition specs from the Dynamic Partition Context and the directory names corresponding to these dynamic partitions.
static java.lang.String | getHashedStatsPrefix(java.lang.String statsPrefix, int maxPrefixLength) - If statsPrefix's length is greater than maxPrefixLength and maxPrefixLength > 0, returns an MD5 hash of statsPrefix followed by a path separator; otherwise returns statsPrefix.
static double | getHighestSamplePercentage(MapWork work) - Returns the highest sample percentage of any alias in the given MapWork.
static java.util.List<Path> | getInputPaths(JobConf job, MapWork work, java.lang.String hiveScratchDir, Context ctx) - Computes a list of all input paths needed to compute the given MapWork.
static ContentSummary | getInputSummary(Context ctx, MapWork work, PathFilter filter) - Calculate the total size of input files.
static MapredWork | getMapRedWork(Configuration conf)
static MapWork | getMapWork(Configuration conf)
static java.util.List<ExecDriver> | getMRTasks(java.util.List<Task<? extends java.io.Serializable>> tasks)
static java.lang.String | getNameMessage(java.lang.Exception e)
static java.lang.String | getOpTreeSkel(Operator<?> op)
static PartitionDesc | getPartitionDesc(Partition part)
static PartitionDesc | getPartitionDescFromTableDesc(TableDesc tblDesc, Partition part)
static java.lang.String | getPrefixedTaskIdFromFilename(java.lang.String filename) - Get the part-spec + task id from the filename.
static long | getRandomWaitTime(int baseWindow, int failures, java.util.Random r) - Introduces a random factor to the wait time before another retry.
static ReduceWork | getReduceWork(Configuration conf)
static java.lang.String | getResourceFiles(Configuration conf, SessionState.ResourceType t)
static StatsPublisher | getStatsPublisher(JobConf jc)
static TableDesc | getTableDesc(java.lang.String cols, java.lang.String colTypes)
static TableDesc | getTableDesc(Table tbl)
static java.lang.String | getTaskId(Configuration hconf) - Gets the task id if we are running as a Hadoop job.
static java.lang.String | getTaskIdFromFilename(java.lang.String filename) - Get the task id from the filename.
static long | getTotalInputFileSize(ContentSummary inputSummary, MapWork work, double highestSamplePercentage) - Computes the total input file size.
static long | getTotalInputNumFiles(ContentSummary inputSummary, MapWork work, double highestSamplePercentage) - Computes the total number of input files.
static boolean | isEmptyPath(JobConf job, Path dirPath)
static boolean | isEmptyPath(JobConf job, Path dirPath, Context ctx)
static boolean | isTempPath(FileStatus file) - Detect if the supplied file is a temporary path.
static FileStatus[] | listStatusIfExists(Path path, FileSystem fs) - Returns null if the path does not exist.
static java.util.ArrayList | makeList(java.lang.Object... olist)
static java.util.HashMap | makeMap(java.lang.Object... olist)
static java.util.Properties | makeProperties(java.lang.String... olist)
static java.util.List<java.lang.String> | mergeUniqElems(java.util.List<java.lang.String> src, java.util.List<java.lang.String> dest)
static void | mvFileToFinalPath(java.lang.String specPath, Configuration hconf, boolean success, org.apache.commons.logging.Log log, DynamicPartitionCtx dpCtx, FileSinkDesc conf, Reporter reporter)
static java.lang.String | now()
static java.sql.PreparedStatement | prepareWithRetry(java.sql.Connection conn, java.lang.String stmt, int waitWindow, int maxRetries) - Retry preparing a SQL statement with random backoff (same as the one implemented in HDFS-767).
static Utilities.StreamStatus | readColumn(java.io.DataInput in, java.io.OutputStream out)
static java.lang.String | realFile(java.lang.String newFile, Configuration conf) - Shamelessly cloned from GenericOptionsParser.
static void | removeFromClassPath(java.lang.String[] pathsToRemove) - Remove elements from the classpath.
static java.util.HashMap<java.lang.String,FileStatus> | removeTempOrDuplicateFiles(FileStatus[] items, FileSystem fs)
static void | removeTempOrDuplicateFiles(FileSystem fs, Path path) - Remove all temporary files and duplicate (double-committed) files from a given directory.
static java.util.ArrayList<java.lang.String> | removeTempOrDuplicateFiles(FileSystem fs, Path path, DynamicPartitionCtx dpCtx) - Remove all temporary files and duplicate (double-committed) files from a given directory.
static void | rename(FileSystem fs, Path src, Path dst) - Rename src to dst; if dst already exists, move the files in src to dst.
static void | renameOrMoveFiles(FileSystem fs, Path src, Path dst) - Rename src to dst; if dst already exists, move the files in src to dst.
static java.lang.String | replaceTaskIdFromFilename(java.lang.String filename, int bucketNum) - Replace the task id in the filename.
static java.lang.String | replaceTaskIdFromFilename(java.lang.String filename, java.lang.String fileId)
static void | reworkMapRedWork(Task<? extends java.io.Serializable> task, boolean reworkMapredWork, HiveConf conf) - The check here is kind of not clean.
static java.lang.String | serializeExpression(ExprNodeDesc expr)
static void | serializeObject(java.lang.Object plan, java.io.OutputStream out) - Serialize the object.
static void | serializePlan(java.lang.Object plan, java.io.OutputStream out) - Serializes the plan.
static void | setColumnNameList(JobConf jobConf, Operator op)
static void | setColumnTypeList(JobConf jobConf, Operator op)
static void | setInputAttributes(Configuration conf, MapWork mWork) - Set the hive input format, and the input format file if necessary.
static void | setInputPaths(JobConf job, java.util.List<Path> pathsToAdd) - Adds all the paths in the provided list to the Job conf object as input paths for the job.
static void | setMapRedWork(Configuration conf, MapredWork w, java.lang.String hiveScratchDir)
static Path | setMapWork(Configuration conf, MapWork w, java.lang.String hiveScratchDir, boolean useCache)
static Path | setReduceWork(Configuration conf, ReduceWork w, java.lang.String hiveScratchDir, boolean useCache)
static void | setWorkflowAdjacencies(Configuration conf, QueryPlan plan)
static double | showTime(long time)
static Path | toTaskTempPath(Path orig)
static Path | toTaskTempPath(java.lang.String orig)
static Path | toTempPath(Path orig)
static Path | toTempPath(java.lang.String orig) - Given a path, convert it to a temporary path.
static void | validateColumnNames(java.util.List<java.lang.String> colNames, java.util.List<java.lang.String> checkCols)
static void | validatePartSpec(Table tbl, java.util.Map<java.lang.String,java.lang.String> partSpec)
public static java.lang.String HADOOP_LOCAL_FS
public static java.lang.String MAP_PLAN_NAME
public static java.lang.String REDUCE_PLAN_NAME
public static java.util.List<java.lang.String> reduceFieldNameList
public static TableDesc defaultTd
public static final int carriageReturnCode
public static final int newLineCode
public static final int tabCode
public static final int ctrlaCode
public static final java.lang.String INDENT
public static java.lang.String nullStringStorage
public static java.lang.String nullStringOutput
public static java.util.Random randGen
public static final java.lang.String NSTR
public static java.lang.Object INPUT_SUMMARY_LOCK
public static java.lang.String suffix
public static final char sqlEscapeChar
public static void clearWork(Configuration conf)
public static MapredWork getMapRedWork(Configuration conf)
public static MapWork getMapWork(Configuration conf)
public static ReduceWork getReduceWork(Configuration conf)
public static BaseWork getBaseWork(Configuration conf, java.lang.String name)
public static void setWorkflowAdjacencies(Configuration conf, QueryPlan plan)
public static java.util.List<java.lang.String> getFieldSchemaString(java.util.List<FieldSchema> fl)
public static void setMapRedWork(Configuration conf, MapredWork w, java.lang.String hiveScratchDir)
public static Path setMapWork(Configuration conf, MapWork w, java.lang.String hiveScratchDir, boolean useCache)
public static Path setReduceWork(Configuration conf, ReduceWork w, java.lang.String hiveScratchDir, boolean useCache)
public static java.lang.String serializeExpression(ExprNodeDesc expr)
public static ExprNodeDesc deserializeExpression(java.lang.String s, Configuration conf)
public static void serializePlan(java.lang.Object plan, java.io.OutputStream out)
Parameters:
  plan - The plan, such as QueryPlan, MapredWork, etc.
  out - The stream to write to.
public static <T> T deserializePlan(java.io.InputStream in)
Parameters:
  in - The stream to read from.
public static <T> T clonePlan(T plan)
Parameters:
  plan - The plan.
public static void serializeObject(java.lang.Object plan, java.io.OutputStream out)
public static <T> T deserializeObject(java.io.InputStream in)
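A round-trip sketch for the plan helpers above, assuming the Hive classes are on the classpath; serializePlan writes an XML form, which is also what clonePlan round-trips through ("clones using the powers of XML"):

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.MapredWork;

public class PlanRoundTripSketch {
  static MapredWork roundTrip(MapredWork plan) {
    // Serialize the plan into a buffer, then read it back.
    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    Utilities.serializePlan(plan, buf);
    MapredWork restored =
        Utilities.deserializePlan(new ByteArrayInputStream(buf.toByteArray()));
    // clonePlan is the same idea in a single call.
    return Utilities.clonePlan(restored);
  }
}
```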
public static java.lang.String getTaskId(Configuration hconf)
public static java.util.HashMap makeMap(java.lang.Object... olist)
public static java.util.Properties makeProperties(java.lang.String... olist)
public static java.util.ArrayList makeList(java.lang.Object... olist)
public static TableDesc getTableDesc(java.lang.String cols, java.lang.String colTypes)
public static PartitionDesc getPartitionDesc(Partition part) throws HiveException
Throws: HiveException
public static PartitionDesc getPartitionDescFromTableDesc(TableDesc tblDesc, Partition part) throws HiveException
Throws: HiveException
public static java.lang.String getOpTreeSkel(Operator<?> op)
public static boolean contentsEqual(java.io.InputStream is1, java.io.InputStream is2, boolean ignoreWhitespace) throws java.io.IOException
Throws: java.io.IOException
public static java.lang.String abbreviate(java.lang.String str, int max)
public static Utilities.StreamStatus readColumn(java.io.DataInput in, java.io.OutputStream out) throws java.io.IOException
Throws: java.io.IOException
public static java.io.OutputStream createCompressedStream(JobConf jc, java.io.OutputStream out) throws java.io.IOException
Parameters:
  jc - Job Configuration
  out - Output Stream to be converted into compressed output stream
Throws: java.io.IOException
public static java.io.OutputStream createCompressedStream(JobConf jc, java.io.OutputStream out, boolean isCompressed) throws java.io.IOException
Parameters:
  jc - Job Configuration
  out - Output Stream to be converted into compressed output stream
  isCompressed - whether the output stream needs to be compressed or not
Throws: java.io.IOException
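A minimal usage sketch (the output path is hypothetical): the two-argument form takes the compression decision from the job configuration, while the three-argument form lets the caller force it.

```java
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.mapred.JobConf;

public class CompressedStreamSketch {
  public static void main(String[] args) throws IOException {
    JobConf jc = new JobConf();
    OutputStream raw = new FileOutputStream("/tmp/example.out"); // hypothetical path
    // Force compression off here; createCompressedStream(jc, raw) would
    // instead consult the codec and compression options in jc.
    OutputStream out = Utilities.createCompressedStream(jc, raw, false);
    out.write("hello".getBytes());
    out.close();
  }
}
```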
@Deprecated
public static java.lang.String getFileExtension(JobConf jc, boolean isCompressed)
Deprecated. See getFileExtension(JobConf, boolean, HiveOutputFormat).
Parameters:
  jc - Job Configuration
  isCompressed - Whether the output file is compressed or not
public static java.lang.String getFileExtension(JobConf jc, boolean isCompressed, HiveOutputFormat<?,?> hiveOutputFormat)
The property hive.output.file.extension is used to determine the extension - if set, it will override other logic for choosing an extension.
Parameters:
  jc - Job Configuration
  isCompressed - Whether the output file is compressed or not
  hiveOutputFormat - The output format, used to detect if the format is text
public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, Path file, java.lang.Class<?> keyClass, java.lang.Class<?> valClass) throws java.io.IOException
Parameters:
  jc - Job configuration
  fs - File System to create file in
  file - Path to be created
  keyClass - Java Class for key
  valClass - Java Class for value
Throws: java.io.IOException
public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs, Path file, java.lang.Class<?> keyClass, java.lang.Class<?> valClass, boolean isCompressed) throws java.io.IOException
Parameters:
  jc - Job configuration
  fs - File System to create file in
  file - Path to be created
  keyClass - Java Class for key
  valClass - Java Class for value
Throws: java.io.IOException
public static RCFile.Writer createRCFileWriter(JobConf jc, FileSystem fs, Path file, boolean isCompressed) throws java.io.IOException
Parameters:
  jc - Job configuration
  fs - File System to create file in
  file - Path to be created
Throws: java.io.IOException
public static java.lang.String realFile(java.lang.String newFile, Configuration conf) throws java.io.IOException
Throws: java.io.IOException
public static java.util.List<java.lang.String> mergeUniqElems(java.util.List<java.lang.String> src, java.util.List<java.lang.String> dest)
public static Path toTaskTempPath(Path orig)
public static Path toTaskTempPath(java.lang.String orig)
public static Path toTempPath(Path orig)
public static Path toTempPath(java.lang.String orig)
public static boolean isTempPath(FileStatus file)
public static void rename(FileSystem fs, Path src, Path dst) throws java.io.IOException, HiveException
Parameters:
  fs - the FileSystem where src and dst are on.
  src - the src directory
  dst - the target directory
Throws: java.io.IOException, HiveException
public static void renameOrMoveFiles(FileSystem fs, Path src, Path dst) throws java.io.IOException, HiveException
Parameters:
  fs - the FileSystem where src and dst are on.
  src - the src directory
  dst - the target directory
Throws: java.io.IOException, HiveException
public static java.lang.String getTaskIdFromFilename(java.lang.String filename)
Parameters:
  filename - filename to extract taskid from
public static java.lang.String getPrefixedTaskIdFromFilename(java.lang.String filename)
Parameters:
  filename - filename to extract taskid from
public static java.lang.String getFileNameFromDirName(java.lang.String dirName)
public static java.lang.String replaceTaskIdFromFilename(java.lang.String filename, int bucketNum)
Parameters:
  filename - filename in which to replace the taskid; e.g. "0_0" or "0_0.gz" becomes "33_0" or "33_0.gz" when bucketNum is 33
public static java.lang.String replaceTaskIdFromFilename(java.lang.String filename, java.lang.String fileId)
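A short illustration of the renaming described above (the output names follow the parameter doc):

```java
import org.apache.hadoop.hive.ql.exec.Utilities;

public class TaskIdRenameSketch {
  public static void main(String[] args) {
    // Per the parameter doc: taskid "0_0" / "0_0.gz" with bucket number 33
    // becomes "33_0" / "33_0.gz".
    System.out.println(Utilities.replaceTaskIdFromFilename("0_0", 33));    // 33_0
    System.out.println(Utilities.replaceTaskIdFromFilename("0_0.gz", 33)); // 33_0.gz
  }
}
```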
public static FileStatus[] listStatusIfExists(Path path, FileSystem fs) throws java.io.IOException
Throws: java.io.IOException
public static FileStatus[] getFileStatusRecurse(Path path, int level, FileSystem fs) throws java.io.IOException
Parameters:
  path - the root path
  level - the depth of directory to explore
  fs - the file system
Throws: java.io.IOException
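A usage sketch; the warehouse path and partition layout are hypothetical. For a table partitioned on two keys, level 2 would return the statuses of the leaf partition directories:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Utilities;

public class RecurseSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    // e.g. /warehouse/tbl/ds=2012-01-01/hr=00 sits two levels below the root
    FileStatus[] leaves =
        Utilities.getFileStatusRecurse(new Path("/warehouse/tbl"), 2, fs);
    for (FileStatus s : leaves) {
      System.out.println(s.getPath());
    }
  }
}
```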
public static void mvFileToFinalPath(java.lang.String specPath, Configuration hconf, boolean success, org.apache.commons.logging.Log log, DynamicPartitionCtx dpCtx, FileSinkDesc conf, Reporter reporter) throws java.io.IOException, HiveException
Throws: java.io.IOException, HiveException
public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws java.io.IOException
Throws: java.io.IOException
public static java.util.ArrayList<java.lang.String> removeTempOrDuplicateFiles(FileSystem fs, Path path, DynamicPartitionCtx dpCtx) throws java.io.IOException
Throws: java.io.IOException
public static java.util.HashMap<java.lang.String,FileStatus> removeTempOrDuplicateFiles(FileStatus[] items, FileSystem fs) throws java.io.IOException
Throws: java.io.IOException
public static java.lang.String getNameMessage(java.lang.Exception e)
public static java.lang.String getResourceFiles(Configuration conf, SessionState.ResourceType t)
public static java.lang.ClassLoader addToClassPath(java.lang.ClassLoader cloader, java.lang.String[] newPaths) throws java.lang.Exception
Parameters:
  newPaths - Array of classpath elements
Throws: java.lang.Exception
public static void removeFromClassPath(java.lang.String[] pathsToRemove) throws java.lang.Exception
Parameters:
  pathsToRemove - Array of classpath elements
Throws: java.lang.Exception
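One common calling pattern, sketched under the assumption that the augmented loader is installed as the thread context loader; the jar path is hypothetical:

```java
import org.apache.hadoop.hive.ql.exec.Utilities;

public class ClassPathSketch {
  public static void main(String[] args) throws Exception {
    ClassLoader current = Thread.currentThread().getContextClassLoader();
    // Make a freshly added jar visible to subsequent class lookups.
    ClassLoader augmented =
        Utilities.addToClassPath(current, new String[] { "/tmp/my-udf.jar" });
    Thread.currentThread().setContextClassLoader(augmented);
    // ... later, drop the same entries again:
    Utilities.removeFromClassPath(new String[] { "/tmp/my-udf.jar" });
  }
}
```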
public static java.lang.String formatBinaryString(byte[] array, int start, int length)
public static java.util.List<java.lang.String> getColumnNamesFromSortCols(java.util.List<Order> sortCols)
public static java.util.List<java.lang.String> getColumnNamesFromFieldSchema(java.util.List<FieldSchema> partCols)
public static java.util.List<java.lang.String> getColumnNames(java.util.Properties props)
public static java.util.List<java.lang.String> getColumnTypes(java.util.Properties props)
public static void validateColumnNames(java.util.List<java.lang.String> colNames, java.util.List<java.lang.String> checkCols) throws SemanticException
Throws: SemanticException
public static int getDefaultNotificationInterval(Configuration hconf)
Parameters:
  hconf -
public static void copyTableJobPropertiesToConf(TableDesc tbl, JobConf job)
Parameters:
  tbl - table descriptor from which to read
  job - configuration which receives configured properties
public static ContentSummary getInputSummary(Context ctx, MapWork work, PathFilter filter) throws java.io.IOException
Parameters:
  ctx - the hadoop job context
  work - map reduce job plan
  filter - filter to apply to the input paths before calculating size
Throws: java.io.IOException
public static boolean isEmptyPath(JobConf job, Path dirPath, Context ctx) throws java.lang.Exception
Throws: java.lang.Exception
public static boolean isEmptyPath(JobConf job, Path dirPath) throws java.lang.Exception
Throws: java.lang.Exception
public static java.util.List<ExecDriver> getMRTasks(java.util.List<Task<? extends java.io.Serializable>> tasks)
public static java.util.List<java.util.LinkedHashMap<java.lang.String,java.lang.String>> getFullDPSpecs(Configuration conf, DynamicPartitionCtx dpCtx) throws HiveException
Throws: HiveException
public static StatsPublisher getStatsPublisher(JobConf jc)
public static java.lang.String getHashedStatsPrefix(java.lang.String statsPrefix, int maxPrefixLength)
Parameters:
  statsPrefix -
  maxPrefixLength -
public static void setColumnNameList(JobConf jobConf, Operator op)
public static void setColumnTypeList(JobConf jobConf, Operator op)
public static void validatePartSpec(Table tbl, java.util.Map<java.lang.String,java.lang.String> partSpec) throws SemanticException
Throws: SemanticException
public static java.lang.String generatePath(java.lang.String baseURI, java.lang.String dumpFilePrefix, java.lang.Byte tag, java.lang.String bigBucketFileName)
public static java.lang.String generateFileName(java.lang.Byte tag, java.lang.String bigBucketFileName)
public static java.lang.String generateTmpURI(java.lang.String baseURI, java.lang.String id)
public static java.lang.String generateTarURI(java.lang.String baseURI, java.lang.String filename)
public static java.lang.String generateTarURI(Path baseURI, java.lang.String filename)
public static java.lang.String generateTarFileName(java.lang.String name)
public static java.lang.String generatePath(Path baseURI, java.lang.String filename)
public static java.lang.String now()
public static double showTime(long time)
public static java.lang.String checkJDOPushDown(Table tab, ExprNodeDesc expr, GenericUDF parent)
Parameters:
  tab - The table that contains the partition columns.
  expr - the partition pruning expression
  parent - parent UDF of expr if parent exists and contains a UDF; otherwise null.
public static void reworkMapRedWork(Task<? extends java.io.Serializable> task, boolean reworkMapredWork, HiveConf conf) throws SemanticException
Parameters:
  task -
  reworkMapredWork -
  conf -
Throws: SemanticException
public static <T> T executeWithRetry(Utilities.SQLCommand<T> cmd, java.sql.PreparedStatement stmt, int baseWindow, int maxRetries) throws java.sql.SQLException
Parameters:
  cmd - the SQL command
  stmt - the prepared statement of SQL.
  baseWindow - The base time window (in milliseconds) before the next retry. See getRandomWaitTime(int, int, java.util.Random) for details.
  maxRetries - the maximum # of retries when getting a SQLTransientException.
Throws: java.sql.SQLException - throws SQLRecoverableException or SQLNonTransientException the first time it is caught, or SQLTransientException once maxRetries has been reached.
public static java.sql.Connection connectWithRetry(java.lang.String connectionString, int waitWindow, int maxRetries) throws java.sql.SQLException
Parameters:
  connectionString - the JDBC connection string.
  waitWindow - The base time window (in milliseconds) before the next retry. See getRandomWaitTime(int, int, java.util.Random) for details.
  maxRetries - the maximum # of retries when getting a SQLTransientException.
Throws: java.sql.SQLException - throws SQLRecoverableException or SQLNonTransientException the first time it is caught, or SQLTransientException once maxRetries has been reached.
public static java.sql.PreparedStatement prepareWithRetry(java.sql.Connection conn, java.lang.String stmt, int waitWindow, int maxRetries) throws java.sql.SQLException
Parameters:
  conn - a JDBC connection.
  stmt - the SQL statement to be prepared.
  waitWindow - The base time window (in milliseconds) before the next retry. See getRandomWaitTime(int, int, java.util.Random) for details.
  maxRetries - the maximum # of retries when getting a SQLTransientException.
Throws: java.sql.SQLException - throws SQLRecoverableException or SQLNonTransientException the first time it is caught, or SQLTransientException once maxRetries has been reached.
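A sketch tying the three retry helpers together. The connection string is hypothetical, and the single abstract method of Utilities.SQLCommand is assumed here to be run(PreparedStatement); check the SQLCommand javadoc for the exact shape.

```java
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.hadoop.hive.ql.exec.Utilities;

public class RetrySketch {
  public static void main(String[] args) throws SQLException {
    // Each call backs off randomly between attempts (see getRandomWaitTime).
    Connection conn =
        Utilities.connectWithRetry("jdbc:derby:;databaseName=tmpstats", 1000, 3); // hypothetical URL
    PreparedStatement ps =
        Utilities.prepareWithRetry(conn, "SELECT COUNT(*) FROM t", 1000, 3);
    Long n = Utilities.executeWithRetry(new Utilities.SQLCommand<Long>() {
      public Long run(PreparedStatement stmt) throws SQLException { // assumed method name
        ResultSet rs = stmt.executeQuery();
        rs.next();
        return rs.getLong(1);
      }
    }, ps, 1000, 3);
    System.out.println(n);
  }
}
```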
public static long getRandomWaitTime(int baseWindow, int failures, java.util.Random r)
Parameters:
  baseWindow - the base waiting window.
  failures - number of failures so far.
  r - a random generator.
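A sketch of the retry loop this supports: the wait grows with the failure count and is randomized so that concurrent clients do not retry in lockstep. The window value is illustrative.

```java
import java.util.Random;
import org.apache.hadoop.hive.ql.exec.Utilities;

public class BackoffSketch {
  public static void main(String[] args) throws InterruptedException {
    Random r = new Random();
    int baseWindow = 1000; // base waiting window in milliseconds (illustrative)
    for (int failures = 1; failures <= 3; failures++) {
      long waitMs = Utilities.getRandomWaitTime(baseWindow, failures, r);
      System.out.println("attempt " + failures + ": sleeping ~" + waitMs + " ms");
      Thread.sleep(waitMs);
    }
  }
}
```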
public static java.lang.String escapeSqlLike(java.lang.String key)
Parameters:
  key - the string that will be used for the SQL LIKE operator.
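What the escaping is for, per the summary above; the exact escaped form depends on the class's sqlEscapeChar field, so no output is asserted here:

```java
import org.apache.hadoop.hive.ql.exec.Utilities;

public class EscapeLikeSketch {
  public static void main(String[] args) {
    // '_' and '%' are SQL LIKE wildcards; escaping them lets the key match literally.
    String escaped = Utilities.escapeSqlLike("100%_done");
    // The escaped key would then be bound to a LIKE pattern, paired with an
    // ESCAPE clause using sqlEscapeChar.
    System.out.println(escaped);
  }
}
```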
public static java.lang.String formatMsecToStr(long msec)
Parameters:
  msec - milliseconds
public static int estimateNumberOfReducers(HiveConf conf, ContentSummary inputSummary, MapWork work, boolean finalMapRed) throws java.io.IOException
Throws: java.io.IOException
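For intuition, a sketch of the usual size-based estimate; the real method also weighs sampling and the finalMapRed flag, and the defaults below (for hive.exec.reducers.bytes.per.reducer and hive.exec.reducers.max) are stated as assumptions:

```java
public class ReducerEstimateSketch {
  public static void main(String[] args) {
    long bytesPerReducer = 1000000000L; // assumed: hive.exec.reducers.bytes.per.reducer default
    int maxReducers = 999;              // assumed: hive.exec.reducers.max default
    long totalInputSize = 123456789012L;
    // One reducer per bytesPerReducer of input, rounded up and capped.
    int reducers = (int) Math.min(maxReducers,
        (totalInputSize + bytesPerReducer - 1) / bytesPerReducer);
    System.out.println(reducers); // 124 for this input size
  }
}
```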
public static long getTotalInputFileSize(ContentSummary inputSummary, MapWork work, double highestSamplePercentage)
Parameters:
  inputSummary -
  work -
  highestSamplePercentage -
public static long getTotalInputNumFiles(ContentSummary inputSummary, MapWork work, double highestSamplePercentage)
Parameters:
  inputSummary -
  work -
  highestSamplePercentage -
public static double getHighestSamplePercentage(MapWork work)
public static java.util.List<Path> getInputPaths(JobConf job, MapWork work, java.lang.String hiveScratchDir, Context ctx) throws java.lang.Exception
Parameters:
  job - JobConf used to run the job
  work - MapWork encapsulating the info about the task
  hiveScratchDir - The tmp dir used to create dummy files if needed
  ctx - Context object
Throws: java.lang.Exception
public static void setInputPaths(JobConf job, java.util.List<Path> pathsToAdd)
Parameters:
  job -
  pathsToAdd -
public static void setInputAttributes(Configuration conf, MapWork mWork)
public static void createTmpDirs(Configuration conf, MapWork mWork) throws java.io.IOException
Parameters:
  conf - Used to get the right FileSystem
  mWork - Used to find FileSinkOperators
Throws: java.io.IOException
public static void createTmpDirs(Configuration conf, ReduceWork rWork) throws java.io.IOException
Parameters:
  conf - Used to get the right FileSystem
  rWork - Used to find FileSinkOperators
Throws: java.io.IOException