Script Apache Pig (Hadoop) + UDF Ruby

# test.rb
require 'pigudf'
require 'java'
# Container class for Ruby functions that the Pig script invokes as UDFs.
class Myudfs < PigUdf
  # Schema string handed to PigUdf for the method defined below.
  # NOTE(review): the argument must sit on the same line as `outputSchema`;
  # on a separate line Ruby parses it as an unrelated, discarded string.
  outputSchema "word:chararray"

  # Combines the given values left to right with `+`, ignoring nils.
  #
  # @param input [Array] values to combine; nil entries are dropped first
  # @return the combined value, or nil when no non-nil value was given
  def concat(*input)
    input.compact.inject(:+)
  end
end
# test.pig
-- Load the Ruby functions from test.rb under the myfuncs namespace.
REGISTER ./test.rb USING jruby AS myfuncs;

-- Read comma-separated rows as two text columns.
pairs = LOAD 'test.txt' USING PigStorage(',') AS (a:chararray, b:chararray);

-- Apply the Ruby concat function to both columns of every row.
joined = FOREACH pairs GENERATE myfuncs.concat(a, b);

-- Write the results to the 'output' directory.
STORE joined INTO 'output';
# test.txt
my, phone
any
, home
$ brew install pig # OS/X
$ pig -x local test.pig
$ cat output/part-m-00000

Aproveitar!