Thursday, April 7, 2011

Useful MySQL Functions


--Validate Email address

DELIMITER $$
DROP FUNCTION IF EXISTS is_valid_email $$
CREATE FUNCTION `is_valid_email`(p_email varchar(150)) RETURNS tinyint(1)
BEGIN
CASE
WHEN NOT (SELECT p_email REGEXP '^[A-Z0-9._%-]+@[A-Z0-9-]+[\.]{1}[A-Z]{2,4}$')
THEN
RETURN FALSE;
ELSE
RETURN TRUE;
END CASE;
END $$
DELIMITER ;

--Only Alphabetical characters

DELIMITER $$
DROP FUNCTION IF EXISTS is_alphabetical_string $$
CREATE FUNCTION `is_alphabetical_string`(p_string varchar(150)) RETURNS tinyint(1)
BEGIN
CASE
WHEN NOT (SELECT p_string REGEXP '^[A-Za-z]*$')
THEN
RETURN FALSE;
ELSE
RETURN TRUE;
END CASE;
END $$
DELIMITER ;

--Only Digits

DELIMITER $$
DROP FUNCTION IF EXISTS is_digit_string $$
CREATE FUNCTION `is_digit_string`(p_string varchar(150)) RETURNS tinyint(1)
BEGIN
CASE
WHEN NOT (SELECT p_string REGEXP '^[0-9]*$')
THEN
RETURN FALSE;
ELSE
RETURN TRUE;
END CASE;
END $$
DELIMITER ;

--Only Alphanumeric characters

DELIMITER $$
DROP FUNCTION IF EXISTS strip_non_alpha $$
CREATE FUNCTION `strip_non_alpha`(_dirty_string varchar(110)) RETURNS varchar(110)
BEGIN
DECLARE _length int;
DECLARE _position int;
DECLARE _current_char varchar(1);
DECLARE _clean_string varchar(110);
SET _clean_string = '';
SET _length = LENGTH(_dirty_string);
SET _position = 1;
WHILE _position <= _length DO
SET _current_char = SUBSTRING(_dirty_string, _position, 1);
IF _current_char REGEXP '[A-Za-z0-9]' THEN
SET _clean_string = CONCAT(_clean_string, _current_char);
END IF;
SET _position = _position + 1;
END WHILE;
RETURN CONCAT('', _clean_string);
END $$
DELIMITER ;

--Get a sample from a data set

DELIMITER $$
DROP PROCEDURE IF EXISTS staging.get_random_sample $$
CREATE PROCEDURE staging.get_random_sample (p_source_schema varchar(64),p_source_table varchar(64),p_target_schema varchar(64),p_target_table varchar(64),p_percentage int)
BEGIN
DECLARE v_source_num_rows int DEFAULT 0;
DECLARE v_target_num_rows int DEFAULT 0;
DECLARE v_row_num int DEFAULT 0;
DECLARE i int DEFAULT 0;
SET @select_stmt = concat('SELECT COUNT(*) INTO @v_source_num_rows',' FROM ',p_source_schema,'.',p_source_table);
prepare select_stmt from @select_stmt;
execute select_stmt;
SET @v_target_num_rows = ROUND(@v_source_num_rows*p_percentage/100);
SET @create_stmt = concat('CREATE TABLE ',p_target_schema,'.',p_target_table,' LIKE ',p_source_schema,'.',p_source_table);
prepare create_stmt from @create_stmt;
execute create_stmt;
WHILE @v_target_num_rows > i DO
SELECT FLOOR(1+RAND() * @v_source_num_rows) - 1 INTO v_row_num;
SET @insert_stmt = concat('INSERT INTO ',p_target_schema,'.',p_target_table,' SELECT * FROM ',p_source_schema,'.',p_source_table,' LIMIT ',v_row_num,'\,1');
prepare insert_stmt from @insert_stmt;
execute insert_stmt;
SET i = i + 1;
END WHILE;
END $$
DELIMITER ;


Tuesday, October 19, 2010

mod_jk instalation on ubuntu 9.4

Assuming Tomcat and Apache are installed and running

1. Install the mod_jk
sudo su -
apt-get install libapache2-mod-jk

2. Enable md_jk loading
cd /etc/apache2/mods-enabled
ln -s jk.load ../mods-available/jk.load

3. Create the mod_jk conf file
cd /etc/apache2/mods-available
vi jk.conf
# Where to find workers.properties
# Update this path to match your conf directory location
JkWorkersFile /etc/libapache2-mod-jk/workers.properties

# Where to put jk logs
# Update this path to match your logs directory location
JkLogFile /var/log/apache2/mod_jk.log

# Set the jk log level [debug/error/info]
JkLogLevel info

# Select the log format
JkLogStampFormat "[%a %b %d %H:%M:%S %Y]"

# JkOptions indicate to send SSL KEY SIZE,
JkOptions +ForwardKeySize +ForwardURICompat -ForwardDirectories

# JkRequestLogFormat set the request format
JkRequestLogFormat "%w %V %T"

# Shm log file
JkShmFile /var/log/apache2/jk-runtime-status

4.Enable mod_jk configurations
cd ../mods-enabled
ln -s jk.conf ../mods-available/jk.conf

5. Create a worker properties file
vi /etc/libapache2-mod-jk/workers.properties
workers.tomcat_home=/var/lib/tomcat6
workers.java_home=/usr/lib/jvm/java-6-sun
ps=/
worker.ajp13_worker.port=8009
worker.ajp13_worker.host=localhost
worker.ajp13_worker.type=ajp13
worker.ajp13_worker.lbfactor=1

6.Configure url forwarding from Apache to Tomcat
cd /etc/apache2/sites-enabled
vi 000-default ##Which is a symbolic link to ../sites-available/default

...
JkMount /examples/ ajp13_worker
JkMount /examples/* ajp13_worker
JkMount /docs/ ajp13_worker
JkMount /docs/* ajp13_worker
...

##This will forward any request to /examples/ and /docs/ to Tomcat

7. Configure AJP in Tomcat Server.
vi /etc/tomcat6/server.xml
##Uncomment the following line


8. Restart Apache and Tomcat
/etc/init.d/tomcat6 restart
/etc/init.d/apache2 restart

9. Test it
Open a browser
http://localhost:8080/examples
Then remove the port
http://localhost:/examples

You must see the same content, so Apache is forwawrding the request to Tomcat to show /examples content

Friday, October 8, 2010

Postgres Stream replication set up

Master
-IP: 192.168.1.1
-TCP Port: 5432
-Data directory: /var/lib/pgsql/9.0/data
-WAL directory: /var/lib/pgsql/9.0/data/wals

Slave
-IP: 192.168.1.2
-TCP Port: 5432
-Data directory: /var/lib/pgsql/9.0/data
-WAL directory: /var/lib/pgsql/9.0/data/wals

Master configuration: ($ /var/lib/pgsql/9.0/data/postgresql.conf)
-listen_addresses = '*'
-port = 5432
-archive_mode = on
-archive_command = 'cp "%p" /var/lib/pgsql/9.0/data/wals/"%f" < /dev/null'
-max_wal_senders = 1
-wal_level = hot_standby
-hot_standby = on

Authentication Configuration: ($ /var/lib/pgsql/9.0/data/pg_hba.conf)
-host replication all 10.16.35.213/23 trust

Start Up the Master
/usr/pgsql-9.0/bin/postgres -D /var/lib/pgsql/9.0/data &

Make a Base Backup for stand by from the master (Snapshot)
$ psql -c "SELECT pg_start_backup('base_backup')"
$ cd 9.0/data
$ scp -rp x postgres@10.16.35.213:/var/lib/pgsql/9.0/data
$ psql -c "SELECT pg_stop_backup()"

Slave Configuration ($ /var/lib/pgsql/9.0/data/postgresql.conf)

-listen_addresses = '*'
-port = 5432
-archive_mode = on
-archive_command = 'cp "%p" /var/lib/pgsql/9.0/data/wals/"%f" < /dev/null'
-max_wal_senders = 1
-wal_level = hot_standby
-hot_standby = on

$ rm -fr /var/lib/pgsql/9.0/data/postmaster.pid
$ rm -fr /var/lib/pgsql/9.0/data/pg_xlog/000*
$ rm -fr /var/lib/pgsql/9.0/data/pg_xlog/archive_status/*
$ rm -fr /var/lib/pgsql/9.0/data/wals/*

Recovery configuration on the slave: (/var/lib/pgsql/9.0/data/recovery.conf)
standby_mode = 'on'
primary_conninfo = 'host=192.168.1.1 port=5432 user=postgres password=postgres'
restore_command = 'cp -i /var/lib/pgsql/9.0/data/wals/%f %p < /dev/null'

Start up the slave
/usr/pgsql-9.0/bin/postgres -D /var/lib/pgsql/9.0/data &

Check the process
$ ps -fea | grep postgres
root 12121 11960 0 19:43 pts/1 00:00:00 su - postgres
postgres 12122 12121 0 19:43 pts/1 00:00:00 -bash
postgres 12823 12122 0 20:23 pts/1 00:00:00 /usr/pgsql-9.0/bin/postgres -D /var/lib/pgsql/9.0/data
postgres 12824 12823 0 20:23 ? 00:00:00 postgres: logger process
postgres 12825 12823 0 20:23 ? 00:00:00 postgres: startup process recovering 00000001000000000000000C
postgres 12828 12823 0 20:23 ? 00:00:00 postgres: wal receiver process streaming 0/C024038
postgres 12829 12823 0 20:23 ? 00:00:00 postgres: writer process
postgres 12830 12823 0 20:23 ? 00:00:00 postgres: stats collector process
postgres 12901 12122 0 20:59 pts/1 00:00:00 ps -fea
postgres 12902 12122 0 20:59 pts/1 00:00:00 grep postgres

Test it

Insert some data in the master:

$ psql
postgres=# create table t (i int);
postgres=# insert into t values(1);
postgres=# insert into t values(2);
postgres=# select * from t;
i
---
1
2
(2 rows)

Check in the Slave
$ psql
postgres=# select * from t;
i
---
1
2
(2 rows)

Friday, August 27, 2010

Transpose rows into column. Example

mysql> select * from site_parts;
+---------+-----------+------+
| site | part | qty |
+---------+-----------+------+
| Sunrise | HD500 | 50 |
| Sunrise | NIC1000 | 10 |
| Miami | HD500 | 20 |
| Miami | Monitor20 | 10 |
| Miami | HD500 | 5 |
| NY | Mouse | 40 |
+---------+-----------+------+
6 rows in set (0.00 sec)

mysql> select
site,
sum(if(part='HD500',qty,0)) as HD500,
sum(if(part='NIC1000',qty,0)) as NIC1000,
sum(if(part='Monitor20',qty,0)) as Monitor20,
sum(if(part='Mouse',qty,0)) as Mouse
from
site_parts group by site;

+---------+-------+---------+-----------+-------+
| site | HD500 | NIC1000 | Monitor20 | Mouse |
+---------+-------+---------+-----------+-------+
| Miami | 25 | 0 | 10 | 0 |
| NY | 0 | 0 | 0 | 40 |
| Sunrise | 50 | 10 | 0 | 0 |
+---------+-------+---------+-----------+-------+
3 rows in set (0.00 sec)

Monday, August 2, 2010

Merge table management using MySQL event scheduler and Stored Procedures

Normally we use cronjobs to execute scheduled tasks, but some times because of security policies we don't have access to cronjobs, this is even worse in safe environments, in those cases, it is quite possible we do not have access to the file system and the only possible access is though MySQL.

So what can we do when we need to run scheduled tasks in MySQL?.

Fortunately, there is an event scheduler in MySQL; using this feature we can execute basically any MySQL scehduled job.

In this case it is required to create merge table based on pre-define schedule that can be minutes, hours, days, months or years. Yes!, I know that now would bemuch better to use partitions but in this case this is the requirement.

To reach the gaol the following stored procedures were implemented:

1.table_management_create_new_merge: Create the merge table based on the create table statement and a template
2.table_management_drop_merge: Deletes the merge table and all Underlying MyISAM tables
3.table_management_alter_merge: Changes the table and all merge MyISAM tables Underlying
4.table_management_add_new_table_to_merge: Adds a new table to merge
5.table_management_drop_table_from_merge: Take out a MyISAM table from the merge

This is a very basic merge table management system, what is more important in this case is to add and exclude the merge tables on cheduled basis, however, I decided to implement all the necessary procedures to make easier the administration of these Merge tables.

I want o emphasize two things that made this a little more complex than initially thought:


1. MySQL does not store any part of its metadata (information_schema) the relationship between merge MyISAM tables and tables that are part of this. Stores it in a file. MRG which I have no access!


2. I was not able by a cursor or variable assign the result of "SHOW CREATE TABLE" which was the second place where it could achieve the relationship between the merge table and MyISAM tables Underlying!. If someone knows please let me know.

Ok, there was no other choice but to implement a basic data structure what I will use record the relationship between the Merge table and its Underlying MyISAM tables.

Lets go back to goal: add / exclude table of a merge table on scheduled basis.

Here is the procedure:


1. Create the data structure that stores the relationship between the merge table and the underlying mysam tables:


create database table_management;

CREATE TABLE `dbschemas` (
`schema_id` int(11) NOT NULL AUTO_INCREMENT,
`schema_name` varchar(64) CHARACTER SET utf8 DEFAULT NULL,
PRIMARY KEY (`schema_id`),
UNIQUE KEY `schema_name_UNIQUE` (`schema_name`)
) ENGINE=InnoDB;


CREATE TABLE `dbtables` (
`table_id` int(11) NOT NULL AUTO_INCREMENT,
`schema_id` int(11) NOT NULL,
`table_name` varchar(64) DEFAULT NULL,
`parent_table_id` int(11) DEFAULT NULL,
PRIMARY KEY (`table_id`,`schema_id`),
UNIQUE KEY `schema_id_table_name_UNIQUE` (`schema_id`,`table_name`),
KEY `fk_table_schema1` (`schema_id`),
KEY `fk_table_table1` (`parent_table_id`),
CONSTRAINT `fk_table_schema1` FOREIGN KEY (`schema_id`) REFERENCES `dbschemas` (`schema_id`) ON DELETE NO ACTION ON UPDATE NO ACTION,
CONSTRAINT `fk_table_table1` FOREIGN KEY (`parent_table_id`) REFERENCES `dbtables` (`table_id`) ON DELETE NO ACTION ON UPDATE NO ACTION
) ENGINE=InnoDB;


2. Create the required stored procedures:


##################Create new merge table and its template MyISAM TABLE#################

drop procedure table_management_create_new_merge;
delimiter //
#The parameter create_stmt is a regular table create statement, it does not include the union neither the engine.
create procedure table_management_create_new_merge (target_schema varchar(64),create_stmt varchar(10000))
begin
declare merge_table_name varchar(64);
declare column_def varchar(10000);
declare v_merge_table_id int;
declare v_schema_id int;

select trim(substring(trim(substring(create_stmt,locate('table',create_stmt)+5)),1,locate('(',trim(substring(create_stmt,locate('table',create_stmt)+5)))-1)) into @merge_table_name;
select substring(trim(substring(create_stmt,locate('table',create_stmt)+5)),locate('(',trim(substring(create_stmt,locate('table',create_stmt)+5)))) into @column_def;

set @create_myisam_template_table_stmt=concat('create table ',target_schema,'.',@merge_table_name,'_tmpt ',@column_def,' ENGINE=MyISAM');
prepare create_myisam_template_table_stmt from @create_myisam_template_table_stmt;
execute create_myisam_template_table_stmt;

set @create_merge_table_stmt=concat('create table ',target_schema,'.',@merge_table_name,' ',@column_def,' ENGINE=MRG_MyISAM UNION=(',target_schema,'.',@merge_table_name,'_tmpt)');
prepare create_merge_table_stmt from @create_merge_table_stmt;
execute create_merge_table_stmt;

start transaction;

insert ignore into table_management.dbschemas set schema_name=target_schema;
select schema_id into v_schema_id from table_management.dbschemas where schema_name=target_schema;
insert into table_management.dbtables set schema_id=v_schema_id,table_name=@merge_table_name;
select table_id into v_merge_table_id from table_management.dbtables where schema_id=v_schema_id and table_name=@merge_table_name;
insert into table_management.dbtables set schema_id=v_schema_id,table_name=concat(@merge_table_name,'_tmpt'),parent_table_id=v_merge_table_id;

commit;
deallocate prepare create_myisam_template_table_stmt;
deallocate prepare create_merge_table_stmt;
end//
delimiter ;

#################Drop a Merge table and all its underlying MyISAM tables #########################
drop procedure table_management_drop_merge;
delimiter //
create procedure table_management_drop_merge (target_schema varchar(64),target_merge_table varchar(64))
begin
declare done varchar(5) default 'START';
declare table_name varchar(64);
declare v_table_id int;
declare v_schema_id int;
declare table_list cursor for
select c.table_name
from table_management.dbschemas a join table_management.dbtables b on a.schema_id=b.schema_id join table_management.dbtables c on b.table_id=c.parent_table_id
where a.schema_name=target_schema and b.table_name=target_merge_table;
declare continue handler for not found set done = 'END';
open table_list;
while done<>'END' do
fetch table_list into table_name;
if done<>'END' then
set @drop_table_stmt=concat('drop table ',target_schema,'.',table_name);
prepare drop_table_stmt from @drop_table_stmt;
execute drop_table_stmt;
end if;
end while;
close table_list;
set @drop_merge_table_stmt=concat('drop table ',target_schema,'.',target_merge_table);
prepare drop_merge_table_stmt from @drop_merge_table_stmt;
execute drop_merge_table_stmt;
start transaction;
select c.table_id,c.schema_id
into v_table_id,v_schema_id
from table_management.dbschemas a join table_management.dbtables b on a.schema_id=b.schema_id join table_management.dbtables c on b.table_id=c.parent_table_id
where a.schema_name=target_schema and b.table_name=target_merge_table;
delete from table_management.dbtables where table_id=v_table_id and schema_id=v_schema_id;
select a.table_id,a.schema_id
into v_table_id,v_schema_id
from table_management.dbtables a join table_management.dbschemas b on a.schema_id=b.schema_id
where b.schema_name=target_schema and a.table_name=target_merge_table;
delete from table_management.dbtables where table_id=v_table_id and schema_id=v_schema_id;
commit;
deallocate prepare drop_merge_table_stmt;
deallocate prepare drop_table_stmt;
end//
delimiter ;

#################Alter a Merge table and all its underlying MyISAM tables #########################
drop procedure table_management_alter_merge;
delimiter //
create procedure table_management_alter_merge (target_schema varchar(64),alter_stmt varchar(10000))
begin

declare done varchar(5) default 'START';
declare table_name varchar(64);
declare merge_table_name varchar(64);
declare column_def varchar(10000);
declare table_list cursor for
select b.table_name
from table_management.dbschemas a join table_management.dbtables b on a.schema_id=b.schema_id join table_management.dbtables c on b.table_id=c.parent_table_id
where a.schema_name=target_schema and b.table_name=@merge_table_name
union
select c.table_name
from table_management.dbschemas a join table_management.dbtables b on a.schema_id=b.schema_id join table_management.dbtables c on b.table_id=c.parent_table_id
where a.schema_name=target_schema and b.table_name=@merge_table_name;
declare continue handler for not found set done = 'END';

select trim(substring(trim(substring(alter_stmt,locate('table',alter_stmt)+5)),1,locate(' ',trim(substring(alter_stmt,locate('table',alter_stmt)+5)))-1)) into @merge_table_name;

select substring(trim(substring(alter_stmt,locate('table',alter_stmt)+5)),locate(' ',trim(substring(alter_stmt,locate('table',alter_stmt)+5)))) into @column_def;

open table_list;
while done<>'END' do
fetch table_list into table_name;
if done<>'END' then
set @alter_table_stmt=concat('alter table ',target_schema,'.',table_name,' ',@column_def);
prepare alter_table_stmt from @alter_table_stmt;
execute alter_table_stmt;
end if;
end while;
close table_list;
deallocate prepare alter_table_stmt;
end//
delimiter ;

#################Create a new MyISAM table and add it to the a Merge table #########################

drop procedure table_management_add_new_table_to_merge;
delimiter //
create procedure table_management_add_new_table_to_merge (target_schema varchar(64),target_merge_table varchar(64),new_table varchar(64))
begin
declare i int default 0;
declare done varchar(5) default 'START';
declare table_name varchar(64);
declare table_union varchar(10000) default '';
declare table_list cursor for
select c.table_name
from table_management.dbschemas a join table_management.dbtables b on a.schema_id=b.schema_id join table_management.dbtables c on b.table_id=c.parent_table_id
where a.schema_name=target_schema and b.table_name=target_merge_table;
declare continue handler for not found set done = 'END';
open table_list;
while done<>'END' do
fetch table_list into table_name;
if done<>'END' then
if i=0 then
set table_union=concat(target_schema,'.',table_name);
else
set table_union=concat(table_union,',',target_schema,'.',table_name);
end if;
set i=i+1;
end if;
end while;
close table_list;
set @create_table_stmt=concat('create table ',target_schema,'.',new_table,' like ',target_schema,'.',target_merge_table,'_tmpt');
prepare create_table_stmt from @create_table_stmt;
execute create_table_stmt;
set @alter_table_stmt=concat('alter table ',target_schema,'.',target_merge_table,' UNION=(',table_union,',',target_schema,'.',new_table,')');
prepare alter_table_stmt from @alter_table_stmt;
execute alter_table_stmt;
set @insert_into_dbtables_stmt=concat('insert into table_management.dbtables (schema_id,table_name,parent_table_id) select a.schema_id,\'',new_table,'\',b.table_id from table_management.dbschemas a join table_management.dbtables b on a.schema_id=b.schema_id where a.schema_name=\'',target_schema,'\' and table_name=\'',target_merge_table,'\'');
prepare insert_into_dbtables_stmt from @insert_into_dbtables_stmt;
execute insert_into_dbtables_stmt;
deallocate prepare create_table_stmt;
deallocate prepare alter_table_stmt;
deallocate prepare insert_into_dbtables_stmt;
end//
delimiter ;


#################Take a MyISAM table out of ther Merge and alter the merge accordingly #########################
drop procedure table_management_drop_table_from_merge;
delimiter //
create procedure table_management_drop_table_from_merge (target_schema varchar(64),target_merge_table varchar(64),table_to_drop varchar(64))
begin
declare i int default 0;
declare v_schema_id int;
declare v_table_id int;
declare done varchar(5) default 'START';
declare table_name varchar(64);
declare table_union varchar(10000) default '';
declare table_list cursor for
select c.table_name
from table_management.dbschemas a join table_management.dbtables b on a.schema_id=b.schema_id join table_management.dbtables c on b.table_id=c.parent_table_id
where a.schema_name=target_schema and b.table_name=target_merge_table;
declare continue handler for not found set done = 'END';
open table_list;
while done<>'END' do
fetch table_list into table_name;
if done<>'END' then
if table_name<>table_to_drop then
if i=0 then
set table_union=concat(target_schema,'.',table_name);
else
set table_union=concat(table_union,',',target_schema,'.',table_name);
end if;
set i=i+1;
end if;
end if;
end while;
close table_list;
set @alter_table_stmt=concat('alter table ',target_schema,'.',target_merge_table,' UNION=(',table_union,')');
prepare alter_table_stmt from @alter_table_stmt;
execute alter_table_stmt;
start transaction;
delete table_management.dbtables
from
table_management.dbschemas join
table_management.dbtables on table_management.dbschemas.schema_id=table_management.dbtables.schema_id
where table_management.dbschemas.schema_name=target_schema and table_management.dbtables.table_name=table_to_drop;
commit;
deallocate prepare alter_table_stmt;
end//


3. Create the initial merge and template tables:

create database secure3;
table_management_create_new_merge call ('secure3', 'create table myminutetable (id int, timestamp datetime, primary key (id ))');


4. Create a test event. For this, the event scheduler must be enable. This is disabled by default.

To enable it, do:
SET GLOBAL event_scheduler = ON;
To make the change permanent, Add the following to the my.cnf mysqld section inside the
event_scheduler = ON

To check this do a "show running processlist\G" and you should see something like this:

Id: 1
User: event_scheduler
Host: localhost
db: NULL
Command: Daemon
Time: 5
State: Waiting for next activation
Info: NULL

Now you can create the test event, in this case I am creating an event that runs every minute and adds a new table to merge.

drop event myminutetable;
delimiter //

create event myminutetable
on schedule every 1 minute
do
begin
declare datetime varchar(14) default '';
declare table_name varchar(64) default '';
select date_format(now(),'%Y%m%d%k%i') into datetime;
set @table_name=concat('myminutetable_',datetime);
call table_management_add_new_table_to_merge ('secure3','myminutetable',@table_name);
end//
delimiter ;

Verify that it is creating tables every minute:

use secure3;
show tables;

Verify that table_management is being updated

use table_management;
select * from dbtables;

Future improvements:

1. Exception and error handling
2. Parameter validation

Tuesday, October 27, 2009

Simple Hadoop Hive Derby Installation and configuration

This article shows how to install Hadoop and get hive working with centralized metadata repository based on Derby database:


For this case we are going to have 7 datanodes and 1 namenode


hadoopNN01: 192.168.1.171
hadoopDN01: 192.168.1.172
hadoopDN02: 192.168.1.173
hadoopDN03: 192.168.1.174
hadoopDN04: 192.168.1.175
hadoopDN05: 192.168.1.176
hadoopDN06: 192.168.1.177
hadoopDN07: 192.168.1.178


-Hadoop 0.19.2 (There is a new version, but I will install this one for compatibility with other tools I am planning to install (i.e. hive, pig, etc)
-Install Java JDK on each server
-Create a hadoop account on each server
-Configure SSH 


Server 1
ssh-keygen -t dsa

Server 2
$mkdir .ssh (Under the home directory) 
Server 1
scp .ssh/id_dsa.hadoopnn01:/home/hadoop/.ssh/authorized_keys2
Server 2
$chmod 700 .ssh 
$chmod 600 .ssh/authorized_keys2


Do the same for all servers in order to guarantee can be sshed between them without password




Unpacked it under /user/local on each node



Set the following variables, on each node:

export HADOOP_HOME=/usr/local/hadoop-0.19.2
export JAVA_HOME=/usr/local/src/jdk1.6.0_12
export PATH=$PATH:$HADOOP_HOME/bin:$JAVA_HOME/bin


Edit conf/hadoop-site.xml (This file must be in all nodes)








 
    fs.default.name
    hdfs://192.168.1.171
 
 
    dfs.data.dir
    /usr/local/hadoop-0.19.2/hdfs/data
    true
 
 

    dfs.name.dir
    /usr/local/hadoop-0.19.2/hdfs/name
    true
 
 
    hadoop.tmp.dir
    /usr/local/hadoop-0.19.2/tmp
    true
 
 
    mapred.system.dir
    /usr/local/hadoop-0.19.2/mapred/system
    true
 
 
    dfs.replication
    3
 
 
    mapred.job.tracker
    192.168.1.171:8021
 




Copy this files to all nodes



Create the directory /usr/local/hadoop-0.19.2/hdfs/data directory on each data node

Create the directory /usr/local/hadoop-0.19.2/hdfs/name directory in the name node


edit conf/slaves and add all the data nodes



192.168.1.172
192.168.1.173
192.168.1.174
192.168.1.175
192.168.1.176
192.168.1.177
192.168.1.178




On the Name node: start the cluster and mapreduce



$bin/start-dfs.sh

$bin/start-mapred.sh


Hive comes with this distribution,,,, but we need to install derby to have a centralized metadata repository




Untar the file in /usr/local/hadoop-0.19.2


Set the environment variables:



$export DERBY_INSTALL=/usr/local/hadoop-0.19.2/db-derby-10.5.1.1-bin
$export DERBY_HOME=/usr/local/hadoop-0.19.2/db-derby-10.5.1.1-bin
$export HADOOP=/usr/local/hadoop-0.19.2/bin/hadoop




$cd /usr/local/hadoop-0.19.2/db-derby-10.5.1.1-bin/data


Start derby


$nohup /usr/local/db-derby-10.5.1.1-bin/bin/startNetworkServer -h 0.0.0.0 &



CP derbyclient.jar and derbytools.jar to the lib's hive directory



$cp /usr/local/db-derby-10.5.1.3-bin/lib/derbyclient.jar /usr/local/hadoop-0.19.2/contrib/hive/lib

$cp /usr/local/db-derby-10.5.1.3-bin/lib/derbytools.jar /usr/local/hadoop-0.19.2/contrib/hive/lib

Edit the files /usr/local/hadoop-0.19.2/contrib/hive/conf/hive-default.xml and /usr/local/hadoop-0.19.2/contrib/hive/conf/jpox.properties

hive-default.xml:












  hadoop.bin.path
  ${user.dir}/../../../bin/hadoop
  
  Path to hadoop binary. Assumes that by default we are executing from hive



  hadoop.config.dir
  ${user.dir}/../../../conf
  
  Path to hadoop configuration. Again assumes that by default we are executing from hive/




  hive.exec.scratchdir
  /tmp/hive-${user.name}
  Scratch space for Hive jobs



  hive.metastore.local
  true
  controls whether to connect to remove metastore server or open a new metastore server in Hive Client JVM



  javax.jdo.option.ConnectionURL
  jdbc:derby://localhost:1527/metastore_db;create=true
  JDBC connect string for a JDBC metastore



  javax.jdo.option.ConnectionDriverName
  org.apache.derby.jdbc.ClientDriver
  Driver class name for a JDBC metastore



  hive.metastore.uris
  thift://
  Comma separated list of URIs of metastore servers. The first server that can be connected to will be used.



  hive.metastore.metadb.dir
  file:///var/metastore/metadb/
  The location of filestore metadata base dir
  hive.metastore.uris
  file:///var/metastore/metadb/
  
  hive.metastore.warehouse.dir
  /user/hive/warehouse
  location of default database for the warehouse
  hive.metastore.connect.retries
  5
  Number of retries while opening a connection to metastore
  hive.metastore.rawstore.impl
  org.apache.hadoop.hive.metastore.ObjectStore
  Name of the class that implements org.apache.hadoop.hive.metastore.rawstore interface. This class is used to store and retrieval of raw metadata objects such as table, database
/usr/local/hadoop-0.19.2/contrib/hive/conf/jpox.properties:

javax.jdo.PersistenceManagerFactoryClass=org.jpox.PersistenceManagerFactoryImpl
org.jpox.autoCreateSchema=false
org.jpox.validateTables=false
org.jpox.validateColumns=false
org.jpox.validateConstraints=false
org.jpox.storeManagerType=rdbms
org.jpox.autoCreateSchema=true
org.jpox.autoStartMechanismMode=checked
org.jpox.transactionIsolation=read_committed
javax.jdo.option.DetachAllOnCommit=true
javax.jdo.option.NontransactionalRead=true
javax.jdo.option.ConnectionDriverName=org.apache.derby.jdbc.ClientDriver
javax.jdo.option.ConnectionURL=jdbc:derby://localhost:1527/metastore_db;create=true
javax.jdo.option.ConnectionUserName=APP
javax.jdo.option.ConnectionPassword=mine
org.jpox.cache.level2=true
org.jpox.cache.level2.type=SOFT
Run hive
/usr/local/hadoop-0.19.2/contrib/hive/bin/hive
Doing this all the metadata definitions are going to be stored in a centralized repository based on Derby database.









Friday, October 9, 2009

How to connect to mysql using C/C++

Pre-requisites


-gcc must be installed
-mysql-dev must be installed


Procedure
-Create a user with access to the database/table you need to access


grant all privileges on test.* to test identified by 'mypassword';


-Create a table and populate it:


CREATE TABLE `mytable` (
  `id` int(11) NOT NULL DEFAULT '0',
  `name` varchar(30) DEFAULT NULL,
  PRIMARY KEY (`id`)
)



insert into mytable values (1,'Luis');
insert into mytable values (2,'Scott');
insert into mytable values (3,'Rob');


-Write the C program: 



#include <mysql.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>


main() {
   MYSQL *conn;
   MYSQL_RES *res;
   MYSQL_ROW row;


   char *server = "127.0.0.1";
   char *user = "test";
   char *password = "mypassword";
   char *database = "test";
   char query[] = "select * from mytable";
   conn = mysql_init(NULL);


   /* Connect to database */
   if (!mysql_real_connect(conn, server,
      user, password, database, 0, NULL, 0)) {
      fprintf(stderr, "%s\n", mysql_error(conn));
      exit(0);
    }


   /* Execute the query */
   if (mysql_query(conn, query)) {
       fprintf(stderr, "%s\n", mysql_error(conn));
       exit(0);
   }


   /* send SQL query */
   res = mysql_use_result(conn);


   /* output fields 1 and 2 of each row */
   while ((row = mysql_fetch_row(res)) != NULL)
      printf("%s %s \n", row[0],row[1]);


   /* Release memory used to store results and close connection */
   mysql_free_result(res);
   mysql_close(conn);
}


-Compile it
$ gcc -c `mysql_config --cflags` mysql_connection.c
$ gcc -o mysql_connection  mysql_connection.o `mysql_config --libs`


-Run it


$ ./mysql_connection
1 Luis
2 Scott
3 Rob