ページスクレープしたテキストをデータベースに登録する為、seleniumやsoupを使えば、とったテキストが整ってる場合は、自分のプログラムに関数を入れ処理出来ますが、コピペしてエクセルに貼り付けてSQLにしないと出来ないような整理されてないソースだったので、MySQLのUDFで作ってみました。
CREATE DEFINER=`root`@`localhost` FUNCTION `fc_decLL_from_timeLL`( `srctxt` VARCHAR(20) ) RETURNS double LANGUAGE SQL NOT DETERMINISTIC CONTAINS SQL SQL SECURITY DEFINER COMMENT '時刻経緯度十進変換関数' BEGIN DECLARE resLL DOUBLE DEFAULT 0; -- decimaled return value DECLARE pos_comma TINYINT; -- chart index of . DECLARE txt_int_part VARCHAR(7); -- always 7char or 6char DECLARE txt_dec_part VARCHAR(2); -- decimal part text DECLARE chr_nesw CHAR(1); -- north or east or south or west DECLARE txt_int VARCHAR(3); -- 2 char or 3 char DECLARE txt_min CHAR(2); -- always 2 char DECLARE txt_sec CHAR(2); -- always 2 char SET pos_comma = INSTR(srctxt, '.'); SET txt_int_part = LEFT(srctxt, pos_comma - 1); SET txt_dec_part = SUBSTR(srctxt, pos_comma + 1, pos_comma + 4); SET txt_dec_part = TRIM('N' FROM txt_dec_part); SET txt_dec_part = TRIM('S' FROM txt_dec_part); SET txt_dec_part = TRIM('W' FROM txt_dec_part); SET txt_dec_part = TRIM('E' FROM txt_dec_part); -- 整数部分、分の切り取り IF LENGTH(txt_int_part) = 5 THEN SET txt_int = LEFT(txt_int_part, 1); SET txt_min = MID(txt_int_part, 2, 2); ELSEIF LENGTH(txt_int_part) = 6 THEN SET txt_int = LEFT(txt_int_part, 2); SET txt_min = MID(txt_int_part, 3, 2); ELSEIF LENGTH(txt_int_part) = 7 THEN SET txt_int = LEFT(txt_int_part, 3); SET txt_min = MID(txt_int_part, 4, 2); END IF; -- 秒は常に右2文字 SET txt_sec = RIGHT(txt_int_part, 2); SET chr_nesw = RIGHT(srctxt, 1); SET resLL = CAST(txt_int AS UNSIGNED) + CAST(txt_min AS UNSIGNED) / 60 + CAST(txt_sec AS UNSIGNED) / 3600 + CAST(txt_dec_part AS UNSIGNED) / 360000; -- 南半球、西半球の場合 IF (chr_nesw = 'S' OR chr_nesw = 'W') THEN SET resLL = resLL * -1; END IF; -- debug -- INSERT INTO tst (fld0, fld1) VALUES(txt_int_part, txt_dec_part); -- INSERT INTO tst (fld0, fld1, fld2) VALUES(txt_int, txt_min, txt_sec); RETURN resLL; END
< テキストソース例 >
< Excel貼付例 >