前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >ETL (Extract-Transform-Load) with Kiba(3)

ETL (Extract-Transform-Load) with Kiba(3)

作者头像
franket
发布2021-10-18 11:57:41
3710
发布2021-10-18 11:57:41
举报
文章被收录于专栏:技术杂记

转化日期

加入解析日期的类 ParseFrenchDate,并定义处理逻辑

代码语言:ruby
复制
[root@h102 kiba]# vim common.rb 
[root@h102 kiba]# cat common.rb 
require 'csv'

# Kiba source that reads a CSV file and yields its rows one by one.
class CsvSource
  # @param file [String] path of the CSV file to read
  # @param options [Hash] options forwarded to CSV.foreach
  #   (for example col_sep:, headers:, header_converters:); keys must be symbols
  def initialize(file, options)
    @file = file
    @options = options
  end

  # Iterates over the file and yields `row.to_hash` for every row.
  # NOTE(review): `to_hash` presumably requires `headers: true`, so that rows
  # are CSV::Row objects rather than plain arrays - confirm against callers.
  #
  # @yieldparam row [Hash] the current record
  # @return [Enumerator] when no block is given
  def each
    return enum_for(:each) unless block_given?

    # Splat the options into keyword arguments. Since Ruby 3.0 a positional
    # Hash is no longer converted to keywords implicitly, so passing @options
    # as-is would hand it to CSV.foreach as its second positional argument
    # (the file mode) instead of as parsing options.
    CSV.foreach(@file, **@options) do |row|
      yield row.to_hash
    end
  end
end


require 'awesome_print'

# Registers a pass-through transform that prints every row with `ap`.
# The block hands the row back unchanged so it stays in the pipeline.
def show_me
  transform { |row| row.tap { |current| ap current } }
end


# Kiba transform that turns a decimal string written with a comma
# (for example "10,96") into a Float.
class ParseFrenchFloat
  # @param from [Object] key of the field holding the source string
  # @param to [Object] key under which the parsed Float is stored
  def initialize(from:, to:)
    @from, @to = from, to
  end

  # Replaces every comma by a dot, parses the result with Float() and stores
  # it under the target key. Float() raises ArgumentError on malformed input.
  #
  # @param row [Hash] the row being processed (modified in place)
  # @return [Hash] the same row
  def process(row)
    normalized = row[@from].tr(',', '.')
    row[@to] = Float(normalized)
    row
  end
end


# Kiba transform that reads a day/month/year date string (for example
# "7/3/2015") and stores it again as the string form of a Date ("2015-03-07").
class ParseFrenchDate
  # @param from [Object] key of the field holding the source string
  # @param to [Object] key under which the reformatted date is stored
  def initialize(from:, to:)
    @from, @to = from, to
  end

  # Parses the source field with the '%d/%m/%Y' pattern and writes
  # Date#to_s of the result under the target key.
  #
  # @param row [Hash] the row being processed (modified in place)
  # @return [Hash] the same row
  def process(row)
    parsed = Date.strptime(row[@from], '%d/%m/%Y')
    row[@to] = parsed.to_s
    row
  end
end
[root@h102 kiba]# vim convert-csv.etl 
[root@h102 kiba]# cat convert-csv.etl 
# Load the source and transform definitions (CsvSource, ParseFrenchFloat,
# ParseFrenchDate, show_me) from common.rb in the same directory.
require_relative 'common'

# Read rows from commandes.csv: fields are separated by ';', the first line
# is used as headers, and header names are converted to symbols.
source CsvSource, 'commandes.csv', col_sep: ';', headers: true, header_converters: :symbol

# Parse the comma-decimal amount in :montant_eur into a Float stored under :amount_eur
transform ParseFrenchFloat, from: :montant_eur, to: :amount_eur

# Reformat the day/month/year date in :date_facture and store it under :invoice_date
transform ParseFrenchDate, from: :date_facture, to: :invoice_date

# Print each row so the effect of the transforms declared before it can be inspected
show_me
[root@h102 kiba]# bundle exec kiba convert-csv.etl
{
       :date_facture => "7/3/2015",
        :montant_eur => "10,96",
    :numero_commande => "FA1986",
         :amount_eur => 10.96,
       :invoice_date => "2015-03-07"
}
{
       :date_facture => "7/3/2015",
        :montant_eur => "85,11",
    :numero_commande => "FA1987",
         :amount_eur => 85.11,
       :invoice_date => "2015-03-07"
}
{
       :date_facture => "8/3/2015",
        :montant_eur => "6,41",
    :numero_commande => "FA1988",
         :amount_eur => 6.41,
       :invoice_date => "2015-03-08"
}
[root@h102 kiba]# 

其中最主要的就是 row[@to] = Date.strptime(row[@from], '%d/%m/%Y').to_s

它的意思是:取出 from 字段(即 Key)对应的值,按 '%d/%m/%Y' 模式解析成日期,再转换为字符串(例如 "2015-03-07"),然后赋给 to 字段;to 是一个新字段,因此会在 row 这个 hash 中新增一个键值对

运行的结果正如预期


对列进行重命名

加入对列进行重命名的类 RenameField ,并定义处理逻辑

代码语言:ruby
复制
[root@h102 kiba]# vim common.rb 
[root@h102 kiba]# cat common.rb 
require 'csv'

# Kiba source that reads a CSV file and yields its rows one by one.
class CsvSource
  # @param file [String] path of the CSV file to read
  # @param options [Hash] options forwarded to CSV.foreach
  #   (for example col_sep:, headers:, header_converters:); keys must be symbols
  def initialize(file, options)
    @file = file
    @options = options
  end

  # Iterates over the file and yields `row.to_hash` for every row.
  # NOTE(review): `to_hash` presumably requires `headers: true`, so that rows
  # are CSV::Row objects rather than plain arrays - confirm against callers.
  #
  # @yieldparam row [Hash] the current record
  # @return [Enumerator] when no block is given
  def each
    return enum_for(:each) unless block_given?

    # Splat the options into keyword arguments. Since Ruby 3.0 a positional
    # Hash is no longer converted to keywords implicitly, so passing @options
    # as-is would hand it to CSV.foreach as its second positional argument
    # (the file mode) instead of as parsing options.
    CSV.foreach(@file, **@options) do |row|
      yield row.to_hash
    end
  end
end


require 'awesome_print'

# Registers a pass-through transform that prints every row with `ap`.
# The block hands the row back unchanged so it stays in the pipeline.
def show_me
  transform { |row| row.tap { |current| ap current } }
end


# Kiba transform that turns a decimal string written with a comma
# (for example "10,96") into a Float.
class ParseFrenchFloat
  # @param from [Object] key of the field holding the source string
  # @param to [Object] key under which the parsed Float is stored
  def initialize(from:, to:)
    @from, @to = from, to
  end

  # Replaces every comma by a dot, parses the result with Float() and stores
  # it under the target key. Float() raises ArgumentError on malformed input.
  #
  # @param row [Hash] the row being processed (modified in place)
  # @return [Hash] the same row
  def process(row)
    normalized = row[@from].tr(',', '.')
    row[@to] = Float(normalized)
    row
  end
end


# Kiba transform that reads a day/month/year date string (for example
# "7/3/2015") and stores it again as the string form of a Date ("2015-03-07").
class ParseFrenchDate
  # @param from [Object] key of the field holding the source string
  # @param to [Object] key under which the reformatted date is stored
  def initialize(from:, to:)
    @from, @to = from, to
  end

  # Parses the source field with the '%d/%m/%Y' pattern and writes
  # Date#to_s of the result under the target key.
  #
  # @param row [Hash] the row being processed (modified in place)
  # @return [Hash] the same row
  def process(row)
    parsed = Date.strptime(row[@from], '%d/%m/%Y')
    row[@to] = parsed.to_s
    row
  end
end


# Kiba transform that moves a value from one key of the row to another.
class RenameField
  # @param from [Object] key to remove from the row
  # @param to [Object] key that receives the removed value
  def initialize(from:, to:)
    @from, @to = from, to
  end

  # Deletes the source key and stores whatever `delete` returned under the
  # target key.
  #
  # @param row [Hash] the row being processed (modified in place)
  # @return [Hash] the same row
  def process(row)
    row.tap { |fields| fields[@to] = fields.delete(@from) }
  end
end
[root@h102 kiba]# vim convert-csv.etl 
[root@h102 kiba]# cat convert-csv.etl 
# Load the source and transform definitions (CsvSource, ParseFrenchFloat,
# ParseFrenchDate, RenameField, show_me) from common.rb in the same directory.
require_relative 'common'

# Read rows from commandes.csv: fields are separated by ';', the first line
# is used as headers, and header names are converted to symbols.
source CsvSource, 'commandes.csv', col_sep: ';', headers: true, header_converters: :symbol

# Parse the comma-decimal amount in :montant_eur into a Float stored under :amount_eur
transform ParseFrenchFloat, from: :montant_eur, to: :amount_eur

# Reformat the day/month/year date in :date_facture and store it under :invoice_date
transform ParseFrenchDate, from: :date_facture, to: :invoice_date

# Rename the remaining column: the value moves from :numero_commande to
# :invoice_number and the original key is removed from the row.
transform RenameField, from: :numero_commande, to: :invoice_number

# Print each row so the effect of the transforms declared before it can be inspected
show_me
[root@h102 kiba]# bundle exec kiba convert-csv.etl
{
      :date_facture => "7/3/2015",
       :montant_eur => "10,96",
        :amount_eur => 10.96,
      :invoice_date => "2015-03-07",
    :invoice_number => "FA1986"
}
{
      :date_facture => "7/3/2015",
       :montant_eur => "85,11",
        :amount_eur => 85.11,
      :invoice_date => "2015-03-07",
    :invoice_number => "FA1987"
}
{
      :date_facture => "8/3/2015",
       :montant_eur => "6,41",
        :amount_eur => 6.41,
      :invoice_date => "2015-03-08",
    :invoice_number => "FA1988"
}
[root@h102 kiba]# 

本文系转载,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文系转载前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • 转化日期
  • 对列进行重命名
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档