导出fsimage统计hdfs小文件
查找HDFS有哪些小文件以及统计数量
hdfs 导出fsimage文件
# Download the most recent fsimage from the NameNode into the local directory /data.
hdfs dfsadmin -fetchImage /data
转换为可视化数据
# Convert the binary fsimage into delimited text with the Offline Image Viewer:
#   -i  input fsimage (the numeric suffix differs per download; use the file actually fetched)
#   -o  output file
#   -p Delimited -delimiter ","  emit one comma-separated line per inode
hdfs oiv -i /data/fsimage_0000000000930647029 -o /data/fsimage.csv -p Delimited -delimiter ","
删除数据第一行标题
# Delete line 1 (the column-header row) in place so only data rows remain.
# The "$ " shell prompt was removed: pasted verbatim it is not part of the command.
sed -i 1d /data/fsimage.csv
在mysql中创建用于导入数据的表
-- Staging table for the output of `hdfs oiv -p Delimited`.
-- Columns are declared in the same order as the fields of that output, because
-- the load step maps CSV fields to columns by position.
CREATE TABLE IF NOT EXISTS `images_info` (
    -- TEXT instead of VARCHAR(100): HDFS paths are routinely longer than 100
    -- characters and would otherwise be truncated on load.
    `Path`               TEXT,
    -- Integer display widths such as INT(20) / BIGINT(100) do not affect the
    -- stored range and are deprecated in MySQL 8, so they are omitted.
    `Replication`        INT,
    -- Timestamps are kept as the text emitted by oiv.
    `ModificationTime`   VARCHAR(100),
    `AccessTime`         VARCHAR(100),
    `PreferredBlockSize` BIGINT,
    `BlocksCount`        INT,
    -- File length in bytes.
    `FileSize`           BIGINT,
    `NSQUOTA`            VARCHAR(100),
    `DSQUOTA`            VARCHAR(100),
    `Permission`         VARCHAR(100),
    `UserName`           VARCHAR(100),
    `GroupName`          VARCHAR(100)
) CHARSET = utf8;
加载数据到mysql
-- Bulk-load the oiv CSV into images_info (table name corrected from the
-- misspelled `imgaes_info`).
-- FIELDS TERMINATED BY ',' is required: LOAD DATA splits on tabs by default,
-- which would put each whole CSV line into the first column.
-- NOTE(review): a path that itself contains a comma will shift the remaining
-- fields; confirm no such paths exist or export with a different delimiter.
load data local infile '/data/fsimage.csv'
into table images_info
fields terminated by ',';
查找小文件(FileSize 单位为字节,此处统计 FileSize 小于 500 字节的记录)
-- Count entries smaller than 500 bytes.
-- Directories are exported with FileSize 0 and would otherwise all be counted
-- as "small files"; they are excluded by the leading 'd' in Permission.
-- NOTE(review): older oiv versions may not emit the 'd' prefix, in which case
-- this filter is a no-op and directories are still counted; verify on your output.
select count(Path), count(FileSize)
from images_info
where FileSize < 500
  and Permission not like 'd%';
导出fsimage统计hdfs小文件
https://www.hechunyu.com/archives/1698216518765