一道比较难的sql题,筛选出重复字段的行数
select * from 导入数据表;
id city_column
1 北京,上海,广州
2 上海,上海,深圳
3 北京,杭州,北京
4 上海,广州,深圳
select substring_index(khmc,',',1), * from 导入数据表
truncate table 导入数据表
select count(distinct khmc) from 导入数据表;
select count(*)
from 导入数据表
where length(khmc)-length(replace(khmc),',','') +1
select trim(substring_index(substring_index(khmc,',',n),',',-1)), * from 导入数据表
select length(khmc), length(khmc)-length(REGEXP_REPLACE(khmc,',','')) +1 from 导入数据表
-----
select xh, city, count(*)
from(
select /*+ MAPJOIN ( t1 ) */
xh,
trim(substring_index(substring_index(khmc,',',n),',',-1)) as city
from 导入数据表 t1
join
(select 1 as n union select 2 union select 3) numbers
on numbers.n <= length(t1.khmc)-length(REGEXP_REPLACE(t1.khmc,',','')) +1
) group by xh, city
having count(*) > 1
select sum(cnt) from
(
select count(distinct xh) as cnt
from(
select /*+ MAPJOIN ( t1 ) */
xh,
trim(substring_index(substring_index(khmc,',',n),',',-1)) as city
from 导入数据表 t1
join
(select 1 as n union select 2 union select 3) numbers
on numbers.n <= length(t1.khmc)-length(REGEXP_REPLACE(t1.khmc,',','')) +1
) group by xh, city
having count(*) > 1