当前位置：首页 > news >正文

一道比较难的sql题，筛选出重复字段的行数

news 2025/8/31 7:05:25

select * from 导入数据表;

id   city_column
1   北京,上海,广州
2   上海,上海,深圳
3   北京,杭州,北京
4   上海,广州,深圳

select substring_index(khmc,',',1), * from 导入数据表

truncate table 导入数据表

select count(distinct khmc) from 导入数据表;

select count(*)
from 导入数据表
where length(khmc)-length(replace(khmc),',','') +1

select trim(substring_index(substring_index(khmc,',',n),',',-1)), * from 导入数据表
select length(khmc), length(khmc)-length(REGEXP_REPLACE(khmc,',','')) +1 from 导入数据表

-----

select xh, city, count(*)
from(
select /*+ MAPJOIN ( t1 ) */
xh,
trim(substring_index(substring_index(khmc,',',n),',',-1)) as city
from 导入数据表 t1
join
(select 1 as n union select 2 union select 3) numbers
on numbers.n <= length(t1.khmc)-length(REGEXP_REPLACE(t1.khmc,',','')) +1
) group by xh, city
having count(*) > 1

select sum(cnt) from
(
select count(distinct xh) as cnt
from(
select /*+ MAPJOIN ( t1 ) */
xh,
trim(substring_index(substring_index(khmc,',',n),',',-1)) as city
from 导入数据表 t1
join
(select 1 as n union select 2 union select 3) numbers
on numbers.n <= length(t1.khmc)-length(REGEXP_REPLACE(t1.khmc,',','')) +1
) group by xh, city
having count(*) > 1