当前位置: 首页 > ai >正文

Spark 之 like 表达式

Spark 优化器中的 LikeSimplification 规则会对 LIKE 表达式做优化:当模式不需要完整的正则匹配时,把它改写为 StartsWith、EndsWith、Contains、EqualTo 等更轻量的谓词。源码如下:

/**
 * Rewrites LIKE predicates whose pattern is simple enough that no full regular expression is
 * needed to evaluate it, for example a pattern that only checks whether a string starts with a
 * given prefix.
 */
object LikeSimplification extends Rule[LogicalPlan] with PredicateHelper {
  // The escape-character guard in `simplifyLike` protects these extractors from escaped
  // trailing '%'. Patterns like "something\%" are simply not optimized, which does not affect
  // correctness.
  private val prefixOnly = "([^_%]+)%".r
  private val suffixOnly = "%([^_%]+)".r
  private val prefixAndSuffix = "([^_%]+)%([^_%]+)".r
  private val infixOnly = "%([^_%]+)%".r
  private val exactText = "([^_%]*)".r

  /**
   * Tries to replace `input LIKE pattern` with a cheaper predicate.
   *
   * @param input      expression the pattern is matched against
   * @param pattern    the LIKE pattern text
   * @param escapeChar escape character of the LIKE expression (backslash by default)
   * @return the replacement predicate, or None when the pattern is left as it is
   */
  private def simplifyLike(
      input: Expression,
      pattern: String,
      escapeChar: Char = '\\'): Option[Expression] = pattern match {
    // A pattern containing the escape character is in one of three situations:
    //   1. it holds an invalid escape sequence, e.g. 'm\aca'
    //   2. it holds an escaped wildcard character, e.g. 'ma\%ca'
    //   3. it holds an escaped escape character, e.g. 'ma\\ca'
    // Some of these could be optimized after resolving the escapes, but for simplicity the
    // rule is skipped whenever the escape character appears at all.
    case _ if pattern.contains(escapeChar) =>
      None
    case prefixOnly(head) =>
      Some(StartsWith(input, Literal(head)))
    case suffixOnly(tail) =>
      Some(EndsWith(input, Literal(tail)))
    case prefixAndSuffix(head, tail) =>
      // 'a%a' is basically 'a%' && '%a', but the extra length check is required so that the
      // single-character input 'a' does not match 'a%a'.
      val longEnough = GreaterThanOrEqual(Length(input), Literal(head.length + tail.length))
      val bothEnds = And(StartsWith(input, Literal(head)), EndsWith(input, Literal(tail)))
      Some(And(longEnough, bothEnds))
    case infixOnly(middle) =>
      Some(Contains(input, Literal(middle)))
    case exactText(text) =>
      Some(EqualTo(input, Literal(text)))
    case _ =>
      None
  }

  /**
   * Simplifies as many patterns of a multi-pattern LIKE expression as possible.
   *
   * Patterns that `simplifyLike` can rewrite become standalone predicates; the others (including
   * null patterns) stay inside a copy of the original expression. The pieces are joined with
   * And for the ALL variants and with Or for the ANY variants.
   *
   * @param child    expression every pattern is matched against
   * @param patterns the patterns of the multi-LIKE expression
   * @param multi    the original expression, returned unchanged when nothing can be simplified
   */
  private def simplifyMultiLike(
      child: Expression,
      patterns: Seq[UTF8String],
      multi: MultiLikeBase): Expression = {
    // Pair each pattern with its simplified form; a null pattern is never simplified.
    val attempts = patterns.map { p =>
      (p, Option(p).flatMap(nonNull => simplifyLike(child, nonNull.toString)))
    }
    val remainPatterns = attempts.collect { case (p, None) => p }
    val replacements = attempts.collect { case (_, Some(simplified)) => simplified }

    // Joins the simplified predicates with `op` and, only when some patterns could not be
    // simplified, appends the reduced multi-LIKE expression using the same operator.
    def combine(
        parts: Seq[Expression],
        op: (Expression, Expression) => Expression,
        untouched: => Expression): Expression = {
      val joined = buildBalancedPredicate(parts, op)
      if (remainPatterns.isEmpty) joined else op(joined, untouched)
    }

    if (replacements.nonEmpty) {
      multi match {
        case l: LikeAll =>
          combine(replacements, And, l.copy(patterns = remainPatterns))
        case l: NotLikeAll =>
          combine(replacements.map(Not(_)), And, l.copy(patterns = remainPatterns))
        case l: LikeAny =>
          combine(replacements, Or, l.copy(patterns = remainPatterns))
        case l: NotLikeAny =>
          combine(replacements.map(Not(_)), Or, l.copy(patterns = remainPatterns))
      }
    } else {
      multi
    }
  }

  /** Applies the simplification to every LIKE-family expression found in the plan. */
  def apply(plan: LogicalPlan): LogicalPlan =
    plan.transformAllExpressionsWithPruning(_.containsPattern(LIKE_FAMLIY), ruleId) {
      case like @ Like(input, Literal(pattern, StringType), escapeChar) =>
        pattern match {
          // "col like null" == null, so a null pattern yields a null boolean literal directly.
          case null => Literal(null, BooleanType)
          case text => simplifyLike(input, text.toString, escapeChar).getOrElse(like)
        }
      case all @ LikeAll(child, patterns) if CollapseProject.isCheap(child) =>
        simplifyMultiLike(child, patterns, all)
      case notAll @ NotLikeAll(child, patterns) if CollapseProject.isCheap(child) =>
        simplifyMultiLike(child, patterns, notAll)
      case any @ LikeAny(child, patterns) if CollapseProject.isCheap(child) =>
        simplifyMultiLike(child, patterns, any)
      case notAny @ NotLikeAny(child, patterns) if CollapseProject.isCheap(child) =>
        simplifyMultiLike(child, patterns, notAny)
    }
}
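测试:下面两个用例对比同一查询的两种写法。模式 '%HotFocus%' 符合上面的 contains 正则,LIKE 会被改写为 Contains;而 '%%HotFocus%%' 不匹配任何一个简化正则,因此仍保留为 LIKE 表达式。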
  // Runs a NOT LIKE filter over testData with adaptive query execution enabled and
  // force-applied, then prints the resulting rows and the schema.
  test("test data, force apply AQE") {
    val forcedAqe = Seq(
      SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
      SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY.key -> "true")
    withSQLConf(forcedAqe: _*) {
      val filtered = sql("SELECT * FROM testData where value not like '%HotFocus%'")
      // Side-effecting calls keep their parentheses.
      filtered.show()
      filtered.printSchema()
    }
  }

在这里插入图片描述

  // Same query shape as the single-'%' test, but with doubled '%' wildcards around the
  // literal. Runs with adaptive query execution enabled and force-applied, then prints the
  // resulting rows and the schema.
  test("test data like, force apply AQE") {
    val forcedAqe = Seq(
      SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
      SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY.key -> "true")
    withSQLConf(forcedAqe: _*) {
      val filtered = sql("SELECT * FROM testData where value not like '%%HotFocus%%'")
      // Side-effecting calls keep their parentheses.
      filtered.show()
      filtered.printSchema()
    }
  }

在这里插入图片描述

http://www.xdnf.cn/news/15324.html

相关文章:

  • uni-app开发的页面跳转全局加载中
  • QT——信号与槽
  • git 访问 github
  • 《恋与深空》中黑白羽毛是谁的代表物?
  • python+Request提取cookie
  • ubuntu22.04下配置qt5.15.17开发环境
  • Elasticsearch9.x核心架构概述
  • 机器学习、深度学习、神经网络之间的关系
  • 多租户云环境下的隔离性保障:虚拟化、容器、安全组如何协同防护?
  • 高德开放平台携手阿里云,面向开发者推出地图服务产品MCP Server
  • Redis技术笔记-主从复制、哨兵与持久化实战指南
  • 工业场合需要千变万化的模拟信号,如何获取?
  • Servlet基础
  • priority_queue的使用和模拟实现以及仿函数
  • FatJar打包和FatJar启动配置文件修改。
  • 对偶原理与蕴含定理
  • [论文阅读] 人工智能 + 软件工程 | 用大语言模型+排名机制,让代码评论自动更新更靠谱
  • Ubuntu22.04 python环境管理
  • 深度解析:htmlspecialchars 与 nl2br 结合使用的前后端协作之道,大学毕业论文——仙盟创梦IDE
  • nginx:SSL_CTX_use_PrivateKey failed
  • 【HTTP版本演变】
  • Python 数据建模与分析项目实战预备 Day5 - 模型训练与评估
  • 九、官方人格提示词汇总(中-1)
  • (LeetCode 每日一题) 1290. 二进制链表转整数 (链表+二进制)
  • Kafka 时间轮深度解析:如何O(1)处理定时任务
  • 前端docx库实现将html页面导出word
  • 【第一章编辑器开发基础第二节编辑器布局_3间距控制(4/4)】
  • Java 大视界 -- 基于 Java 的大数据可视化在城市地下管网管理与风险预警中的应用
  • 显示器核心三要素详解:刷新率、分辨率、色深
  • SpringBoot-26-企业云端开发实践之Vue框架状态管理VueX和数据模拟MockJS