本文通过简单易懂的业务场景,旨在提高大家的 SQL 水平。把文中所有的 SQL 全理解了,工作中遇到的各种 SQL 场景和面试中遇到的各种 SQL 问题都能灵活应对。文中所有的 SQL 都支持 Hive 语法,学会了 Hive 的 SQL,那么 MySQL 的 SQL 题也都可以 cover 住,下面的题目难度从简单到困难的都有。笔者在之前练习 SQL 过程中,发现了网上一些比较好的 SQL 题目,但是苦于好多博客提供的 SQL 可读性、规范性、执行效率并不高,因此才有了今天这篇博文,后续如果发现有可优化的的 SQL,笔者会持续更新本博文。
# 数据表介绍
# 学生表
create table study.student  | |
( | |
student_id string ,-- 学生编号  | |
name string ,-- 学生姓名  | |
birthday string ,-- 学生生日  | |
    sex        string -- 学生性别 | |
) | |
row format delimited fields terminated by '\t';  | 
# 教师表
create table if not exists study.teacher  | |
( | |
teacher_id string ,-- 教师编号  | |
    name       string -- 教师姓名 | |
) | |
row format delimited fields terminated by '\t';  | 
# 课程表
create table if not exists study.course  | |
( | |
course_id string ,-- 课程编号  | |
name string ,-- 课程名  | |
    teacher_id string -- 课程对应的教师编号 | |
) | |
row format delimited fields terminated by '\t';  | 
# 成绩表
create table study.score  | |
( | |
student_id string ,-- 学生编号  | |
course_id string ,-- 课程编号  | |
score int -- 对应的成绩  | |
) | |
row format delimited fields terminated by '\t';  | 
# 答题时请注意:
- 这里认为课程可能是选修课,学生不一定学了所有课程
 - 成绩表里的课程都应该在课程表里存在,而且每门课程都应该有对应的教师,且在教师表里可以找到
 - 成绩表里的学生也应该在学生表里存在
 - 下面的导入数据也是随机写的,读者也可以自己随机制造生成数据导入
 
# 导入数据
# 学生表
insert overwrite table study.student VALUES  | |
('01' , '赵雷' , '1990-01-01' , '男'),  | |
('02' , '钱电' , '1990-12-21' , '男'),  | |
('03' , '孙风' , '1990-12-20' , '男'),  | |
('04' , '李云' , '1990-12-06' , '男'),  | |
('05' , '周梅' , '1991-12-01' , '女'),  | |
('06' , '吴兰' , '1992-01-01' , '女'),  | |
('07' , '郑竹' , '1989-01-01' , '女'),  | |
('09' , '张三' , '2017-12-20' , '女'),  | |
('10' , '李四' , '2017-12-25' , '女'),  | |
('11' , '李四' , '2012-06-06' , '女'),  | |
('12' , '赵六' , '2013-06-13' , '女'),  | |
('13' , '孙七' , '2014-06-01' , '女');  | 
# 教师表
insert overwrite table study.teacher VALUES  | |
('01' , '张三'),  | |
('02' , '李四'),  | |
('03' , '王五');  | 
# 课程表
insert overwrite table study.course VALUES  | |
('01' , '语文' , '02'),  | |
('02' , '数学' , '01'),  | |
('03' , '英语' , '03');  | 
# 成绩表
insert overwrite table study.score VALUES  | |
('01' , '01' , 80),  | |
('01' , '02' , 90),  | |
('01' , '03' , 99),  | |
('02' , '01' , 70),  | |
('02' , '02' , 60),  | |
('02' , '03' , 80),  | |
('03' , '01' , 80),  | |
('03' , '02' , 80),  | |
('03' , '03' , 80),  | |
('04' , '01' , 50),  | |
('04' , '02' , 30),  | |
('04' , '03' , 20),  | |
('05' , '01' , 76),  | |
('05' , '02' , 87),  | |
('06' , '01' , 31),  | |
('06' , '03' , 34),  | |
('07' , '02' , 89),  | |
('07' , '03' , 98);  | 
# 练习题
- 查询所有教师的全部信息(教师编号和姓名)
 - 输出所有学生中男生的全部信息
 - 查询所有学生中男生的全部信息,按照生日排降序
 - 查询所有学生的全部信息,先按照性别排序,再按照生日排降序
 - 求出学生总数
 - 查询学生中男生、女生人数
 - 参加考试的学生中,查出每个学生的学生编号、选了几门课
 - 检索至少选修三门课程的学生学号
 - 查询存在不及格的课程编号
 - 输出所有课程的课程编号、课程名、对应的教师姓名
 - 求每门课程的学生人数及平均分,输出课程编号、对应学生人数、平均分
 - 求每门课程的学生人数及平均分,输出课程名称、对应学生人数、平均分
 - 查询同时学习 "01" 课程和 "02" 课程的学生编号及 01 和 02 课程分数
 - 查询 "01" 课程比 "02" 课程成绩高的学生编号及 01 和 02 课程分数
 - 查询 "01" 课程比 "02" 课程成绩高的学生姓名及 01 和 02 课程分数
 - 查询选择了 "01" 课程但没选择 "02" 课程的学生姓名
 - 查询学过 ' 张三 ' 老师课程的所有同学姓名、生日、性别
 - 查询同时学习 "01"、"02" 课程学生的学生编号以及 "01" 和 "02" 课程成绩
 - 查询学习 "01" 课程但没有学习 "02" 课程学生的学生编号以及 "01" 课程成绩
 - 查询学习 "02" 课程但没有学习 "01" 课程学生的学生编号以及 "02" 课程成绩
 - 查询选课的同学的学生姓名、选课总数、所有课程的成绩总和、课程平均分
 - 查询考试平均分大于 60 分同学的学生姓名、选课总数、所有课程的成绩总和、课程平均分,按照科目数排降序、科目数相同按照分数排降序
 - 检索 "01" 课程分数小于 60 分的学生信息及 "01" 课程分数,按照分数排降序
 - 查询两门及其以上不及格课程的同学的姓名及其平均成绩
 - 查询没有学全所有课程的同学的编号 (包含无选课的同学)
 - 查询 1990 年出生的学生名单
 - 查询名字中含有「风」字的学生信息
 - 查询「李」姓老师的数量
 - 查询至少有两门课与学号为 "01" 的学生所学相同的学生 id
 - 查询选修了全部课程的学生 id 的姓名和姓名
 - 查询和 "01" 号的同学学习的课程完全相同的其他同学的学生 id
 - 统计各科成绩各分数段人数:课程编号,课程名称,[100-85],[85-70],[70-60],[60-0] 及所占百分比
 - 查询学生的总成绩,并进行排名
 - 查询各科成绩前三名的记录
 - 查询出只选修两门课程的学生学号和姓名
 - 查询每门课程的平均成绩,结果按平均成绩降序排列,平均成绩相同时,按课程编号升序排列
 - 查询平均成绩大于等于 85 的所有学生的学号、姓名和平均成绩
 - 查询课程名称为「数学」,且分数低于 60 的学生姓名和分数
 - 查询课程编号为 01 且课程成绩在 80 分及以上的学生的学号和姓名
 - 查询选修「张三」老师所授课程的学生中,成绩最高的学生编号、课程编号及其成绩
 - 统计每门课程的学生选修人数(超过 5 人的课程才统计)。
 - 查询该学生有不同课程但成绩相同的学生编号、课程编号、学生成绩
 
# 答案(请先独立思考后,再参考答案)
- 查询所有教师的全部信息(教师编号和姓名)
 
select *  | |
from teacher;  | 
- 输出所有学生中男生的全部信息
 
select *  | |
from student | |
where sex = '男';  | 
- 查询所有学生中男生的全部信息,按照生日排降序
 
select *  | |
from student | |
where sex='男'  | |
order by birthday desc;  | 
- 查询所有学生的全部信息,先按照性别排序,再按照生日排降序
 
select *  | |
from student | |
order by sex,birthday desc ;  | 
- 求出学生总数
 
select count(*)  | |
from student;  | 
- 查询学生中男生、女生人数
 
select sex, count(*)  | |
from student | |
group by sex;  | 
- 参加考试的学生中,查出每个学生的学生编号、选了几门课
 
select student_id, count(course_id)  | |
from score | |
group by student_id;  | 
- 检索至少选修三门课程的学生学号
 
select student_id, count(course_id) as course_ct  | |
from score | |
group by student_id  | |
having course_ct >= 3;  | 
- 查询存在不及格的课程编号
 
1. 写法一 | |
select distinct course_id, score  | |
from score | |
where score < 60;  | |
2. 写法二 | |
select course_id | |
from score | |
where score<60  | |
group by course_id;  | 
- 输出所有课程的课程编号、课程名、对应的教师姓名
 
1. 写法一 | |
select c.course_id,c.name,t.name  | |
from | |
course c join teacher t on c.teacher_id = t.teacher_id;  | |
2. 写法二 | |
select course_id, a.name as course_name, b.name as teacher_name  | |
from (select course_id, name, teacher_id from study.course) a join (  | |
select teacher_id, name  | |
from study.teacher ) b  | |
on a.teacher_id = b.teacher_id;  | 
- 求每门课程的学生人数及平均分,输出课程编号、对应学生人数、平均分
 
select course_id,count(student_id),avg(score)  | |
from score | |
group by course_id;  | 
- 求每门课程的学生人数及平均分,输出课程名称、对应学生人数、平均分
 
select c.name, count(student_id), avg(score)  | |
from course c left join score s on s.course_id = c.course_id  | |
group by c.name;  | 
- 查询同时学习 "01" 课程和 "02" 课程的学生编号及 01 和 02 课程分数
 
1. 写法一 | |
select s1.student_id,s1.score,s2.score  | |
from score s1 | |
left join score s2 on s1.student_id = s2.student_id  | |
where s1.course_id = '01' and s2.course_id= '02';  | |
2. 写法二 | |
select a.student_id, a.score as score_01, b.score as score_02  | |
from (select * from score where course_id = '01') a  | |
join (  | |
select *  | |
    from score | |
where course_id = '02') b  | |
on a.student_id = b.student_id;  | 
- 查询 "01" 课程比 "02" 课程成绩高的学生编号及 01 和 02 课程分数
 
1. 写法一 | |
select s1.student_id, s1.score, s2.score  | |
from score s1 | |
left join score s2 on s1.student_id = s2.student_id  | |
where s1.course_id = '01'  | |
and s2.course_id = '02'  | |
and s1.score > s2.score;  | |
2. 写法二 | |
select a.student_id, a.score as score_01, b.score as score_02  | |
from (select * from score where course_id = '01') a  | |
join (  | |
select *  | |
    from score | |
where course_id = '02') b  | |
on a.student_id = b.student_id  | |
where a.score > b.score;  | 
- 查询 "01" 课程比 "02" 课程成绩高的学生姓名及 01 和 02 课程分数
 
select s.name, a.score as score_01, b.score as score_02  | |
from (select * from score where course_id = '01') a  | |
join (  | |
select *  | |
    from score | |
where course_id = '02') b  | |
         join student s | |
on a.student_id = b.student_id and a.student_id = s.student_id  | |
where a.score > b.score;  | 
- 连续三道题都是有关联的,前两道题给这道题做铺垫,所以当我们拿到一个向这种比较复杂的需求时,可以进行拆分需求,先拆分成 1、在做 2,最后达到完整的需求 ------ 查询选择了 "01" 课程但没选择 "02" 课程的学生姓名
 
1. 找出选择了 "01"课程但没选择 "02"课程的学生编号  | |
select a.student_id  | |
from (select * from score where course_id = '01') a  | |
left outer join (select * from score where course_id = '02') b  | |
on a.student_id = b.student_id  | |
where b.student_id is null  | |
2. 拿着学生编号关联学生表找到姓名 | |
select name | |
from (select a.student_id as student_id  | |
from (select * from score where course_id = '01') a  | |
left join (select * from score where course_id = '02') b on a.student_id = b.student_id  | |
where b.student_id is null) a  | |
join (  | |
select *  | |
from student) b  | |
on a.student_id = b.student_id  | 
- 查询学过 ' 张三 ' 老师课程的所有同学姓名、生日、性别
 - 找出 ' 张三 ' 老师的教师编号
 - 找出 ' 张三 ' 老师所有教授的课程编号
 - 找出这些课程对应的学生编号
 - 根据学生编号找到对应的学生信息
 
select *  | |
from student s | |
join (  | |
    select student_id | |
    from score s | |
join (  | |
select c.course_id  | |
from (  | |
select teacher_id, name  | |
                 from teacher | |
where name = '张三'  | |
             ) t1 | |
left join (  | |
select teacher_id, course_id  | |
            from course | |
) c on t1.teacher_id = c.teacher_id  | |
) t2 on s.course_id = t2.course_id  | |
) t3 on s.student_id = t3.student_id;  | 
- 查询同时学习 "01"、"02" 课程学生的学生编号以及 "01" 和 "02" 课程成绩
 
select a.student_id as student_id, a.score as score_01, b.score as score_02  | |
from (select * from score where course_id = '01') a  | |
join (  | |
select *  | |
    from score | |
where course_id = '02') b  | |
on a.student_id = b.student_id;  | 
- 查询学习 "01" 课程但没有学习 "02" 课程学生的学生编号以及 "01" 课程成绩
 
select a.student_id as student_id, a.score as score_01  | |
from (select * from score where course_id = '01') a  | |
left join (select * from score where course_id = '02') b  | |
on a.student_id = b.student_id  | |
where b.student_id is null;  | 
- 查询学习 "02" 课程但没有学习 "01" 课程学生的学生编号以及 "02" 课程成绩
 
select b.student_id as student_id, b.score as score_02  | |
from (select * from score where course_id = '01') a  | |
right join (select * from score where course_id = '02') b  | |
on a.student_id = b.student_id  | |
where a.student_id is null;  | 
- 查询选课的同学的学生姓名、选课总数、所有课程的成绩总和、课程平均分
 
select a.name as name ,course_num ,score_sum ,score_avg from ( select student_id ,name from student ) ajoin ( select student_id ,count(course_id) as course_num ,sum(score) as score_sum ,avg(score) as score_avg from score group by student_id ) b on a.student_id=b.student_idorder by course_num desc ,score_sum desc  | 
- 查询考试平均分大于 60 分同学的学生姓名、选课总数、所有课程的成绩总和、课程平均分,按照科目数排降序、科目数相同按照分数排降序
 
select a.name as name ,course_num ,score_sum ,score_avg from ( select student_id ,name from student ) ajoin ( select student_id ,count(course_id) as course_num ,sum(score) as score_sum ,avg(score) as score_avg from score group by student_id ) b on a.student_id=b.student_idwhere score_avg > 60order by course_num desc ,score_sum desc  | 
- 检索 "01" 课程分数小于 60 分的学生信息及 "01" 课程分数,按照分数排降序
 
select a.student_id as student_id ,name ,birthday ,sex ,score from ( select * from student ) ajoin ( select student_id ,score from score where course_id='01' and score < 60 ) b on a.student_id = b.student_idorder by score desc  | 
- 查询两门及其以上不及格课程的同学的姓名及其平均成绩
 
select name ,score_avg from ( select * from student ) ajoin ( select student_id ,score_avg from ( select student_id ,avg(score) as score_avg ,count(case when score < 60 then 1 end) as fail_count from score group by student_id ) a where fail_count > 1 ) b on a.student_id = b.student_id;  | 
查询没有学全所有课程的同学的编号 (包含无选课的同学)
- 在成绩表中找出不满足课程总数的学生 id - 找出学生表中无成绩的学生 id - 二者之和
select b.student_id ( select count(*) as course_num from score ) ajoin ( select student_id ,count(*) as course_num from score ) b on a.course_num = b.course_numunion ( select a.student_id from ( select * from student ) a left join ( select * from score ) b on a.student_id = b.student_id where course_id is null ) b  | 
- 查询 1990 年出生的学生名单
 
select * from studentwhere substr(birthday,1,4)='1990'  | 
- 查询名字中含有「风」字的学生信息
 
select * from studentwhere name like '%风%'  | 
- 查询「李」姓老师的数量
 
select count(*) from teacherwhere name like '李%'  | 
- 查询至少有两门课与学号为 "01" 的学生所学相同的学生 id
 
select a.student_id  | |
from (select b.student_id as student_id, b.course_id as course_id  | |
from (select course_id from score where student_id = '01') a  | |
join (select student_id, course_id from score where student_id <> '01') b  | |
on a.course_id = b.course_id) a  | |
group by a.student_id  | |
having count(course_id) > 1;  | 
- 查询选修了全部课程的学生 id 的姓名和姓名
 
select b.student_id as student_id ,b.name as name ,a.course_num as course_numfrom ( select student_id ,a.course_num as course_num from ( select student_id ,count(course_id) as course_num from course ) a join ( select student_id ,count(*) as course_num from score group by student_id ) b on a.course_num = b.course_num ) ajoin ( select * from student ) bon a.student_id = b.student_id  | 
- 查询和 "01" 号的同学学习的课程完全相同的其他同学的学生 id
 
select student_id | |
from (  | |
select student_id, ct, ct1  | |
from (  | |
select student_id, count(*) ct  | |
from score as a  | |
left join  | |
(select course_id  | |
                        from score | |
where student_id = '01') as b  | |
on a.course_id = b.course_id  | |
group by student_id  | |
              ) t1 | |
join(  | |
select count(*) ct1  | |
             from score | |
where student_id = '01'  | |
         ) t2 | |
     ) t3 | |
where ct = ct1  | |
and student_id != '01'  | 
- 先选出‘01’同学所学的课程 id
 - 然后选出学过这些课程 id 的学生 id 及其选过这些课程数的课程总数
 - 匹配 “01” 同学学的课程总数(存在某同学学的课程超过 “01” 同学学的课程)
 - 匹配成绩表中学生的课程总数
 
统计各科成绩各分数段人数:课程编号,课程名称,[100-85],[85-70],[70-60],[60-0] 及所占百分比
- 根据课程号在成绩表中查询分数段的个数除以该学习该课程的总数
select a.course_id as course_id ,b.name as name ,round(sum(case when score > 85 then 1 else 0 end)/count(*),2) as '100-85' ,round(sum(case when score between 70 and 84 then 1 else 0 end)/count(*),2) as '85-70' ,round(sum(case when score between 60 and 69 then 1 else 0 end)/count(*),2) as '70-60' ,round(sum(case when score < 60 then 1 else 0 end)/count(*),2) as '60-0' from ( select course_id ,score from score ) a join ( select course_id ,name from course ) b on a.course_id = b.course_id group by a.course_id ,b.name  | 
- 查询学生的总成绩,并进行排名
 
select student_id ,sum(score) score_sum ,row_number() over(order by sum(score) desc) as rank from scoregroup by student_id  | 
- 查询各科成绩前三名的记录
 
select course_id,student_id,rk  | |
from(  | |
select *,  | |
rank() over (partition by course_id order by score) rk  | |
        from score | |
        )t1 | |
where rk<=3;  | 
- 查询出只选修两门课程的学生学号和姓名
 
select a.student_id as student_id ,a.name as name from ( select student_id ,name from student ) ajoin ( select student_id from score group by student_id having count(course_id) = 2 ) b on a.student_id = b.student_id;  | 
- 查询每门课程的平均成绩,结果按平均成绩降序排列,平均成绩相同时,按课程编号升序排列
 
select course_id ,avg(score) as score_avg from scoreorder by score_avg desc ,course_id asc  | 
- 查询平均成绩大于等于 85 的所有学生的学号、姓名和平均成绩
 
select a.student_id as student_id ,a.name as name ,b.score_avg from ( select student_id ,name from student ) ajoin ( select student_id ,avg(score) as score_avg from score group by student_id having avg(score) >= 85 ) b on a.student_id = b.student_id  | 
- 查询课程名称为「数学」,且分数低于 60 的学生姓名和分数
 
select name ,score from ( select student_id ,score from ( select course_id from course where name = '数学' ) a join ( select student_id ,course_id ,score from score where score < 60 ) b on a.course_id = b.course_id ) ajoin ( select student_id ,name from student ) b on a.student_id = b.student_id  | 
- 查询课程编号为 01 且课程成绩在 80 分及以上的学生的学号和姓名
 
select a.student_id as student_id ,name from ( select student_id from score where course_id = '01' and score >= 80 ) ajoin ( select student_id ,name from student ) b on a.student_id = b.student_id  | 
- 查询选修「张三」老师所授课程的学生中,成绩最高的学生编号、课程编号及其成绩
 
select student_id ,a.course_id as course_id ,score from ( select course_id from ( select course_id ,teacher_id from course ) a join ( select teacher_id from teacher where name = '张三' ) b on a.teacher_id = b.teacher_id ) ajoin ( select student_id ,course_id ,score from score ) b on a.course_id = b.course_idorder by score desclimit 1;  | 
- 统计每门课程的学生选修人数(超过 5 人的课程才统计)。
 
select a.course_id as course_id ,b.name ,a.num as num from ( select course_id ,count(*) as num from score group by course_id having count(*) >= 5 ) ajoin ( select course_id ,name from course ) b on a.course_id =b.course_id  | 
- 查询该学生有不同课程但成绩相同的学生编号、课程编号、学生成绩
 
1. 写法一 | |
select a.student_id, a.course_id, a.score  | |
from (  | |
         select student_id | |
              , course_id | |
              , score | |
         from score | |
     ) a | |
         join | |
     ( | |
         select student_id | |
              , course_id | |
              , score | |
         from score | |
     ) b | |
on a.student_id = b.student_id  | |
and a.score = b.score  | |
and a.course_id <> b.course_id  | |
group by a.student_id, a.course_id, a.score  | |
2. 写法二 | |
select DISTINCT b.student_id, b.course_id, b.score  | |
from score a,  | |
score b  | |
where a.course_id != b.course_id  | |
and a.score = b.score  | |
and a.student_id = b.student_id;  | |
3. 写法三 | |
select *  | |
from score as t  | |
where not exists (select 1  | |
                  from score | |
where student_id = t.student_id  | |
and course_id <> t.course_id  | |
and score <> t.score)  |