欢迎您访问程序员文章站,本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

101.Spark大型电商项目-各区域热门商品统计-查询用户指定日期范围内的点击行为数据

程序员文章站 2022-05-14 21:33:47
...

目录

代码

AreaTop3ProductSpark.java


本篇文章记录各区域热门商品统计-查询用户指定日期范围内的点击行为数据。

代码

spark.product

AreaTop3ProductSpark.java

package graduation.java.spark.product;

import com.alibaba.fastjson.JSONObject;
import graduation.java.constant.Constants;
import graduation.java.dao.ITaskDAO;
import graduation.java.domain.Task;
import graduation.java.factory.DAOFactory;
import graduation.java.util.ParamUtils;
import graduation.java.util.SparkUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;

/**
 * FileName: AreaTop3ProductSpark
 * Author:   hadoop
 * Email:    [email protected]
 * Date:     19-4-1 4:17 PM
 * Description:
 *
 * Spark job that computes the top-3 popular products for each area.
 * In its current state the job only builds the click-action RDD for the
 * date range configured on the task; nothing consumes that RDD yet.
 */
public class AreaTop3ProductSpark {

    /**
     * Job entry point.
     *
     * @param args command-line arguments; the task id is resolved from them via
     *             {@code ParamUtils.getTaskIdFromArgs}
     * @throws IllegalStateException    if the task or its parameter JSON cannot be loaded
     * @throws IllegalArgumentException if the start or end date parameter is missing or
     *                                  contains characters that are unsafe in a SQL literal
     */
    public static void main(String[] args) {
        // 1. Create the SparkConf.
        SparkConf conf = new SparkConf()
                .setAppName(Constants.SPARK_APP_NAME_PRODUCT);
        SparkUtils.setMaster(conf);

        // 2. Build the Spark context.
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            SQLContext sqlContext = SparkUtils.getSQLContext(sc.sc());

            // 3. Generate mock data.
            SparkUtils.mockData(sc, sqlContext);

            // 4. Resolve the task id from the command line and load the task parameters.
            ITaskDAO iTaskDAO = DAOFactory.getTaskDAO();
            long taskid = ParamUtils.getTaskIdFromArgs(args, Constants.SPARK_LOCAL_TASKID_PRODUCT);
            Task task = iTaskDAO.findById(taskid);
            if (task == null) {
                throw new IllegalStateException("No task found for taskid " + taskid);
            }

            JSONObject taskParam = JSONObject.parseObject(task.getTaskParam());
            if (taskParam == null) {
                // Fail with a clear message instead of a NullPointerException further down.
                throw new IllegalStateException("Task " + taskid + " has no task parameters");
            }

            String startDate = ParamUtils.getParam(taskParam, Constants.PARAM_START_DATE);
            String endDate = ParamUtils.getParam(taskParam, Constants.PARAM_END_DATE);

            // 5. Click actions inside the requested date range.
            // The RDD is not consumed yet in this version of the job, so no Spark action runs on it.
            JavaRDD<Row> clickActionRDD = getClickActionRDDByDate(sqlContext, startDate, endDate);
        } finally {
            // Always release the Spark context, even when a step above fails.
            sc.close();
        }
    }

    /**
     * Queries the click-action data within the given date range.
     *
     * <p>Reads {@code city_id} and {@code click_product_id} (aliased as {@code product_id})
     * from the {@code user_visit_action} table, keeping only rows whose
     * {@code click_product_id} is set (such rows represent click actions) and whose
     * {@code action_time} lies between the two bounds, inclusive.
     *
     * <p>NOTE(review): the bounds are compared against {@code action_time} as quoted
     * literals; if {@code action_time} carries a time-of-day part and {@code endDate} is a
     * bare date, rows on the end date itself may be excluded. Confirm against the table schema.
     *
     * @param sqlContext SQL context used to run the query
     * @param startDate  start of the range (inclusive)
     * @param endDate    end of the range (inclusive)
     * @return RDD of rows with the columns {@code city_id} and {@code product_id}
     * @throws IllegalArgumentException if a bound is null, empty, or contains a quote or backslash
     */
    private static JavaRDD<Row> getClickActionRDDByDate(SQLContext sqlContext, String startDate, String endDate) {
        // The bounds are spliced into the SQL text, so reject anything that could
        // terminate the string literal or silently turn into the text 'null'.
        String safeStart = requireSqlLiteralSafe(startDate, "startDate");
        String safeEnd = requireSqlLiteralSafe(endDate, "endDate");

        String sql = "SELECT " +
                "city_id," +
                "click_product_id product_id " +
                "FROM user_visit_action " +
                "WHERE click_product_id IS NOT NULL " +
                "AND click_product_id != 'NULL' " +
                "AND click_product_id != 'null' " +
                "AND action_time >='" + safeStart + "' " +
                "AND action_time <= '" + safeEnd + "'";

        Dataset<Row> clickActionDs = sqlContext.sql(sql);
        return clickActionDs.javaRDD();
    }

    /**
     * Ensures a value can be embedded between single quotes in the SQL text.
     *
     * @param value the value to check
     * @param name  parameter name used in the error message
     * @return {@code value} unchanged
     * @throws IllegalArgumentException if {@code value} is null, empty, or contains a
     *                                  single quote or backslash
     */
    private static String requireSqlLiteralSafe(String value, String name) {
        if (value == null || value.isEmpty()) {
            throw new IllegalArgumentException(name + " must be provided");
        }
        if (value.indexOf('\'') >= 0 || value.indexOf('\\') >= 0) {
            throw new IllegalArgumentException(name + " contains illegal characters: " + value);
        }
        return value;
    }

}