{ "cells": [ { "cell_type": "code", "execution_count": 11, "id": "6974daac", "metadata": {}, "outputs": [], "source": [ "from pyspark.sql import SparkSession\n", "from pyspark.context import SparkContext" ] }, { "cell_type": "code", "execution_count": 12, "id": "e72fbe47", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pyspark.sql.session.SparkSession" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "SparkSession" ] }, { "cell_type": "code", "execution_count": 13, "id": "9333e07a", "metadata": {}, "outputs": [], "source": [ "# getOrCreate() hands back an already-running session when one exists;\n", "# in that case the master/appName requested here may not take effect.\n", "spark = (\n", "    SparkSession.builder\n", "    .master(\"local\")\n", "    .appName(\"HelloWorld\")\n", "    .getOrCreate()\n", ")" ] }, { "cell_type": "code", "execution_count": 14, "id": "adf79021", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "

SparkSession - hive

\n", " \n", "
\n", "

SparkContext

\n", "\n", "

Spark UI

\n", "\n", "
\n", "
Version
\n", "
v3.1.1
\n", "
Master
\n", "
local[*]
\n", "
AppName
\n", "
PySparkShell
\n", "
\n", "
\n", " \n", "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "spark" ] }, { "cell_type": "code", "execution_count": 15, "id": "324b74ca", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'3.1.1'" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "spark.version" ] }, { "cell_type": "code", "execution_count": 17, "id": "856f2700", "metadata": {}, "outputs": [], "source": [ "# Take the context from the session created above rather than looking it up\n", "# separately, so `sc` and `spark` are explicitly tied to the same context.\n", "sc = spark.sparkContext" ] }, { "cell_type": "code", "execution_count": 18, "id": "7218a07f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "

SparkContext

\n", "\n", "

Spark UI

\n", "\n", "
\n", "
Version
\n", "
v3.1.1
\n", "
Master
\n", "
local[*]
\n", "
AppName
\n", "
PySparkShell
\n", "
\n", "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sc" ] }, { "cell_type": "code", "execution_count": 23, "id": "17114da3", "metadata": {}, "outputs": [], "source": [ "# Local-filesystem URI: `file://` + empty host + absolute path.\n", "txt = sc.textFile('file:///usr/share/doc/python3/copyright')" ] }, { "cell_type": "code", "execution_count": 24, "id": "c1af9b77", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "file:///usr/share/doc/python3/copyright MapPartitionsRDD[4] at textFile at NativeMethodAccessorImpl.java:0" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "txt" ] }, { "cell_type": "code", "execution_count": 25, "id": "ee37564c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pyspark.rdd.RDD" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(txt)" ] }, { "cell_type": "code", "execution_count": 26, "id": "208ef97a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "319" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "txt.count()" ] }, { "cell_type": "code", "execution_count": 27, "id": "06f807b8", "metadata": {}, "outputs": [], "source": [ "# Case-insensitive match: keep every line that mentions \"python\" in any casing.\n", "python_lines = txt.filter(lambda line: 'python' in line.lower())" ] }, { "cell_type": "code", "execution_count": 28, "id": "ff2835b7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "52" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "python_lines.count()" ] }, { "cell_type": "code", "execution_count": 30, "id": "9c8ec8c7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['This is the Debian GNU/Linux prepackaged version of the Python programming',\n", " 'language. 
Python was written by Guido van Rossum and others.',\n", " 'sources from ftp.python.org:/pub/python, based on the Debianization by',\n", " 'Python was created in the early 1990s by Guido van Rossum at Stichting',\n", " \"as a successor of a language called ABC. Guido remains Python's\",\n", " 'In 1995, Guido continued his work on Python at the Corporation for',\n", " 'In May 2000, Guido and the Python core development team moved to',\n", " 'BeOpen.com to form the BeOpen PythonLabs team. In October of the same',\n", " 'year, the PythonLabs team moved to Digital Creations (now Zope',\n", " 'Corporation, see http://www.zope.com). In 2001, the Python Software',\n", " 'Foundation (PSF, see http://www.python.org/psf/) was formed, a',\n", " 'non-profit organization created specifically to own Python-related',\n", " 'All Python releases are Open Source (see http://www.opensource.org for',\n", " 'the Open Source Definition). Historically, most, but not all, Python',\n", " \"(1) GPL-compatible doesn't mean that we're distributing Python under\",\n", " ' the GPL. All Python licenses, unlike the GPL, let you distribute',\n", " ' GPL-compatible licenses make it possible to combine Python with',\n", " 'B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON',\n", " 'PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2',\n", " '1. This LICENSE AGREEMENT is between the Python Software Foundation',\n", " 'otherwise using this software (\"Python\") in source or binary form and',\n", " 'distribute, and otherwise use Python alone or in any derivative version,',\n", " 'Python Software Foundation; All Rights Reserved\" are retained in Python alone or',\n", " 'or incorporates Python or any part thereof, and wants to make',\n", " 'the changes made to Python.',\n", " '4. PSF is making Python available to Licensee on an \"AS IS\"',\n", " 'FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT',\n", " '5. 
PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON',\n", " 'A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,',\n", " '8. By copying, installing or otherwise using Python, Licensee',\n", " 'BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0',\n", " 'BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1',\n", " '2. Subject to the terms and conditions of this BeOpen Python License',\n", " 'provided, however, that the BeOpen Python License is retained in the',\n", " 'third party. As an exception, the \"BeOpen Python\" logos available at',\n", " 'http://www.pythonlabs.com/logos.html may be used according to the',\n", " 'CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1',\n", " '(\"Licensee\") accessing and otherwise using Python 1.6.1 software in',\n", " 'prepare derivative works, distribute, and otherwise use Python 1.6.1',\n", " 'Reserved\" are retained in Python 1.6.1 alone or in any derivative',\n", " 'quotes): \"Python 1.6.1 is made available subject to the terms and',\n", " 'Python 1.6.1 may be located on the Internet using the following',\n", " 'or incorporates Python 1.6.1 or any part thereof, and wants to make',\n", " 'the changes made to Python 1.6.1.',\n", " '4. CNRI is making Python 1.6.1 available to Licensee on an \"AS IS\"',\n", " 'FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT',\n", " '5. 
CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON',\n", " 'A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1,',\n", " 'on Python 1.6.1 that incorporate non-separable material that was',\n", " 'installing or otherwise using Python 1.6.1, Licensee agrees to be',\n", " 'CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2',\n", " 'py3compile, py3clean and debpython module:']" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# collect() brings every matching line back to the driver; that is fine for\n", "# this small file, but prefer take(n) when the RDD may be large.\n", "python_lines.collect()" ] } ], "metadata": { "kernelspec": { "display_name": "cgraph", "language": "python", "name": "cgraph" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.8" } }, "nbformat": 4, "nbformat_minor": 5 }