To work with HDFS, first make sure that HDFS is up and running:
To start the Spark cluster:
Launch spark-shell against the Spark cluster:
View the "LICENSE.txt" file that was previously uploaded to HDFS:
Read this file with Spark:
Count the number of lines in the file using the count operation:
We can see that the count took 0.239708 s.
Cache the RDD and run count so that the cache takes effect (cache is lazy; the data is only materialized in memory when an action such as count runs):
The result of running count is:
This time it took 0.21132 s.
Perform the count operation again:
This time it takes only 0.029580 s, because the operation now runs on the cached, in-memory data.
We then perform a wordcount operation on the above RDD:
Store the result in HDFS via saveAsTextFile:
We view the results of the operation from the Web console:
We look at the contents of part-00000 and part-00001 through the command line:
[Email protected] ~]$ hadoop fs -cat /data/resultlicensewordcount/part-00000
15/01/22 13:51:32 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
(under,10)
(unless,3)
(contributions),1)
(offer,1)
(agree,1)
(business,2)
(non-infringement,,1)
(its,4)
(materials,2)
(event,1)
(intentionally,2)
(grant,2)
(writing,1)
(include,3)
(responsibility,,1)
(have,2)
(merchantability,,1)
(contribution,3)
(massachusetts,1)
(express,2)
("Your"), 1)
((i), 1)
(however,,1)
(been,2)
(files;,1)
(this,1)
(stating,1)
(2-clause,1)
(conditions.,1)
(non-exclusive,,2)
(appropriateness,1)
(marked,1)
(risks,1)
(any,28)
(is",4)
(implementation,1)
(filed.,1)
(sections,1)
(fee,1)
(losses),, 1)
(out,1)
(contract,2)
(distribution,1)
(4.,1)
(file,6)
(documentation,,2)
(wherever,1)
(unless,1)
(below)., 1)
(names,,1)
(verbal,,1)
(any,10)
(version,1)
(file.,2)
(are,10)
(no-charge,,2)
(2.,1)
(from,,1)
(reproduction,,3)
(2011-2014,,1)
(assume,1)
(licenses,1)
(data,,2)
(is,2)
(recommend,1)
(prominent,1)
(revisions,,1)
("[]", 1)
(fitness,3)
(otherwise,,3)
(distribution,,1)
(necessarily,1)
(apache,5)
(grant,1)
(contributors,4)
(as,15)
(irrevocable,2)
(inclusion,2)
(purpose,2)
(products,1)
(are,2)
(merely,1)
(file,1)
(definitions.,1)
(form,10)
(implied,4)
(warranty,1)
(patent,1)
(incurred,1)
(8.,1)
(repository,1)
(contributors,1)
("printed,1)
(sell,,2)
(:, 3)
(malfunction,,1)
(version,2)
(origin,1)
(alongside,1)
(crc,1)
(implied.,1)
(contract,,1)
(representatives,,1)
(warranty,1)
(offer,,1)
(org.apache.hadoop.util.bloom.*,1)
(kind,,2)
(is,10)
(conspicuously,1)
(found,1)
(charge,1)
(make,,1)
(file,,1)
(associated,1)
(even,1)
(same,1)
(Don't,1)
(outstanding,1)
(link,1)
([name,1)
(trademarks.,1)
(notice,2)
(endorse,1)
(shall,15)
(contact,1)
(redistributions,4)
(using,1)
(class,1)
(name), 1)
(behalf,5)
(form.,1)
(we,1)
(interruption), 2)
(responsible,1)
(annotations,,1)
(this,4)
(subject,1)
(acting,1)
(permitted,2)
(out,2)
(basis,,2)
(has,2)
(accepting,1)
(defend,,1)
(university,1)
([yyyy],1)
((http://www.one-lab.org), 1)
(event,2)
(granting,1)
(portions,1)
(implied,,1)
(notice,5)
(infringed,1)
(limitation,,1)
(names,2)
(electronic,,1)
(purpose,2)
(licensable,1)
(section), 1)
(conditions,14)
(even,2)
(Acts), 1)
(law,3)
(licenses.,1)
(compression,1)
(readable,1)
(solely,1)
(configuration,1)
(information.,1)
(litigation,2)
(represent,,1)
(warranty,,1)
(shares,,1)
(supersede,1)
(governed,1)
(marks,,1)
(http://code.google.com/p/lz4/,1)
(modification,,2)
(fifty,1)
(sent,1)
(places:,1)
(means,2)
(identifying,1)
(this,22)
(Works",1)
(louvain,1)
(prior,1)
(slicing-by-8,1)
(procurement,2)
(changed,1)
(describing,1)
(only,4)
(contributory,1)
(normally,1)
(indirect,,2)
(without,2)
(works,12)
(documentation,3)
(agreement,1)
(otherwise,3)
("as,4)
(damages,,1)
(patent,,1)
(apache,1)
(without,6)
("NOTICE", 1)
(limitation,1)
(substitute,2)
(Contribution(s),3)
(subject,2)
(submission,1)
(ucl,1)
(title,,1)
(trademarks,,1)
((iii), 1)
(2.0,1)
(fast,1)
(exercise,1)
(accepting,2)
(example,1)
(distribution.,2)
(interfaces,1)
(conditions:,1)
(act,1)
(incorporated,2)
(provides,2)
(limited,4)
(lz4,3)
(2008,2009,2010,1)
(can,2)
(contents,1)
(purpose.,1)
(recipients,1)
("Contribution", 1)
(failure,1)
(communication,3)
(commercial,1)
(works,1)
(language,1)
(permissions,3)
(warranties,4)
(media,1)
(reserved.,2)
(works,,2)
(how,1)
(warranties,,2)
(controlled,1)
(warranty.,1)
(2.0,,1)
((http://www.opensource.org/licenses/bsd-license.php), 1)
(own,4)
(submit,1)
(shall,2)
(reasonable,1)
(reason,1)
(agreed,3)
(systems,1)
(patent,5)
(form,,4)
(technology.,1)
(advised,1)
(systems,,1)
(classes:,1)
(however,2)
(distribution,3)
(damages,2)
((c), 2)
(src/main/native/src/org/apache/hadoop/util:,1)
(profits;,2)
(perpetual,,2)
(applies,1)
(apply,2)
(subcomponents,2)
(modify,2)
(owner],1)
(one,1)
(modifying,1)
(counterclaim,1)
(january,1)
(discussing,1)
(contract,,2)
(with,16)
((C), 1)
(infringement,,1)
(2004,1)
(lawsuit), 1)
(specific,2)
(lz,1)
(warranties,1)
(reproducing,1)
(promote,1)
(beneficial,1)
(advised,2)
((a), 1)
(other,9)
(date,1)
(met:,2)
(publicly,2)
(from,4)
(limited,4)
(display,,1)
(merchantability,2)
(damages,3)
(subcomponents:,1)
(negligence),, 1)
(remain,1)
(conditions,4)
(their,2)
(electronic,1)
(identification,1)
(determining,1)
(consistent,1)
(display,1)
(writing,,3)
(trade,1)
(third-party,2)
(,1299)
(description,1)
(reproduction,,1)
(attached,1)
(list,4)
(*,34)
(indirect,,2)
(designated,1)
(Contribution.",1)
(complies,1)
(addendum,1)
(damages.,1)
(yann,1)
(express,2)
(license;,1)
(6.,1)
(goods,2)
(subsequently,1)
(included,2)
(replaced,1)
(notice,,5)
[Email protected] ~]$ hadoop fs -cat /data/resultlicensewordcount/part-00001
15/01/22 13:52:29 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
(for,6)
(reproduce,,1)
("contributor", 1)
((or,3)
(nothing,1)
(work.,1)
(content,1)
(holders,2)
(add,2)
(through,1)
(all,2)
(perform,,1)
(result,1)
(goodwill,,1)
(herein,1)
(direct,,1)
(used,1)
(to,1)
(harmless,1)
(9.,1)
(these,1)
(control,,1)
(incidental,,2)
(indicated,1)
(part,4)
(alone,1)
(different,1)
(forms,,2)
(purposes,4)
(https://groups.google.com/forum/#!forum/lz4c,1)
(be,7)
(/**,2)
(carry,1)
(separable,1)
(including,5)
(contained,1)
(combination,1)
(calculation,1)
(license,7)
(for,6)
(thereof,,2)
(arising,2)
(constitutes,1)
(but,5)
(types.,1)
(stated,2)
(archives.,1)
(obligations,,1)
(5.,1)
(works;,3)
(nor,1)
("legal,1)
(work,20)
(whole,,2)
(copyright,5)
(at,3)
(copyright,,1)
(redistribution,2)
(object,1)
(copy,3)
(indemnify,,1)
(asserted,1)
(hadoop,1)
(attach,1)
("Control", 1)
(support,,1)
("Object", 1)
(give,1)
(theory,2)
(may,10)
(except,2)
("Work", 1)
(sublicense,,1)
(if,2)
(granted,2)
(project,2)
(authorized,2)
(special,,2)
(by,2)
(retain,2)
(or,65)
(transfer,1)
(fields,1)
(licensor,,1)
((b), 1)
((ii), 1)
(2005,,1)
(of,75)
(does,1)
(transformation,1)
((including,2)
(direct,,2)
(management,1)
(modified,1)
(licensed,1)
(percent,1)
(header,1)
(original,2)
(contributor,,1)
(native,1)
((including,,2)
(particular,3)
(limitations,1)
(the,10)
(including,,2)
(power,,1)
(caused,2)
(de,1)
(appropriate,1)
(against,,1)
(tort,2)
("Source", 1)
(each,4)
(1.,1)
(following,10)
(liability.,2)
(acceptance,1)
("You", 1)
(sole,1)
(from), 1)
(see,1)
(tracking,1)
(for,19)
(cause,2)
(alleging,1)
(obtain,1)
(reproduce,3)
(source,,1)
(control,2)
(exemplary,,2)
(terms,2)
(terms,8)
(syntax,1)
(services;,2)
(made,,1)
(but,4)
(compiled,1)
(issue,1)
("submitted", 1)
(onelab,1)
(algorithm,1)
(was,1)
(while,1)
(entity,,1)
(do,3)
(provided,2)
(no,2)
(license,10)
(entity,3)
(contributions.,2)
(mean,10)
(individual,3)
(institute,1)
(computer,1)
(notices,9)
(neither,1)
(licensor,8)
(strict,2)
(made,1)
(authorship,,2)
(bind,1)
((the,1)
(indemnity,,1)
(distribute,3)
(you,24)
(grants,2)
(brackets,1)
(meet,1)
(for,,1)
(service,1)
(in,31)
(trademark,,1)
(boilerplate,1)
(way,2)
(loss,2)
(distributed,3)
(liability,,4)
(submitted,2)
(public,1)
(of,19)
(managed,1)
(derived,2)
(source,8)
(use,,4)
(name,2)
(definition,,2)
(that,25)
(src/main/native/src/org/apache/hadoop/io/compress/lz4/{lz4.h,lz4.c,lz4hc.h,lz4hc.c},,1)
(customary,1)
(bsd,1)
(thereof,1)
(claims,2)
(consequential,2)
(translation,1)
(format.,1)
(construed,1)
(damage.,2)
(applicable,3)
(binary,4)
(regarding,1)
(european,1)
(excluding,3)
(end,1)
((d), 1)
(choose,1)
(no,2)
(be,2)
(direct,2)
(retain,,1)
(modifications,,3)
(forum,1)
(owner,4)
(use,2)
(informational,1)
(the,3)
(legal,1)
((50%), 1)
(document.,1)
(received,1)
(such,17)
(institute,1)
(distribute,,2)
(whether,2)
(page",1)
((except,1)
(loss,1)
(common,1)
(additions,1)
(bsd-style,1)
(appendix,1)
(use,1)
(disclaimer,2)
(resulting,1)
(on,2)
(hereby,2)
(license.,11)
(software,3)
(whom,1)
(along,1)
(lists,,1)
(required,4)
(or,18)
(ownership,2)
(software,2)
(the,122)
(includes,1)
(obligations,1)
(import,,1)
(not,11)
(either,2)
(terminate,1)
(if,4)
(stoppage,,1)
(provided,9)
(submitted.,1)
(all,3)
(permission.,1)
("License");, 1)
(written,2)
(generated,2)
(consequential,1)
(derivative,17)
(and,11)
(rights,3)
(http://www.apache.org/licenses/,1)
(terms.,1)
(catholique,1)
(deliberate,1)
(entity.,2)
(work,,4)
(special,,1)
(additional,1)
(legal,3)
(034819,1)
(least,1)
(text,4)
(on,11)
(editorial,1)
(redistributing,2)
("License", 1)
(against,1)
(permission,1)
(9,1)
(separate,2)
(and/or,3)
(license,1)
(union,1)
((and,1)
(a)
(including,,1)
(entity,3)
(negligent,1)
(liable,2)
(in,6)
(use,8)
(enclosed,2)
(contains,1)
(files,1)
(Entity",1)
(work.,1)
(owner.,1)
(preferred,1)
(modifications,3)
(brackets!), 1)
(available,1)
(code,5)
(http://www.apache.org/licenses/LICENSE-2.0,1)
(more,1)
(possibility,1)
(product,1)
(liable,1)
(such,2)
(direction,1)
(must,8)
(making,1)
(disclaimer,1)
(disclaimer.,2)
(commission,1)
(OTHERWISE), 2)
(hadoop,1)
((an,1)
(appendix:,1)
("Licensor", 1)
(disclaimed.,2)
("derivative,1)
(elaborations,,1)
(incidental,,1)
(prepare,1)
(a,3)
(exercising,1)
(*/,3)
(which,2)
(pertain,2)
(explicitly,1)
(tort,1)
(3.,1)
(also,1)
(conversions,1)
(liability,2)
(whether,4)
(character,1)
(should,1)
(thereof.,1)
(of,,3)
(your,4)
(royalty-free,,2)
(entities,1)
(or,,1)
(negligence,2)
(author,1)
("not,1)
(source,9)
(then,2)
((including,3)
(redistribution.,1)
(attribution,4)
(by,21)
(to,,4)
(defined,1)
(owner,2)
(if,2)
(an,6)
(/*,1)
(collet.,1)
(improving,1)
(grossly,1)
(copyright,4)
(above,,1)
(theory,,1)
(mailing,1)
(7.,1)
(notwithstanding,1)
(code,,2)
(cross-claim,1)
(provide,1)
((such,1)
(arising,1)
(object,4)
(in,1)
(-,7)
(those,3)
(work,,2)
(easier,1)
(based,1)
(medium,,1)
(within,8)
(worldwide,,2)
(authorship.,1)
(files.,1)
(inability,1)
(you,2)
(possibility,2)
(cannot,1)
(copies,1)
(a,21)
(statement,1)
(above,4)
(state,1)
(work,5)
(by,,3)
(to,41)
(appear.,1)
(your,9)
(where,1)
(liability.,1)
(governing,1)
(not,4)
(license,,6)
(hold,1)
(and,51)
(copyright,15)
(use,,3)
(compliance,1)
(software,,2)
(comment,1)
(additional,4)
(executed,1)
(mechanical,1)
(contributor,8)
[Email protected] ~]$
Spark API Programming Hands-on Practice 02: Using textFile, cache, and count in Cluster Mode