about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Computer_Science/data_structures/chapter_4/Notes on Data Structures and Programming Techniques (CPSC 223, Spring 2015).html 20469
-rwxr-xr-x Computer_Science/data_structures/chapter_4/a.out bin 12928 -> 0 bytes
-rwxr-xr-x Computer_Science/data_structures/chapter_4/avl_tree bin 13712 -> 17912 bytes
-rw-r--r-- Computer_Science/data_structures/chapter_4/avl_tree.c 68
-rwxr-xr-x Computer_Science/data_structures/chapter_4/avl_tree.c.out bin 0 -> 17912 bytes
-rwxr-xr-x Computer_Science/data_structures/chapter_4/binary_search_tree.c.out bin 0 -> 13624 bytes
-rw-r--r-- Computer_Science/data_structures/chapter_4/depth_or_random_binary_search_tree.pdf bin 0 -> 297160 bytes
-rw-r--r-- Computer_Science/data_structures/chapter_4/depth_or_random_binary_search_tree_handout.pdf bin 0 -> 1056354 bytes
-rw-r--r-- Computer_Science/leetcode/15-3_sum.c 37
-rw-r--r-- Computer_Science/leetcode/15-3_sum.c~ 7
-rw-r--r-- Computer_Science/leetcode/17-letter_combinations_of_a_phone_number.c 46
-rw-r--r-- Computer_Science/leetcode/17-letter_combinations_of_a_phone_number.c~ 11
-rw-r--r-- Computer_Science/leetcode/5-longest_palindromic_substring.c 34
-rw-r--r-- Computer_Science/leetcode/5-longest_palindromic_substring.c~ 7
-rw-r--r-- Computer_Science/leetcode/60-permutation_sequence.c 33
-rw-r--r-- Computer_Science/leetcode/67-add_binary.c 75
-rw-r--r-- Computer_Science/leetcode/67-add_binary.c~ 30
-rw-r--r-- Computer_Science/leetcode/73-set_matrix_zeros.c 46
-rw-r--r-- Computer_Science/leetcode/73-set_matrix_zeros.c~ 7
-rw-r--r-- Computer_Science/leetcode/746-min_cost_climbing_stairs.c 14
-rw-r--r-- Computer_Science/leetcode/746-min_cost_climbing_stairs.c~ 14
-rw-r--r-- Computer_Science/leetcode/75-sort_colors.c 41
-rw-r--r-- Computer_Science/leetcode/75-sort_colors.c~ 7
-rw-r--r-- Personal/Plan/plan.org 43
24 files changed, 20974 insertions, 15 deletions
diff --git a/Computer_Science/data_structures/chapter_4/Notes on Data Structures and Programming Techniques (CPSC 223, Spring 2015).html b/Computer_Science/data_structures/chapter_4/Notes on Data Structures and Programming Techniques (CPSC 223, Spring 2015).html
new file mode 100644
index 0000000..4c11671
--- /dev/null
+++ b/Computer_Science/data_structures/chapter_4/Notes on Data Structures and Programming Techniques (CPSC 223, Spring 2015).html
@@ -0,0 +1,20469 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml"><head>
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+ <meta http-equiv="Content-Style-Type" content="text/css">
+ <meta name="generator" content="pandoc">
+ <meta name="author" content="James Aspnes">
+ <title>Notes on Data Structures and Programming Techniques (CPSC 223, Spring 2015)</title>
+ <style type="text/css">code{white-space: pre;}</style>
+ <style type="text/css">
+div.sourceCode { overflow-x: auto; }
+table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
+ margin: 0; padding: 0; vertical-align: baseline; border: none; }
+table.sourceCode { width: 100%; line-height: 100%; }
+td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
+td.sourceCode { padding-left: 5px; }
+code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
+code > span.dt { color: #902000; } /* DataType */
+code > span.dv { color: #40a070; } /* DecVal */
+code > span.bn { color: #40a070; } /* BaseN */
+code > span.fl { color: #40a070; } /* Float */
+code > span.ch { color: #4070a0; } /* Char */
+code > span.st { color: #4070a0; } /* String */
+code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
+code > span.ot { color: #007020; } /* Other */
+code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
+code > span.fu { color: #06287e; } /* Function */
+code > span.er { color: #ff0000; font-weight: bold; } /* Error */
+code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
+code > span.cn { color: #880000; } /* Constant */
+code > span.sc { color: #4070a0; } /* SpecialChar */
+code > span.vs { color: #4070a0; } /* VerbatimString */
+code > span.ss { color: #bb6688; } /* SpecialString */
+code > span.im { } /* Import */
+code > span.va { color: #19177c; } /* Variable */
+code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
+code > span.op { color: #666666; } /* Operator */
+code > span.bu { } /* BuiltIn */
+code > span.ex { } /* Extension */
+code > span.pp { color: #bc7a00; } /* Preprocessor */
+code > span.at { color: #7d9029; } /* Attribute */
+code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
+code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
+code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
+code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
+ </style>
+ <link href="data:text/css;charset=utf-8,%0Ahtml%20%7B%0Afont%2Dsize%3A%20100%25%3B%0Aoverflow%2Dy%3A%20scroll%3B%0A%2Dwebkit%2Dtext%2Dsize%2Dadjust%3A%20100%25%3B%0A%2Dms%2Dtext%2Dsize%2Dadjust%3A%20100%25%3B%0A%7D%0Abody%20%7B%0Acolor%3A%20%23444%3B%0Afont%2Dfamily%3A%20Georgia%2C%20Palatino%2C%20%27Palatino%20Linotype%27%2C%20Times%2C%20%27Times%20New%20Roman%27%2C%20serif%3B%0Afont%2Dsize%3A%2012px%3B%0Aline%2Dheight%3A%201%2E4%3B%0Apadding%3A%201em%3B%0Amargin%3A%20auto%3B%0Amax%2Dwidth%3A%2044em%3B%0Abackground%3A%20%23fefefe%3B%0A%7D%0Aa%20%7B%0Acolor%3A%20%230645ad%3B%0Atext%2Ddecoration%3A%20none%3B%0A%7D%0Aa%3Avisited%20%7B%0Acolor%3A%20%230b0080%3B%0A%7D%0Aa%3Ahover%20%7B%0Acolor%3A%20%2306e%3B%0A%7D%0Aa%3Aactive%20%7B%0Acolor%3A%20%23faa700%3B%0A%7D%0Aa%3Afocus%20%7B%0Aoutline%3A%20thin%20dotted%3B%0A%7D%0A%2A%3A%3A%2Dmoz%2Dselection%20%7B%0Abackground%3A%20rgba%28255%2C%20255%2C%200%2C%200%2E3%29%3B%0Acolor%3A%20%23000%3B%0A%7D%0A%2A%3A%3Aselection%20%7B%0Abackground%3A%20rgba%28255%2C%20255%2C%200%2C%200%2E3%29%3B%0Acolor%3A%20%23000%3B%0A%7D%0Aa%3A%3A%2Dmoz%2Dselection%20%7B%0Abackground%3A%20rgba%28255%2C%20255%2C%200%2C%200%2E3%29%3B%0Acolor%3A%20%230645ad%3B%0A%7D%0Aa%3A%3Aselection%20%7B%0Abackground%3A%20rgba%28255%2C%20255%2C%200%2C%200%2E3%29%3B%0Acolor%3A%20%230645ad%3B%0A%7D%0Ap%20%7B%0Amargin%3A%201em%200%3B%0A%7D%0Aimg%20%7B%0Amax%2Dwidth%3A%20100%25%3B%0A%7D%0Ah1%2C%20h2%2C%20h3%2C%20h4%2C%20h5%2C%20h6%20%7B%0Acolor%3A%20%23111%3B%0Aline%2Dheight%3A%20125%25%3B%0Amargin%2Dtop%3A%202em%3B%0Afont%2Dweight%3A%20normal%3B%0A%7D%0Ah4%2C%20h5%2C%20h6%20%7B%0Afont%2Dweight%3A%20bold%3B%0A%7D%0Ah1%20%7B%0Afont%2Dsize%3A%202%2E5em%3B%0A%7D%0Ah2%20%7B%0Afont%2Dsize%3A%202em%3B%0A%7D%0Ah3%20%7B%0Afont%2Dsize%3A%201%2E5em%3B%0A%7D%0Ah4%20%7B%0Afont%2Dsize%3A%201%2E2em%3B%0A%7D%0Ah5%20%7B%0Afont%2Dsize%3A%201em%3B%0A%7D%0Ah6%20%7B%0Afont%2Dsize%3A%200%2E9em%3B%0A%7D%0Ablockquote%20%7B%0Acolor%3A%20%23666666%3B%0Amargin%3A%200%3B%0Apadding%2Dleft%3A%20
3em%3B%0Aborder%2Dleft%3A%200%2E5em%20%23EEE%20solid%3B%0A%7D%0Ahr%20%7B%0Adisplay%3A%20block%3B%0Aheight%3A%202px%3B%0Aborder%3A%200%3B%0Aborder%2Dtop%3A%201px%20solid%20%23aaa%3B%0Aborder%2Dbottom%3A%201px%20solid%20%23eee%3B%0Amargin%3A%201em%200%3B%0Apadding%3A%200%3B%0A%7D%0Apre%2C%20code%2C%20kbd%2C%20samp%20%7B%0Acolor%3A%20%23000%3B%0Afont%2Dfamily%3A%20monospace%2C%20monospace%3B%0A%5Ffont%2Dfamily%3A%20%27courier%20new%27%2C%20monospace%3B%0Afont%2Dsize%3A%200%2E98em%3B%0A%7D%0Apre%20%7B%0Awhite%2Dspace%3A%20pre%3B%0Awhite%2Dspace%3A%20pre%2Dwrap%3B%0Aword%2Dwrap%3A%20break%2Dword%3B%0A%7D%0Ab%2C%20strong%20%7B%0Afont%2Dweight%3A%20bold%3B%0A%7D%0Adfn%20%7B%0Afont%2Dstyle%3A%20italic%3B%0A%7D%0Ains%20%7B%0Abackground%3A%20%23ff9%3B%0Acolor%3A%20%23000%3B%0Atext%2Ddecoration%3A%20none%3B%0A%7D%0Amark%20%7B%0Abackground%3A%20%23ff0%3B%0Acolor%3A%20%23000%3B%0Afont%2Dstyle%3A%20italic%3B%0Afont%2Dweight%3A%20bold%3B%0A%7D%0Asub%2C%20sup%20%7B%0Afont%2Dsize%3A%2075%25%3B%0Aline%2Dheight%3A%200%3B%0Aposition%3A%20relative%3B%0Avertical%2Dalign%3A%20baseline%3B%0A%7D%0Asup%20%7B%0Atop%3A%20%2D0%2E5em%3B%0A%7D%0Asub%20%7B%0Abottom%3A%20%2D0%2E25em%3B%0A%7D%0Aul%2C%20ol%20%7B%0Amargin%3A%201em%200%3B%0Apadding%3A%200%200%200%202em%3B%0A%7D%0Ali%20p%3Alast%2Dchild%20%7B%0Amargin%2Dbottom%3A%200%3B%0A%7D%0Aul%20ul%2C%20ol%20ol%20%7B%0Amargin%3A%20%2E3em%200%3B%0A%7D%0Adl%20%7B%0Amargin%2Dbottom%3A%201em%3B%0A%7D%0Adt%20%7B%0Afont%2Dweight%3A%20bold%3B%0Amargin%2Dbottom%3A%20%2E8em%3B%0A%7D%0Add%20%7B%0Amargin%3A%200%200%20%2E8em%202em%3B%0A%7D%0Add%3Alast%2Dchild%20%7B%0Amargin%2Dbottom%3A%200%3B%0A%7D%0Aimg%20%7B%0Aborder%3A%200%3B%0A%2Dms%2Dinterpolation%2Dmode%3A%20bicubic%3B%0Avertical%2Dalign%3A%20middle%3B%0A%7D%0Afigure%20%7B%0Adisplay%3A%20block%3B%0Atext%2Dalign%3A%20center%3B%0Amargin%3A%201em%200%3B%0A%7D%0Afigure%20img%20%7B%0Aborder%3A%20none%3B%0Amargin%3A%200%20auto%3B%0A%7D%0Afigcaption%20%7B%0Afont%2Dsize%3A%200%2E8em%3B%0Afont%2Dstyle%3A%20italic%3
B%0Amargin%3A%200%200%20%2E8em%3B%0A%7D%0Atable%20%7B%0Amargin%2Dbottom%3A%202em%3B%0Aborder%2Dbottom%3A%201px%20solid%20%23ddd%3B%0Aborder%2Dright%3A%201px%20solid%20%23ddd%3B%0Aborder%2Dspacing%3A%200%3B%0Aborder%2Dcollapse%3A%20collapse%3B%0A%7D%0Atable%20th%20%7B%0Apadding%3A%20%2E2em%201em%3B%0Abackground%2Dcolor%3A%20%23eee%3B%0Aborder%2Dtop%3A%201px%20solid%20%23ddd%3B%0Aborder%2Dleft%3A%201px%20solid%20%23ddd%3B%0A%7D%0Atable%20td%20%7B%0Apadding%3A%20%2E2em%201em%3B%0Aborder%2Dtop%3A%201px%20solid%20%23ddd%3B%0Aborder%2Dleft%3A%201px%20solid%20%23ddd%3B%0Avertical%2Dalign%3A%20top%3B%0A%7D%0A%2Eauthor%20%7B%0Afont%2Dsize%3A%201%2E2em%3B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%40media%20only%20screen%20and%20%28min%2Dwidth%3A%20480px%29%20%7B%0Abody%20%7B%0Afont%2Dsize%3A%2014px%3B%0A%7D%0A%7D%0A%40media%20only%20screen%20and%20%28min%2Dwidth%3A%20768px%29%20%7B%0Abody%20%7B%0Afont%2Dsize%3A%2016px%3B%0A%7D%0A%7D%0A%40media%20print%20%7B%0A%2A%20%7B%0Abackground%3A%20transparent%20%21important%3B%0Acolor%3A%20black%20%21important%3B%0Afilter%3A%20none%20%21important%3B%0A%2Dms%2Dfilter%3A%20none%20%21important%3B%0A%7D%0Abody%20%7B%0Afont%2Dsize%3A%2012pt%3B%0Amax%2Dwidth%3A%20100%25%3B%0A%7D%0Aa%2C%20a%3Avisited%20%7B%0Atext%2Ddecoration%3A%20underline%3B%0A%7D%0Ahr%20%7B%0Aheight%3A%201px%3B%0Aborder%3A%200%3B%0Aborder%2Dbottom%3A%201px%20solid%20black%3B%0A%7D%0Aa%5Bhref%5D%3Aafter%20%7B%0Acontent%3A%20%22%20%28%22%20attr%28href%29%20%22%29%22%3B%0A%7D%0Aabbr%5Btitle%5D%3Aafter%20%7B%0Acontent%3A%20%22%20%28%22%20attr%28title%29%20%22%29%22%3B%0A%7D%0A%2Eir%20a%3Aafter%2C%20a%5Bhref%5E%3D%22javascript%3A%22%5D%3Aafter%2C%20a%5Bhref%5E%3D%22%23%22%5D%3Aafter%20%7B%0Acontent%3A%20%22%22%3B%0A%7D%0Apre%2C%20blockquote%20%7B%0Aborder%3A%201px%20solid%20%23999%3B%0Apadding%2Dright%3A%201em%3B%0Apage%2Dbreak%2Dinside%3A%20avoid%3B%0A%7D%0Atr%2C%20img%20%7B%0Apage%2Dbreak%2Dinside%3A%20avoid%3B%0A%7D%0Aimg%20%7B%0Amax%2Dwidth%3A%20100%25%20%21important%3B%0A%7D%0
A%40page%20%3Aleft%20%7B%0Amargin%3A%2015mm%2020mm%2015mm%2010mm%3B%0A%7D%0A%40page%20%3Aright%20%7B%0Amargin%3A%2015mm%2010mm%2015mm%2020mm%3B%0A%7D%0Ap%2C%20h2%2C%20h3%20%7B%0Aorphans%3A%203%3B%0Awidows%3A%203%3B%0A%7D%0Ah2%2C%20h3%20%7B%0Apage%2Dbreak%2Dafter%3A%20avoid%3B%0A%7D%0A%7D%0A" rel="stylesheet" type="text/css">
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+<style></style></head>
+<body>
+<div id="header">
+<h1 class="title">Notes on Data Structures and Programming Techniques (CPSC 223, Spring 2015)</h1>
+<h2 class="author">James Aspnes</h2>
+<h3 class="date">2017-12-02T12:59:47-0500</h3>
+</div>
+<div id="TOC">
+<ul>
+<li><a href="#courseAdministration"><span class="toc-section-number">1</span> Course administration</a><ul>
+<li><a href="#index"><span class="toc-section-number">1.1</span> Overview</a><ul>
+<li><a href="#license"><span class="toc-section-number">1.1.1</span> License</a></li>
+<li><a href="#resources"><span class="toc-section-number">1.1.2</span> Resources</a></li>
+<li><a href="#Documentation"><span class="toc-section-number">1.1.3</span> Documentation</a></li>
+<li><a href="#questions-and-comments"><span class="toc-section-number">1.1.4</span> Questions and comments</a></li>
+</ul></li>
+<li><a href="#schedule"><span class="toc-section-number">1.2</span> Lecture schedule</a><ul>
+<li><a href="#topics-by-date"><span class="toc-section-number">1.2.1</span> Topics by date</a></li>
+<li><a href="#topics-not-covered-in-2015"><span class="toc-section-number">1.2.2</span> Topics not covered in 2015</a></li>
+</ul></li>
+<li><a href="#syllabus"><span class="toc-section-number">1.3</span> Syllabus</a><ul>
+<li><a href="#On-line_course_information"><span class="toc-section-number">1.3.1</span> On-line course information</a></li>
+<li><a href="#Meeting_times"><span class="toc-section-number">1.3.2</span> Meeting times</a></li>
+<li><a href="#Synopsis_of_the_course"><span class="toc-section-number">1.3.3</span> Synopsis of the course</a></li>
+<li><a href="#Prerequisites"><span class="toc-section-number">1.3.4</span> Prerequisites</a></li>
+<li><a href="#Textbook"><span class="toc-section-number">1.3.5</span> Textbook</a></li>
+<li><a href="#Course_requirements"><span class="toc-section-number">1.3.6</span> Course requirements</a></li>
+<li><a href="#staff"><span class="toc-section-number">1.3.7</span> Staff</a><ul>
+<li><a href="#instructor"><span class="toc-section-number">1.3.7.1</span> Instructor</a></li>
+<li><a href="#teaching-fellows"><span class="toc-section-number">1.3.7.2</span> Teaching Fellows</a></li>
+<li><a href="#peer-tutors"><span class="toc-section-number">1.3.7.3</span> Peer tutors</a></li>
+</ul></li>
+<li><a href="#Use_of_outside_help"><span class="toc-section-number">1.3.8</span> Use of outside help</a></li>
+<li><a href="#Clarifications_for_homework_assignments"><span class="toc-section-number">1.3.9</span> Clarifications for homework assignments</a></li>
+<li><a href="#Late_assignments"><span class="toc-section-number">1.3.10</span> Late assignments</a></li>
+</ul></li>
+<li><a href="#introduction"><span class="toc-section-number">1.4</span> Introduction</a><ul>
+<li><a href="#whyC"><span class="toc-section-number">1.4.1</span> Why should you learn to program in C?</a></li>
+<li><a href="#why-should-you-learn-about-data-structures-and-programming-techniques"><span class="toc-section-number">1.4.2</span> Why should you learn about data structures and programming techniques?</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#zoo"><span class="toc-section-number">2</span> The Zoo and the Zoo Annex</a><ul>
+<li><a href="#Getting_an_account"><span class="toc-section-number">2.1</span> Getting an account</a></li>
+<li><a href="#Getting_into_the_room"><span class="toc-section-number">2.2</span> Getting into the room</a></li>
+<li><a href="#Remote_use"><span class="toc-section-number">2.3</span> Remote use</a><ul>
+<li><a href="#fastX"><span class="toc-section-number">2.3.1</span> Access using FastX</a><ul>
+<li><a href="#fastXLicense"><span class="toc-section-number">2.3.1.1</span> Getting a license key</a></li>
+<li><a href="#fastXZooAnnex"><span class="toc-section-number">2.3.1.2</span> FastX in the Zoo Annex</a></li>
+<li><a href="#fastXWindows"><span class="toc-section-number">2.3.1.3</span> Using FastX from Windows</a></li>
+<li><a href="#fastXOSX"><span class="toc-section-number">2.3.1.4</span> Using FastX from OSX</a></li>
+</ul></li>
+<li><a href="#zooSSH"><span class="toc-section-number">2.3.2</span> Terminal access</a></li>
+<li><a href="#GUI_access"><span class="toc-section-number">2.3.3</span> GUI access</a></li>
+</ul></li>
+<li><a href="#compiling"><span class="toc-section-number">2.4</span> How to compile and run programs</a><ul>
+<li><a href="#Creating_the_program"><span class="toc-section-number">2.4.1</span> Creating the program</a></li>
+<li><a href="#Compiling_and_running_a_program"><span class="toc-section-number">2.4.2</span> Compiling and running a program</a></li>
+<li><a href="#Some_notes_on_what_the_program_does"><span class="toc-section-number">2.4.3</span> Some notes on what the program does</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#linux"><span class="toc-section-number">3</span> The Linux programming environment</a><ul>
+<li><a href="#the-shell"><span class="toc-section-number">3.1</span> The shell</a><ul>
+<li><a href="#Getting_a_shell_prompt_in_the_Zoo"><span class="toc-section-number">3.1.1</span> Getting a shell prompt in the Zoo</a></li>
+<li><a href="#The_Unix_filesystem"><span class="toc-section-number">3.1.2</span> The Unix filesystem</a></li>
+<li><a href="#Unix_command-line_programs"><span class="toc-section-number">3.1.3</span> Unix command-line programs</a></li>
+<li><a href="#Stopping_and_interrupting_programs"><span class="toc-section-number">3.1.4</span> Stopping and interrupting programs</a></li>
+<li><a href="#Running_your_own_programs"><span class="toc-section-number">3.1.5</span> Running your own programs</a></li>
+<li><a href="#shellRedirects"><span class="toc-section-number">3.1.6</span> Redirecting input and output</a></li>
+</ul></li>
+<li><a href="#editing"><span class="toc-section-number">3.2</span> Text editors</a><ul>
+<li><a href="#Writing_C_programs_with_Emacs"><span class="toc-section-number">3.2.1</span> Writing C programs with Emacs</a><ul>
+<li><a href="#My_favorite_Emacs_commands"><span class="toc-section-number">3.2.1.1</span> My favorite Emacs commands</a></li>
+</ul></li>
+<li><a href="#Using_Vi_instead_of_Emacs"><span class="toc-section-number">3.2.2</span> Using Vi instead of Emacs</a><ul>
+<li><a href="#My_favorite_Vim_commands"><span class="toc-section-number">3.2.2.1</span> My favorite Vim commands</a><ul>
+<li><a href="#Normal_mode"><span class="toc-section-number">3.2.2.1.1</span> Normal mode</a></li>
+<li><a href="#Insert_mode"><span class="toc-section-number">3.2.2.1.2</span> Insert mode</a></li>
+</ul></li>
+<li><a href="#Settings"><span class="toc-section-number">3.2.2.2</span> Settings</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#compilationTools"><span class="toc-section-number">3.3</span> Compilation tools</a><ul>
+<li><a href="#gcc"><span class="toc-section-number">3.3.1</span> The GNU C compiler <code>gcc</code></a></li>
+<li><a href="#make"><span class="toc-section-number">3.3.2</span> Make</a><ul>
+<li><a href="#Make_gotchas"><span class="toc-section-number">3.3.2.1</span> Make gotchas</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#debugging"><span class="toc-section-number">3.4</span> Debugging tools</a><ul>
+<li><a href="#Debugging_in_general"><span class="toc-section-number">3.4.1</span> Debugging in general</a></li>
+<li><a href="#Assertions"><span class="toc-section-number">3.4.2</span> Assertions</a></li>
+<li><a href="#gdb"><span class="toc-section-number">3.4.3</span> The GNU debugger <code>gdb</code></a><ul>
+<li><a href="#My_favorite_gdb_commands"><span class="toc-section-number">3.4.3.1</span> My favorite gdb commands</a></li>
+<li><a href="#Debugging_strategies"><span class="toc-section-number">3.4.3.2</span> Debugging strategies</a></li>
+<li><a href="#common-applications-of-gdb"><span class="toc-section-number">3.4.3.3</span> Common applications of <code>gdb</code></a><ul>
+<li><a href="#watching-your-program-run"><span class="toc-section-number">3.4.3.3.1</span> Watching your program run</a></li>
+<li><a href="#dealing-with-failed-assertions"><span class="toc-section-number">3.4.3.3.2</span> Dealing with failed assertions</a></li>
+<li><a href="#dealing-with-segmentation-faults"><span class="toc-section-number">3.4.3.3.3</span> Dealing with segmentation faults</a></li>
+<li><a href="#dealing-with-infinite-loops"><span class="toc-section-number">3.4.3.3.4</span> Dealing with infinite loops</a></li>
+<li><a href="#mysterious-variable-changes"><span class="toc-section-number">3.4.3.3.5</span> Mysterious variable changes</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#valgrind"><span class="toc-section-number">3.4.4</span> Valgrind</a><ul>
+<li><a href="#Compilation_flags"><span class="toc-section-number">3.4.4.1</span> Compilation flags</a></li>
+<li><a href="#Automated_testing"><span class="toc-section-number">3.4.4.2</span> Automated testing</a></li>
+<li><a href="#Examples_of_some_common_valgrindErrors"><span class="toc-section-number">3.4.4.3</span> Examples of some common valgrind errors</a><ul>
+<li><a href="#Uninitialized_values"><span class="toc-section-number">3.4.4.3.1</span> Uninitialized values</a></li>
+<li><a href="#Bytes_definitely_lost"><span class="toc-section-number">3.4.4.3.2</span> Bytes definitely lost</a></li>
+<li><a href="#Invalid_write_or_read_operations"><span class="toc-section-number">3.4.4.3.3</span> Invalid write or read operations</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#Not_recommended:_debugging_output"><span class="toc-section-number">3.4.5</span> Not recommended: debugging output</a></li>
+</ul></li>
+<li><a href="#performanceTuning"><span class="toc-section-number">3.5</span> Performance tuning</a><ul>
+<li><a href="#Timing_under_Linux"><span class="toc-section-number">3.5.1</span> Timing under Linux</a></li>
+<li><a href="#profiling"><span class="toc-section-number">3.5.2</span> Profiling with gprof</a><ul>
+<li><a href="#effect-of-optimization-during-compilation"><span class="toc-section-number">3.5.2.1</span> Effect of optimization during compilation</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#versionControl"><span class="toc-section-number">3.6</span> Version control</a><ul>
+<li><a href="#Setting_up_Git"><span class="toc-section-number">3.6.1</span> Setting up Git</a></li>
+<li><a href="#Editing_files"><span class="toc-section-number">3.6.2</span> Editing files</a></li>
+<li><a href="#Renaming_files"><span class="toc-section-number">3.6.3</span> Renaming files</a></li>
+<li><a href="#Adding_and_removing_files"><span class="toc-section-number">3.6.4</span> Adding and removing files</a></li>
+<li><a href="#Recovering_files_from_the_repository"><span class="toc-section-number">3.6.5</span> Recovering files from the repository</a></li>
+<li><a href="#Undoing_bad_commits"><span class="toc-section-number">3.6.6</span> Undoing bad commits</a></li>
+<li><a href="#Looking_at_old_versions"><span class="toc-section-number">3.6.7</span> Looking at old versions</a></li>
+<li><a href="#More_information_about_Git"><span class="toc-section-number">3.6.8</span> More information about Git</a></li>
+</ul></li>
+<li><a href="#submitScript"><span class="toc-section-number">3.7</span> Submitting assignments</a></li>
+</ul></li>
+<li><a href="#c"><span class="toc-section-number">4</span> The C programming language</a><ul>
+<li><a href="#CProgramStructure"><span class="toc-section-number">4.1</span> Structure of a C program</a></li>
+<li><a href="#numericTypes"><span class="toc-section-number">4.2</span> Numeric data types</a><ul>
+<li><a href="#integerTypes"><span class="toc-section-number">4.2.1</span> Integer types in C</a><ul>
+<li><a href="#basicIntegerTypes"><span class="toc-section-number">4.2.1.1</span> Basic integer types</a></li>
+<li><a href="#C99_fixed-width_types"><span class="toc-section-number">4.2.1.2</span> C99 fixed-width types</a></li>
+</ul></li>
+<li><a href="#sizeTypes"><span class="toc-section-number">4.2.2</span> <code>size_t</code> and <code>ptrdiff_t</code></a><ul>
+<li><a href="#integerConstants"><span class="toc-section-number">4.2.2.1</span> Integer constants</a><ul>
+<li><a href="#naming-constants"><span class="toc-section-number">4.2.2.1.1</span> Naming constants</a></li>
+</ul></li>
+<li><a href="#integerOperators"><span class="toc-section-number">4.2.2.2</span> Integer operators</a><ul>
+<li><a href="#Arithmetic_operators"><span class="toc-section-number">4.2.2.2.1</span> Arithmetic operators</a></li>
+<li><a href="#Bitwise_operators"><span class="toc-section-number">4.2.2.2.2</span> Bitwise operators</a></li>
+<li><a href="#Logical_operators"><span class="toc-section-number">4.2.2.2.3</span> Logical operators</a></li>
+<li><a href="#Relational_operators"><span class="toc-section-number">4.2.2.2.4</span> Relational operators</a></li>
+</ul></li>
+<li><a href="#integerStringConversion"><span class="toc-section-number">4.2.2.3</span> Converting to and from strings</a></li>
+</ul></li>
+<li><a href="#floatingPointTypes"><span class="toc-section-number">4.2.3</span> Floating-point types</a><ul>
+<li><a href="#Floating_point_basics"><span class="toc-section-number">4.2.3.1</span> Floating point basics</a></li>
+<li><a href="#Floating-point_constants"><span class="toc-section-number">4.2.3.2</span> Floating-point constants</a></li>
+<li><a href="#Operators"><span class="toc-section-number">4.2.3.3</span> Operators</a></li>
+<li><a href="#Conversion_to_and_from_integer_types"><span class="toc-section-number">4.2.3.4</span> Conversion to and from integer types</a></li>
+<li><a href="#The_IEEE-754_floating-point_standard"><span class="toc-section-number">4.2.3.5</span> The IEEE-754 floating-point standard</a></li>
+<li><a href="#Error"><span class="toc-section-number">4.2.3.6</span> Error</a></li>
+<li><a href="#Reading_and_writing_floating-point_numbers"><span class="toc-section-number">4.2.3.7</span> Reading and writing floating-point numbers</a></li>
+<li><a href="#Non-finite_numbers_in_C"><span class="toc-section-number">4.2.3.8</span> Non-finite numbers in C</a></li>
+<li><a href="#The_math_library"><span class="toc-section-number">4.2.3.9</span> The math library</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#operatorPrecedence"><span class="toc-section-number">4.3</span> Operator precedence</a></li>
+<li><a href="#programmingStyle"><span class="toc-section-number">4.4</span> Programming style</a></li>
+<li><a href="#variables"><span class="toc-section-number">4.5</span> Variables</a><ul>
+<li><a href="#MachineMemory"><span class="toc-section-number">4.5.1</span> Memory</a></li>
+<li><a href="#variablesAsNames"><span class="toc-section-number">4.5.2</span> Variables as names</a><ul>
+<li><a href="#Variable_declarations"><span class="toc-section-number">4.5.2.1</span> Variable declarations</a></li>
+<li><a href="#Variable_names"><span class="toc-section-number">4.5.2.2</span> Variable names</a></li>
+</ul></li>
+<li><a href="#usingVariables"><span class="toc-section-number">4.5.3</span> Using variables</a></li>
+<li><a href="#initializers"><span class="toc-section-number">4.5.4</span> Initialization</a></li>
+<li><a href="#qualifiers"><span class="toc-section-number">4.5.5</span> Storage class qualifiers</a><ul>
+<li><a href="#scopeAndExtent"><span class="toc-section-number">4.5.5.1</span> Scope and extent</a><ul>
+<li><a href="#additional-qualifiers-for-global-variables"><span class="toc-section-number">4.5.5.1.1</span> Additional qualifiers for global variables</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#const"><span class="toc-section-number">4.5.6</span> Marking variables as constant</a><ul>
+<li><a href="#pointers-to-const"><span class="toc-section-number">4.5.6.1</span> Pointers to <code>const</code></a></li>
+</ul></li>
+</ul></li>
+<li><a href="#IO"><span class="toc-section-number">4.6</span> Input and output</a><ul>
+<li><a href="#Character_streams"><span class="toc-section-number">4.6.1</span> Character streams</a></li>
+<li><a href="#characterIO"><span class="toc-section-number">4.6.2</span> Reading and writing single characters</a></li>
+<li><a href="#Formatted_I.2FO"><span class="toc-section-number">4.6.3</span> Formatted I/O</a></li>
+<li><a href="#Rolling_your_own_I.2FO_routines"><span class="toc-section-number">4.6.4</span> Rolling your own I/O routines</a></li>
+<li><a href="#File_I.2FO"><span class="toc-section-number">4.6.5</span> File I/O</a></li>
+</ul></li>
+<li><a href="#statements"><span class="toc-section-number">4.7</span> Statements and control structures</a><ul>
+<li><a href="#Simple_statements"><span class="toc-section-number">4.7.1</span> Simple statements</a></li>
+<li><a href="#Compound_statements"><span class="toc-section-number">4.7.2</span> Compound statements</a><ul>
+<li><a href="#conditionals"><span class="toc-section-number">4.7.2.1</span> Conditionals</a></li>
+<li><a href="#Loops"><span class="toc-section-number">4.7.2.2</span> Loops</a><ul>
+<li><a href="#The_while_loop"><span class="toc-section-number">4.7.2.2.1</span> The while loop</a></li>
+<li><a href="#The_do..while_loop"><span class="toc-section-number">4.7.2.2.2</span> The do..while loop</a></li>
+<li><a href="#forLoop"><span class="toc-section-number">4.7.2.2.3</span> The for loop</a></li>
+<li><a href="#Loops_with_break.2C_continue.2C_and_goto"><span class="toc-section-number">4.7.2.2.4</span> Loops with break, continue, and goto</a></li>
+</ul></li>
+<li><a href="#Choosing_where_to_put_a_loop_exit"><span class="toc-section-number">4.7.2.3</span> Choosing where to put a loop exit</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#functions"><span class="toc-section-number">4.8</span> Functions</a><ul>
+<li><a href="#functionDefinitions"><span class="toc-section-number">4.8.1</span> Function definitions</a></li>
+<li><a href="#functionIdeology"><span class="toc-section-number">4.8.2</span> When to write a function</a></li>
+<li><a href="#Calling_a_function"><span class="toc-section-number">4.8.3</span> Calling a function</a></li>
+<li><a href="#The_return_statement"><span class="toc-section-number">4.8.4</span> The return statement</a></li>
+<li><a href="#Function_declarations_and_modules"><span class="toc-section-number">4.8.5</span> Function declarations and modules</a></li>
+<li><a href="#Static_functions"><span class="toc-section-number">4.8.6</span> Static functions</a></li>
+<li><a href="#Local_variables"><span class="toc-section-number">4.8.7</span> Local variables</a></li>
+<li><a href="#Mechanics_of_function_calls"><span class="toc-section-number">4.8.8</span> Mechanics of function calls</a></li>
+</ul></li>
+<li><a href="#pointers"><span class="toc-section-number">4.9</span> Pointers</a><ul>
+<li><a href="#addressSpace"><span class="toc-section-number">4.9.1</span> Memory and addresses</a></li>
+<li><a href="#Pointer_variables"><span class="toc-section-number">4.9.2</span> Pointer variables</a><ul>
+<li><a href="#Declaring_a_pointer_variable"><span class="toc-section-number">4.9.2.1</span> Declaring a pointer variable</a></li>
+<li><a href="#Assigning_to_pointer_variables"><span class="toc-section-number">4.9.2.2</span> Assigning to pointer variables</a></li>
+<li><a href="#Using_a_pointer"><span class="toc-section-number">4.9.2.3</span> Using a pointer</a></li>
+<li><a href="#Printing_pointers"><span class="toc-section-number">4.9.2.4</span> Printing pointers</a></li>
+</ul></li>
+<li><a href="#The_null_pointer"><span class="toc-section-number">4.9.3</span> The null pointer</a></li>
+<li><a href="#Pointers_and_functions"><span class="toc-section-number">4.9.4</span> Pointers and functions</a></li>
+<li><a href="#pointerArithmetic"><span class="toc-section-number">4.9.5</span> Pointer arithmetic and arrays</a><ul>
+<li><a href="#arrays"><span class="toc-section-number">4.9.5.1</span> Arrays</a></li>
+<li><a href="#arraysAndFunctions"><span class="toc-section-number">4.9.5.2</span> Arrays and functions</a></li>
+<li><a href="#multidimensionalArrays"><span class="toc-section-number">4.9.5.3</span> Multidimensional arrays</a></li>
+<li><a href="#variableLengthArrays"><span class="toc-section-number">4.9.5.4</span> Variable-length arrays</a></li>
+</ul></li>
+<li><a href="#Void_pointers"><span class="toc-section-number">4.9.6</span> Void pointers</a><ul>
+<li><a href="#alignment"><span class="toc-section-number">4.9.6.1</span> Alignment</a></li>
+</ul></li>
+<li><a href="#malloc"><span class="toc-section-number">4.9.7</span> Run-time storage allocation using <code>malloc</code></a></li>
+<li><a href="#functionPointers"><span class="toc-section-number">4.9.8</span> Function pointers</a><ul>
+<li><a href="#Function_pointer_declarations"><span class="toc-section-number">4.9.8.1</span> Function pointer declarations</a></li>
+<li><a href="#Callbacks"><span class="toc-section-number">4.9.8.2</span> Callbacks</a></li>
+<li><a href="#Dispatch_tables"><span class="toc-section-number">4.9.8.3</span> Dispatch tables</a></li>
+</ul></li>
+<li><a href="#The_restrict_keyword"><span class="toc-section-number">4.9.9</span> The restrict keyword</a></li>
+</ul></li>
+<li><a href="#strings"><span class="toc-section-number">4.10</span> Strings</a><ul>
+<li><a href="#C_strings"><span class="toc-section-number">4.10.1</span> C strings</a></li>
+<li><a href="#String_constants"><span class="toc-section-number">4.10.2</span> String constants</a></li>
+<li><a href="#String_buffers"><span class="toc-section-number">4.10.3</span> String buffers</a><ul>
+<li><a href="#string-buffers-and-the-perils-of-gets"><span class="toc-section-number">4.10.3.1</span> String buffers and the perils of <code>gets</code></a></li>
+</ul></li>
+<li><a href="#Operations_on_strings"><span class="toc-section-number">4.10.4</span> Operations on strings</a></li>
+<li><a href="#Finding_the_length_of_a_string"><span class="toc-section-number">4.10.5</span> Finding the length of a string</a><ul>
+<li><a href="#The_strlen_tarpit"><span class="toc-section-number">4.10.5.1</span> The strlen tarpit</a></li>
+</ul></li>
+<li><a href="#Comparing_strings"><span class="toc-section-number">4.10.6</span> Comparing strings</a></li>
+<li><a href="#Formatted_output_to_strings"><span class="toc-section-number">4.10.7</span> Formatted output to strings</a></li>
+<li><a href="#Dynamic_allocation_of_strings"><span class="toc-section-number">4.10.8</span> Dynamic allocation of strings</a></li>
+<li><a href="#argv"><span class="toc-section-number">4.10.9</span> Command-line arguments</a></li>
+</ul></li>
+<li><a href="#structuredDataTypes"><span class="toc-section-number">4.11</span> Structured data types</a><ul>
+<li><a href="#structs"><span class="toc-section-number">4.11.1</span> Structs</a><ul>
+<li><a href="#operations-on-structs"><span class="toc-section-number">4.11.1.1</span> Operations on structs</a></li>
+<li><a href="#structLayout"><span class="toc-section-number">4.11.1.2</span> Layout in memory</a></li>
+<li><a href="#Bit_fields"><span class="toc-section-number">4.11.1.3</span> Bit fields</a></li>
+</ul></li>
+<li><a href="#unions"><span class="toc-section-number">4.11.2</span> Unions</a></li>
+<li><a href="#enums"><span class="toc-section-number">4.11.3</span> Enums</a><ul>
+<li><a href="#specifying-particular-values"><span class="toc-section-number">4.11.3.1</span> Specifying particular values</a></li>
+<li><a href="#what-most-people-do"><span class="toc-section-number">4.11.3.2</span> What most people do</a></li>
+<li><a href="#enumTagsForUnion"><span class="toc-section-number">4.11.3.3</span> Using <code>enum</code> with <code>union</code></a></li>
+</ul></li>
+</ul></li>
+<li><a href="#typedef"><span class="toc-section-number">4.12</span> Type aliases using <code>typedef</code></a><ul>
+<li><a href="#opaqueStructs"><span class="toc-section-number">4.12.1</span> Opaque structs</a></li>
+</ul></li>
+<li><a href="#macros"><span class="toc-section-number">4.13</span> Macros</a><ul>
+<li><a href="#Macros_with_arguments"><span class="toc-section-number">4.13.1</span> Macros with arguments</a><ul>
+<li><a href="#Multiple_arguments"><span class="toc-section-number">4.13.1.1</span> Multiple arguments</a></li>
+<li><a href="#Perils_of_repeating_arguments"><span class="toc-section-number">4.13.1.2</span> Perils of repeating arguments</a></li>
+<li><a href="#Variable-length_argument_lists"><span class="toc-section-number">4.13.1.3</span> Variable-length argument lists</a></li>
+<li><a href="#macros-vs.-inline-functions"><span class="toc-section-number">4.13.1.4</span> Macros vs. inline functions</a></li>
+</ul></li>
+<li><a href="#Multiple_macros"><span class="toc-section-number">4.13.2</span> Macros that include other macros</a></li>
+<li><a href="#Macro_tricks"><span class="toc-section-number">4.13.3</span> More specialized macros</a><ul>
+<li><a href="#Multiple_expressions_in_a_macro"><span class="toc-section-number">4.13.3.1</span> Multiple expressions in a macro</a></li>
+<li><a href="#nonSyntacticMacros"><span class="toc-section-number">4.13.3.2</span> Non-syntactic macros</a></li>
+<li><a href="#Multiple_statements_in_one_macro"><span class="toc-section-number">4.13.3.3</span> Multiple statements in one macro</a></li>
+<li><a href="#String_expansion"><span class="toc-section-number">4.13.3.4</span> String expansion</a></li>
+<li><a href="#Big_macros"><span class="toc-section-number">4.13.3.5</span> Big macros</a></li>
+</ul></li>
+<li><a href="#ifdef"><span class="toc-section-number">4.13.4</span> Conditional compilation</a></li>
+<li><a href="#defining-macros-on-the-command-line"><span class="toc-section-number">4.13.5</span> Defining macros on the command line</a></li>
+<li><a href="#the-if-directive"><span class="toc-section-number">4.13.6</span> The <code>#if</code> directive</a></li>
+<li><a href="#Debugging_macro_expansions"><span class="toc-section-number">4.13.7</span> Debugging macro expansions</a></li>
+<li><a href="#Can_a_macro_call_a_preprocessor_command.3F"><span class="toc-section-number">4.13.8</span> Can a macro call a preprocessor command?</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#dataStructuresAndProgrammingTechniques"><span class="toc-section-number">5</span> Data structures and programming techniques</a><ul>
+<li><a href="#asymptoticNotation"><span class="toc-section-number">5.1</span> Asymptotic notation</a><ul>
+<li><a href="#two-sorting-algorithms"><span class="toc-section-number">5.1.1</span> Two sorting algorithms</a></li>
+<li><a href="#big-o-to-the-rescue"><span class="toc-section-number">5.1.2</span> Big-O to the rescue</a></li>
+<li><a href="#asymptotic-cost-of-programs"><span class="toc-section-number">5.1.3</span> Asymptotic cost of programs</a></li>
+<li><a href="#other-variants-of-asymptotic-notation"><span class="toc-section-number">5.1.4</span> Other variants of asymptotic notation</a></li>
+</ul></li>
+<li><a href="#linkedLists"><span class="toc-section-number">5.2</span> Linked lists</a><ul>
+<li><a href="#stacks"><span class="toc-section-number">5.2.1</span> Stacks</a><ul>
+<li><a href="#Building_a_stack_out_of_an_array"><span class="toc-section-number">5.2.1.1</span> Building a stack out of an array</a></li>
+</ul></li>
+<li><a href="#queues"><span class="toc-section-number">5.2.2</span> Queues</a></li>
+<li><a href="#Looping_over_a_linked_list"><span class="toc-section-number">5.2.3</span> Looping over a linked list</a></li>
+<li><a href="#Looping_over_a_linked_list_backwards"><span class="toc-section-number">5.2.4</span> Looping over a linked list backwards</a></li>
+<li><a href="#deques"><span class="toc-section-number">5.2.5</span> Deques and doubly-linked lists</a><ul>
+<li><a href="#ringBuffers"><span class="toc-section-number">5.2.5.1</span> Alternate implementation using a ring buffer</a></li>
+</ul></li>
+<li><a href="#Circular_linked_lists"><span class="toc-section-number">5.2.6</span> Circular linked lists</a></li>
+<li><a href="#What_linked_lists_are_and_are_not_good_for"><span class="toc-section-number">5.2.7</span> What linked lists are and are not good for</a></li>
+<li><a href="#Further_reading"><span class="toc-section-number">5.2.8</span> Further reading</a></li>
+</ul></li>
+<li><a href="#abstractDataTypes"><span class="toc-section-number">5.3</span> Abstract data types</a><ul>
+<li><a href="#abstractDataTypeExample"><span class="toc-section-number">5.3.1</span> A sequence type</a><ul>
+<li><a href="#Interface"><span class="toc-section-number">5.3.1.1</span> Interface</a></li>
+<li><a href="#adtImplementation"><span class="toc-section-number">5.3.1.2</span> Implementation</a></li>
+<li><a href="#Compiling_and_linking"><span class="toc-section-number">5.3.1.3</span> Compiling and linking</a></li>
+</ul></li>
+<li><a href="#Designing_abstract_data_types"><span class="toc-section-number">5.3.2</span> Designing abstract data types</a><ul>
+<li><a href="#Parnas.27s_Principle"><span class="toc-section-number">5.3.2.1</span> Parnas's Principle</a></li>
+<li><a href="#When_to_build_an_abstract_data_type"><span class="toc-section-number">5.3.2.2</span> When to build an abstract data type</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#hashTables"><span class="toc-section-number">5.4</span> Hash tables</a><ul>
+<li><a href="#dictionaries"><span class="toc-section-number">5.4.1</span> Dictionary data types</a></li>
+<li><a href="#Basics_of_hashing"><span class="toc-section-number">5.4.2</span> Basics of hashing</a></li>
+<li><a href="#Resolving_collisions"><span class="toc-section-number">5.4.3</span> Resolving collisions</a><ul>
+<li><a href="#Chaining"><span class="toc-section-number">5.4.3.1</span> Chaining</a></li>
+<li><a href="#Open_addressing"><span class="toc-section-number">5.4.3.2</span> Open addressing</a></li>
+</ul></li>
+<li><a href="#Choosing_a_hash_function"><span class="toc-section-number">5.4.4</span> Choosing a hash function</a><ul>
+<li><a href="#Division_method"><span class="toc-section-number">5.4.4.1</span> Division method</a></li>
+<li><a href="#Multiplication_method"><span class="toc-section-number">5.4.4.2</span> Multiplication method</a></li>
+<li><a href="#Universal_hashing"><span class="toc-section-number">5.4.4.3</span> Universal hashing</a></li>
+</ul></li>
+<li><a href="#Maintaining_a_constant_load_factor"><span class="toc-section-number">5.4.5</span> Maintaining a constant load factor</a></li>
+<li><a href="#Examples"><span class="toc-section-number">5.4.6</span> Examples</a><ul>
+<li><a href="#A_low-overhead_hash_table_using_open_addressing"><span class="toc-section-number">5.4.6.1</span> A low-overhead hash table using open addressing</a></li>
+<li><a href="#A_string_to_string_dictionary_using_chaining"><span class="toc-section-number">5.4.6.2</span> A string to string dictionary using chaining</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#genericContainers"><span class="toc-section-number">5.5</span> Generic containers</a><ul>
+<li><a href="#Generic_dictionary:_interface"><span class="toc-section-number">5.5.1</span> Generic dictionary: interface</a></li>
+<li><a href="#genericDictionaryImplementation"><span class="toc-section-number">5.5.2</span> Generic dictionary: implementation</a></li>
+</ul></li>
+<li><a href="#recursion"><span class="toc-section-number">5.6</span> Recursion</a><ul>
+<li><a href="#Example_of_recursion_in_C"><span class="toc-section-number">5.6.1</span> Example of recursion in C</a></li>
+<li><a href="#Common_problems_with_recursion"><span class="toc-section-number">5.6.2</span> Common problems with recursion</a><ul>
+<li><a href="#Omitting_the_base_case"><span class="toc-section-number">5.6.2.1</span> Omitting the base case</a></li>
+<li><a href="#Blowing_out_the_stack"><span class="toc-section-number">5.6.2.2</span> Blowing out the stack</a></li>
+<li><a href="#Failure_to_make_progress"><span class="toc-section-number">5.6.2.3</span> Failure to make progress</a></li>
+</ul></li>
+<li><a href="#tailRecursion"><span class="toc-section-number">5.6.3</span> Tail-recursion and iteration</a><ul>
+<li><a href="#binarySearch"><span class="toc-section-number">5.6.3.1</span> Binary search: recursive and iterative versions</a></li>
+</ul></li>
+<li><a href="#mergesort"><span class="toc-section-number">5.6.4</span> Mergesort: a recursive sorting algorithm</a></li>
+<li><a href="#asymptotic-complexity-of-recursive-functions"><span class="toc-section-number">5.6.5</span> Asymptotic complexity of recursive functions</a></li>
+</ul></li>
+<li><a href="#binaryTrees"><span class="toc-section-number">5.7</span> Binary trees</a><ul>
+<li><a href="#Tree_basics"><span class="toc-section-number">5.7.1</span> Tree basics</a></li>
+<li><a href="#Binary_tree_implementations"><span class="toc-section-number">5.7.2</span> Binary tree implementations</a></li>
+<li><a href="#The_canonical_binary_tree_algorithm"><span class="toc-section-number">5.7.3</span> The canonical binary tree algorithm</a></li>
+<li><a href="#Nodes_vs_leaves"><span class="toc-section-number">5.7.4</span> Nodes vs leaves</a></li>
+<li><a href="#Special_classes_of_binary_trees"><span class="toc-section-number">5.7.5</span> Special classes of binary trees</a></li>
+</ul></li>
+<li><a href="#heaps"><span class="toc-section-number">5.8</span> Heaps</a><ul>
+<li><a href="#priorityQueues"><span class="toc-section-number">5.8.1</span> Priority queues</a></li>
+<li><a href="#Expensive_implementations_of_priority_queues"><span class="toc-section-number">5.8.2</span> Expensive implementations of priority queues</a></li>
+<li><a href="#heapStructure"><span class="toc-section-number">5.8.3</span> Structure of a heap</a></li>
+<li><a href="#Packed_heaps"><span class="toc-section-number">5.8.4</span> Packed heaps</a></li>
+<li><a href="#Bottom-up_heapification"><span class="toc-section-number">5.8.5</span> Bottom-up heapification</a></li>
+<li><a href="#heapSort"><span class="toc-section-number">5.8.6</span> Heapsort</a></li>
+<li><a href="#heapMoreInformation"><span class="toc-section-number">5.8.7</span> More information</a></li>
+</ul></li>
+<li><a href="#binarySearchTrees"><span class="toc-section-number">5.9</span> Binary search trees</a><ul>
+<li><a href="#Searching_for_a_node"><span class="toc-section-number">5.9.1</span> Searching for a node</a></li>
+<li><a href="#Inserting_a_new_node"><span class="toc-section-number">5.9.2</span> Inserting a new node</a></li>
+<li><a href="#deleting-a-node"><span class="toc-section-number">5.9.3</span> Deleting a node</a></li>
+<li><a href="#Costs"><span class="toc-section-number">5.9.4</span> Costs</a></li>
+</ul></li>
+<li><a href="#augmentedTrees"><span class="toc-section-number">5.10</span> Augmented trees</a><ul>
+<li><a href="#applications"><span class="toc-section-number">5.10.1</span> Applications</a></li>
+</ul></li>
+<li><a href="#balancedTrees"><span class="toc-section-number">5.11</span> Balanced trees</a><ul>
+<li><a href="#treeRotations"><span class="toc-section-number">5.11.1</span> Tree rotations</a></li>
+<li><a href="#AVLtrees"><span class="toc-section-number">5.11.2</span> AVL trees</a><ul>
+<li><a href="#avlTreeImplementation"><span class="toc-section-number">5.11.2.1</span> Sample implementation</a></li>
+</ul></li>
+<li><a href="#A2.2BIBM-3_trees"><span class="toc-section-number">5.11.3</span> 2–3 trees</a></li>
+<li><a href="#redBlackTrees"><span class="toc-section-number">5.11.4</span> Red-black trees</a></li>
+<li><a href="#B-trees"><span class="toc-section-number">5.11.5</span> B-trees</a></li>
+<li><a href="#splayTrees"><span class="toc-section-number">5.11.6</span> Splay trees</a><ul>
+<li><a href="#how-splaying-works"><span class="toc-section-number">5.11.6.1</span> How splaying works</a></li>
+<li><a href="#splayTreeAnalysis"><span class="toc-section-number">5.11.6.2</span> Analysis</a></li>
+<li><a href="#other-operations"><span class="toc-section-number">5.11.6.3</span> Other operations</a></li>
+<li><a href="#top-down-splaying"><span class="toc-section-number">5.11.6.4</span> Top-down splaying</a></li>
+<li><a href="#splayTreeImplementation"><span class="toc-section-number">5.11.6.5</span> An implementation</a></li>
+<li><a href="#splayTreesMoreInformation"><span class="toc-section-number">5.11.6.6</span> More information</a></li>
+</ul></li>
+<li><a href="#scapegoatTrees"><span class="toc-section-number">5.11.7</span> Scapegoat trees</a></li>
+<li><a href="#skip-lists"><span class="toc-section-number">5.11.8</span> Skip lists</a></li>
+<li><a href="#treeImplementations"><span class="toc-section-number">5.11.9</span> Implementations</a></li>
+</ul></li>
+<li><a href="#graphs"><span class="toc-section-number">5.12</span> Graphs</a><ul>
+<li><a href="#graphDefinitions"><span class="toc-section-number">5.12.1</span> Basic definitions</a></li>
+<li><a href="#Why_graphs_are_useful"><span class="toc-section-number">5.12.2</span> Why graphs are useful</a></li>
+<li><a href="#Operations_on_graphs"><span class="toc-section-number">5.12.3</span> Operations on graphs</a></li>
+<li><a href="#Representations_of_graphs"><span class="toc-section-number">5.12.4</span> Representations of graphs</a><ul>
+<li><a href="#Adjacency_matrices"><span class="toc-section-number">5.12.4.1</span> Adjacency matrices</a></li>
+<li><a href="#Adjacency_lists"><span class="toc-section-number">5.12.4.2</span> Adjacency lists</a><ul>
+<li><a href="#An_implementation"><span class="toc-section-number">5.12.4.2.1</span> An implementation</a></li>
+</ul></li>
+<li><a href="#Implicit_representations"><span class="toc-section-number">5.12.4.3</span> Implicit representations</a></li>
+</ul></li>
+<li><a href="#graphSearch"><span class="toc-section-number">5.12.5</span> Searching for paths in a graph</a><ul>
+<li><a href="#graphSearchImplementation"><span class="toc-section-number">5.12.5.1</span> Implementation of depth-first and breadth-first search</a></li>
+<li><a href="#combinedDFSBFS"><span class="toc-section-number">5.12.5.2</span> Combined implementation of depth-first and breadth-first search</a></li>
+<li><a href="#Other_variations_on_the_basic_algorithm"><span class="toc-section-number">5.12.5.3</span> Other variations on the basic algorithm</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#dynamicProgramming"><span class="toc-section-number">5.13</span> Dynamic programming</a><ul>
+<li><a href="#Memoization"><span class="toc-section-number">5.13.1</span> Memoization</a></li>
+<li><a href="#Dynamic_programming"><span class="toc-section-number">5.13.2</span> Dynamic programming</a><ul>
+<li><a href="#More_examples"><span class="toc-section-number">5.13.2.1</span> More examples</a><ul>
+<li><a href="#Longest_increasing_subsequence"><span class="toc-section-number">5.13.2.1.1</span> Longest increasing subsequence</a></li>
+<li><a href="#All-pairs_shortest_paths"><span class="toc-section-number">5.13.2.1.2</span> All-pairs shortest paths</a></li>
+<li><a href="#longestCommonSubsequence"><span class="toc-section-number">5.13.2.1.3</span> Longest common subsequence</a></li>
+</ul></li>
+</ul></li>
+</ul></li>
+<li><a href="#randomization"><span class="toc-section-number">5.14</span> Randomization</a><ul>
+<li><a href="#Generating_random_values_in_C"><span class="toc-section-number">5.14.1</span> Generating random values in C</a><ul>
+<li><a href="#The_rand_function_from_the_standard_library"><span class="toc-section-number">5.14.1.1</span> The <code>rand</code> function from the standard library</a><ul>
+<li><a href="#supplying-a-seed-with-srand"><span class="toc-section-number">5.14.1.1.1</span> Supplying a seed with <code>srand</code></a></li>
+</ul></li>
+<li><a href="#Better_pseudorandom_number_generators"><span class="toc-section-number">5.14.1.2</span> Better pseudorandom number generators</a></li>
+<li><a href="#Random_numbers_without_the_pseudo"><span class="toc-section-number">5.14.1.3</span> Random numbers without the pseudo</a></li>
+<li><a href="#RANDMAX"><span class="toc-section-number">5.14.1.4</span> Range issues</a></li>
+</ul></li>
+<li><a href="#Randomized_algorithms"><span class="toc-section-number">5.14.2</span> Randomized algorithms</a><ul>
+<li><a href="#Randomized_search"><span class="toc-section-number">5.14.2.1</span> Randomized search</a></li>
+<li><a href="#quicksort"><span class="toc-section-number">5.14.2.2</span> Quickselect and quicksort</a></li>
+</ul></li>
+<li><a href="#randomizedDataStructures"><span class="toc-section-number">5.14.3</span> Randomized data structures</a><ul>
+<li><a href="#skipLists"><span class="toc-section-number">5.14.3.1</span> Skip lists</a></li>
+<li><a href="#Universal_hash_families"><span class="toc-section-number">5.14.3.2</span> Universal hash families</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#stringProcessing"><span class="toc-section-number">5.15</span> String processing</a><ul>
+<li><a href="#radixSearch"><span class="toc-section-number">5.15.1</span> Radix search</a><ul>
+<li><a href="#Tries"><span class="toc-section-number">5.15.1.1</span> Tries</a><ul>
+<li><a href="#Searching_a_trie"><span class="toc-section-number">5.15.1.1.1</span> Searching a trie</a></li>
+<li><a href="#Inserting_a_new_element_into_a_trie"><span class="toc-section-number">5.15.1.1.2</span> Inserting a new element into a trie</a></li>
+<li><a href="#trieImplementation"><span class="toc-section-number">5.15.1.1.3</span> Implementation</a></li>
+</ul></li>
+<li><a href="#Patricia_trees"><span class="toc-section-number">5.15.1.2</span> Patricia trees</a></li>
+<li><a href="#Ternary_search_trees"><span class="toc-section-number">5.15.1.3</span> Ternary search trees</a></li>
+<li><a href="#treesMoreInformation"><span class="toc-section-number">5.15.1.4</span> More information</a></li>
+</ul></li>
+<li><a href="#radixSort"><span class="toc-section-number">5.15.2</span> Radix sort</a><ul>
+<li><a href="#Bucket_sort"><span class="toc-section-number">5.15.2.1</span> Bucket sort</a></li>
+<li><a href="#Classic_LSB_radix_sort"><span class="toc-section-number">5.15.2.2</span> Classic LSB radix sort</a></li>
+<li><a href="#MSB_radix_sort"><span class="toc-section-number">5.15.2.3</span> MSB radix sort</a><ul>
+<li><a href="#Issues_with_recursion_depth"><span class="toc-section-number">5.15.2.3.1</span> Issues with recursion depth</a></li>
+<li><a href="#Implementing_the_buckets"><span class="toc-section-number">5.15.2.3.2</span> Implementing the buckets</a></li>
+<li><a href="#Further_optimization"><span class="toc-section-number">5.15.2.3.3</span> Further optimization</a></li>
+<li><a href="#radixSortImplementation"><span class="toc-section-number">5.15.2.3.4</span> Sample implementation</a></li>
+</ul></li>
+</ul></li>
+</ul></li>
+</ul></li>
+<li><a href="#other-topics-not-covered-in-detail-in-2015"><span class="toc-section-number">6</span> Other topics not covered in detail in 2015</a><ul>
+<li><a href="#more-applications-of-function-pointers"><span class="toc-section-number">6.1</span> More applications of function pointers</a><ul>
+<li><a href="#iterators"><span class="toc-section-number">6.1.1</span> Iterators</a><ul>
+<li><a href="#Option_1:_Function_that_returns_a_sequence"><span class="toc-section-number">6.1.1.1</span> Option 1: Function that returns a sequence</a></li>
+<li><a href="#Option_2:_Iterator_with_first.2Fdone.2Fnext_operations"><span class="toc-section-number">6.1.1.2</span> Option 2: Iterator with first/done/next operations</a></li>
+<li><a href="#Option_3:_Iterator_with_function_argument"><span class="toc-section-number">6.1.1.3</span> Option 3: Iterator with function argument</a></li>
+</ul></li>
+<li><a href="#closures"><span class="toc-section-number">6.1.2</span> Closures</a></li>
+<li><a href="#Objects"><span class="toc-section-number">6.1.3</span> Objects</a></li>
+</ul></li>
+<li><a href="#suffixArrays"><span class="toc-section-number">6.2</span> Suffix arrays</a><ul>
+<li><a href="#Why_do_we_want_to_do_this.3F"><span class="toc-section-number">6.2.1</span> Why do we want to do this?</a></li>
+<li><a href="#String_search_algorithms"><span class="toc-section-number">6.2.2</span> String search algorithms</a></li>
+<li><a href="#Suffix_trees_and_suffix_arrays"><span class="toc-section-number">6.2.3</span> Suffix trees and suffix arrays</a><ul>
+<li><a href="#Building_a_suffix_array"><span class="toc-section-number">6.2.3.1</span> Building a suffix array</a></li>
+<li><a href="#Searching_a_suffix_array"><span class="toc-section-number">6.2.3.2</span> Searching a suffix array</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#Burrows-Wheeler_transform"><span class="toc-section-number">6.3</span> Burrows-Wheeler transform</a><ul>
+<li><a href="#Suffix_arrays_and_the_Burrows-Wheeler_transform"><span class="toc-section-number">6.3.1</span> Suffix arrays and the Burrows-Wheeler transform</a></li>
+<li><a href="#sample-implementation"><span class="toc-section-number">6.3.2</span> Sample implementation</a></li>
+</ul></li>
+<li><a href="#cplusplus"><span class="toc-section-number">6.4</span> C++</a><ul>
+<li><a href="#Hello_world"><span class="toc-section-number">6.4.1</span> Hello world</a></li>
+<li><a href="#References"><span class="toc-section-number">6.4.2</span> References</a></li>
+<li><a href="#Function_overloading"><span class="toc-section-number">6.4.3</span> Function overloading</a></li>
+<li><a href="#Classes"><span class="toc-section-number">6.4.4</span> Classes</a></li>
+<li><a href="#Operator_overloading"><span class="toc-section-number">6.4.5</span> Operator overloading</a></li>
+<li><a href="#Templates"><span class="toc-section-number">6.4.6</span> Templates</a></li>
+<li><a href="#Exceptions"><span class="toc-section-number">6.4.7</span> Exceptions</a></li>
+<li><a href="#Storage_allocation"><span class="toc-section-number">6.4.8</span> Storage allocation</a><ul>
+<li><a href="#Storage_allocation_inside_objects"><span class="toc-section-number">6.4.8.1</span> Storage allocation inside objects</a></li>
+</ul></li>
+<li><a href="#Standard_library"><span class="toc-section-number">6.4.9</span> Standard library</a></li>
+<li><a href="#Things_we_haven.27t_talked_about"><span class="toc-section-number">6.4.10</span> Things we haven't talked about</a></li>
+</ul></li>
+<li><a href="#testingDuringDevelopment"><span class="toc-section-number">6.5</span> Testing during development</a><ul>
+<li><a href="#unitTests"><span class="toc-section-number">6.5.1</span> Unit tests</a><ul>
+<li><a href="#what-to-put-in-the-test-code"><span class="toc-section-number">6.5.1.1</span> What to put in the test code</a></li>
+<li><a href="#example"><span class="toc-section-number">6.5.1.2</span> Example</a></li>
+</ul></li>
+<li><a href="#test-harnesses"><span class="toc-section-number">6.5.2</span> Test harnesses</a><ul>
+<li><a href="#Module_interface"><span class="toc-section-number">6.5.2.1</span> Module interface</a><ul>
+<li><a href="#stack.h"><span class="toc-section-number">6.5.2.1.1</span> stack.h</a></li>
+</ul></li>
+<li><a href="#Test_code"><span class="toc-section-number">6.5.2.2</span> Test code</a><ul>
+<li><a href="#test-stack.c"><span class="toc-section-number">6.5.2.2.1</span> test-stack.c</a></li>
+</ul></li>
+<li><a href="#Makefile"><span class="toc-section-number">6.5.2.3</span> Makefile</a><ul>
+<li><a href="#Makefile-1"><span class="toc-section-number">6.5.2.3.1</span> Makefile</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#Stub_implementation"><span class="toc-section-number">6.5.3</span> Stub implementation</a><ul>
+<li><a href="#stack.c"><span class="toc-section-number">6.5.3.1</span> stack.c</a></li>
+</ul></li>
+<li><a href="#Bounded-space_implementation"><span class="toc-section-number">6.5.4</span> Bounded-space implementation</a><ul>
+<li><a href="#stack.c-1"><span class="toc-section-number">6.5.4.1</span> stack.c</a></li>
+</ul></li>
+<li><a href="#First_fix"><span class="toc-section-number">6.5.5</span> First fix</a></li>
+<li><a href="#Final_version"><span class="toc-section-number">6.5.6</span> Final version</a><ul>
+<li><a href="#stack.c-2"><span class="toc-section-number">6.5.6.1</span> stack.c</a></li>
+</ul></li>
+<li><a href="#Moral"><span class="toc-section-number">6.5.7</span> Moral</a></li>
+<li><a href="#Appendix:_Test_macros"><span class="toc-section-number">6.5.8</span> Appendix: Test macros</a></li>
+</ul></li>
+<li><a href="#algorithmDesignTechniques"><span class="toc-section-number">6.6</span> Algorithm design techniques</a><ul>
+<li><a href="#Basic_principles_of_algorithm_design"><span class="toc-section-number">6.6.1</span> Basic principles of algorithm design</a></li>
+<li><a href="#algorithmDesignTechniquesClassification"><span class="toc-section-number">6.6.2</span> Specific techniques</a></li>
+<li><a href="#Example:_Finding_the_maximum"><span class="toc-section-number">6.6.3</span> Example: Finding the maximum</a></li>
+<li><a href="#algorithmDesignSorting"><span class="toc-section-number">6.6.4</span> Example: Sorting</a></li>
+</ul></li>
+<li><a href="#bitManipulation"><span class="toc-section-number">6.7</span> Bit manipulation</a></li>
+<li><a href="#persistence"><span class="toc-section-number">6.8</span> Persistence</a><ul>
+<li><a href="#A_simple_solution_using_text_files"><span class="toc-section-number">6.8.1</span> A simple solution using text files</a></li>
+<li><a href="#Using_a_binary_file"><span class="toc-section-number">6.8.2</span> Using a binary file</a></li>
+<li><a href="#A_version_that_updates_the_file_in_place"><span class="toc-section-number">6.8.3</span> A version that updates the file in place</a></li>
+<li><a href="#An_even_better_version_using_mmap"><span class="toc-section-number">6.8.4</span> An even better version using mmap</a></li>
+<li><a href="#Concurrency_and_fault-tolerance_issues:_ACIDity"><span class="toc-section-number">6.8.5</span> Concurrency and fault-tolerance issues: ACIDity</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#whatNext"><span class="toc-section-number">7</span> What next?</a><ul>
+<li><a href="#What.27s_wrong_with_C"><span class="toc-section-number">7.1</span> What's wrong with C</a></li>
+<li><a href="#What_C.2B-.2B-_fixes"><span class="toc-section-number">7.2</span> What C++ fixes</a></li>
+<li><a href="#other-c-like-languages"><span class="toc-section-number">7.3</span> Other C-like languages</a></li>
+<li><a href="#Scripting_languages"><span class="toc-section-number">7.4</span> Scripting languages</a></li>
+</ul></li>
+<li><a href="#assignments"><span class="toc-section-number">8</span> Assignments</a><ul>
+<li><a href="#hw1"><span class="toc-section-number">8.1</span> Assignment 1, due Thursday 2015-01-29, at 11:00pm</a><ul>
+<li><a href="#bureaucratic-part"><span class="toc-section-number">8.1.1</span> Bureaucratic part</a></li>
+<li><a href="#a-rotten-cipher"><span class="toc-section-number">8.1.2</span> A rotten cipher</a></li>
+<li><a href="#your-task"><span class="toc-section-number">8.1.3</span> Your task</a></li>
+<li><a href="#hints"><span class="toc-section-number">8.1.4</span> Hints</a></li>
+<li><a href="#testing-your-assignment"><span class="toc-section-number">8.1.5</span> Testing your assignment</a></li>
+<li><a href="#submitting-your-assignment"><span class="toc-section-number">8.1.6</span> Submitting your assignment</a></li>
+<li><a href="#hw1Solution"><span class="toc-section-number">8.1.7</span> Sample solution</a></li>
+</ul></li>
+<li><a href="#hw2"><span class="toc-section-number">8.2</span> Assignment 2, due Wednesday 2015-02-04, at 11:00pm</a><ul>
+<li><a href="#opening-a-safe"><span class="toc-section-number">8.2.1</span> Opening a safe</a></li>
+<li><a href="#submitting-your-assignment-1"><span class="toc-section-number">8.2.2</span> Submitting your assignment</a></li>
+<li><a href="#hw2valgrind"><span class="toc-section-number">8.2.3</span> Valgrind</a></li>
+<li><a href="#hw2Solution"><span class="toc-section-number">8.2.4</span> Sample solution</a></li>
+</ul></li>
+<li><a href="#hw3"><span class="toc-section-number">8.3</span> Assignment 3, due Wednesday 2015-02-11, at 11:00pm</a><ul>
+<li><a href="#quadratic-letter-sequences"><span class="toc-section-number">8.3.1</span> Quadratic letter sequences</a></li>
+<li><a href="#your-task-1"><span class="toc-section-number">8.3.2</span> Your task</a></li>
+<li><a href="#submitting-your-assignment-2"><span class="toc-section-number">8.3.3</span> Submitting your assignment</a></li>
+<li><a href="#hw3Solution"><span class="toc-section-number">8.3.4</span> Sample solution</a></li>
+</ul></li>
+<li><a href="#hw4"><span class="toc-section-number">8.4</span> Assignment 4, due Wednesday 2015-02-18, at 11:00pm</a><ul>
+<li><a href="#an-ascii-art-compositor"><span class="toc-section-number">8.4.1</span> An ASCII art compositor</a></li>
+<li><a href="#submitting-your-assignment-3"><span class="toc-section-number">8.4.2</span> Submitting your assignment</a></li>
+<li><a href="#notes"><span class="toc-section-number">8.4.3</span> Notes</a><ul>
+<li><a href="#input"><span class="toc-section-number">8.4.3.1</span> Input</a></li>
+<li><a href="#output"><span class="toc-section-number">8.4.3.2</span> Output</a></li>
+<li><a href="#general"><span class="toc-section-number">8.4.3.3</span> General</a></li>
+</ul></li>
+<li><a href="#hw4Solution"><span class="toc-section-number">8.4.4</span> Sample solution</a></li>
+</ul></li>
+<li><a href="#hw5"><span class="toc-section-number">8.5</span> Assignment 5, due Wednesday 2015-02-25, at 11:00pm</a><ul>
+<li><a href="#build-a-turing-machine"><span class="toc-section-number">8.5.1</span> Build a Turing machine!</a></li>
+<li><a href="#example-1"><span class="toc-section-number">8.5.2</span> Example</a></li>
+<li><a href="#your-task-2"><span class="toc-section-number">8.5.3</span> Your task</a></li>
+<li><a href="#submitting-your-assignment-4"><span class="toc-section-number">8.5.4</span> Submitting your assignment</a></li>
+<li><a href="#hw5Solution"><span class="toc-section-number">8.5.5</span> Sample solution</a></li>
+</ul></li>
+<li><a href="#hw6"><span class="toc-section-number">8.6</span> Assignment 6, due Wednesday 2015-03-25, at 11:00pm</a><ul>
+<li><a href="#sinking-ships"><span class="toc-section-number">8.6.1</span> Sinking ships</a></li>
+<li><a href="#things-to-watch-out-for"><span class="toc-section-number">8.6.2</span> Things to watch out for</a></li>
+<li><a href="#the-testships-program"><span class="toc-section-number">8.6.3</span> The <code>testShips</code> program</a></li>
+<li><a href="#submitting-your-assignment-5"><span class="toc-section-number">8.6.4</span> Submitting your assignment</a></li>
+<li><a href="#hw6-2015-source-files"><span class="toc-section-number">8.6.5</span> Provided source files</a></li>
+<li><a href="#hw6Solution"><span class="toc-section-number">8.6.6</span> Sample solution</a></li>
+</ul></li>
+<li><a href="#hw7"><span class="toc-section-number">8.7</span> Assignment 7, due Wednesday 2015-04-01, at 11:00pm</a><ul>
+<li><a href="#solitaire-with-big-cards"><span class="toc-section-number">8.7.1</span> Solitaire with big cards</a></li>
+<li><a href="#explanation-of-the-testing-program"><span class="toc-section-number">8.7.2</span> Explanation of the testing program</a></li>
+<li><a href="#submitting-your-assignment-6"><span class="toc-section-number">8.7.3</span> Submitting your assignment</a></li>
+<li><a href="#hw7Solution"><span class="toc-section-number">8.7.4</span> Sample solution</a></li>
+</ul></li>
+<li><a href="#hw8"><span class="toc-section-number">8.8</span> Assignment 8, due Wednesday 2015-04-08, at 11:00pm</a><ul>
+<li><a href="#an-ordered-set"><span class="toc-section-number">8.8.1</span> An ordered set</a></li>
+<li><a href="#the-testorderedset-wrapper"><span class="toc-section-number">8.8.2</span> The <code>testOrderedSet</code> wrapper</a></li>
+<li><a href="#hw8submission"><span class="toc-section-number">8.8.3</span> Submitting your assignment</a></li>
+<li><a href="#hw8Solution"><span class="toc-section-number">8.8.4</span> Sample solution</a></li>
+</ul></li>
+<li><a href="#hw9"><span class="toc-section-number">8.9</span> Assignment 9, due Wednesday 2015-04-15, at 11:00pm</a><ul>
+<li><a href="#finding-a-cycle-in-a-maze"><span class="toc-section-number">8.9.1</span> Finding a cycle in a maze</a></li>
+<li><a href="#input-and-output-format"><span class="toc-section-number">8.9.2</span> Input and output format</a></li>
+<li><a href="#submitting-and-testing-your-program"><span class="toc-section-number">8.9.3</span> Submitting and testing your program</a></li>
+<li><a href="#hw9solution"><span class="toc-section-number">8.9.4</span> Sample solution</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#codingHints"><span class="toc-section-number">9</span> Common C coding and debugging issues</a></li>
+</ul>
+</div>
+<h1 id="courseAdministration"><span class="header-section-number">1</span> Course administration</h1>
+<h2 id="index"><span class="header-section-number">1.1</span> Overview</h2>
+<p>This is the course information for CPSC 223: <em>Data Structures and Programming Techniques</em> for the Spring 2015 semester. This document is available in two formats, both of which should contain the same information:</p>
+<ul>
+<li><a href="http://www.cs.yale.edu/homes/aspnes/classes/223/notes.html">HTML</a></li>
+<li><a href="http://www.cs.yale.edu/homes/aspnes/classes/223/notes.pdf">PDF</a></li>
+</ul>
+<p>Code examples can be downloaded from links in the text, or can be found in the <a href="http://www.cs.yale.edu/homes/aspnes/classes/223/examples/">examples directory</a>.</p>
+<p>The links above point to <code>www.cs.yale.edu</code>. In case this machine is down, a backup copy of these files can be found at <a href="https://www.dropbox.com/sh/omg9qcxkxeiam2o/AACRAJOTj8af6V7RC1cXBHjQa?dl=0" class="uri">https://www.dropbox.com/sh/omg9qcxkxeiam2o/AACRAJOTj8af6V7RC1cXBHjQa?dl=0</a>.</p>
+<p>This document is a work in progress, and is likely to change frequently as the semester progresses.</p>
+<h3 id="license"><span class="header-section-number">1.1.1</span> License</h3>
+<p>Copyright © 2002–2017 by James Aspnes. Distributed under a Creative Commons Attribution-ShareAlike 4.0 International license: <a href="https://creativecommons.org/licenses/by-sa/4.0/" class="uri">https://creativecommons.org/licenses/by-sa/4.0/</a>.</p>
+<h3 id="resources"><span class="header-section-number">1.1.2</span> Resources</h3>
+<ul>
+<li><a href="#schedule">Schedule</a>: list of lectures and events. Includes reading assignments and pointers to lecture notes.</li>
+<li><a href="http://www.cs.yale.edu/homes/aspnes/#calendar">Calendar</a>: shows office hours, lectures, and assignment deadlines.</li>
+<li><a href="#assignments">Assignments</a>: list of homeworks and exams.</li>
+<li><a href="http://cs.yale.edu/homes/aspnes/classes/223/notes.html">Notes</a>: notes on various topics relevant to the course.</li>
+<li><a href="#syllabus">Syllabus</a>.</li>
+<li><a href="https://piazza.com/yale/spring2015/cpsc223">Piazza</a>. This is a web-based question-and-answer system for communicating with course staff and other students.</li>
+<li><a href="http://www.cs.yale.edu/homes/aspnes/pinewiki/CS223.html">2012 web pages</a>: web pages for 2012 version of the course.</li>
+<li><a href="http://www.cs.usfca.edu/%7Egalles/visualization/Algorithms.html">Data structure visualizations</a>. Much better than the ASCII art (at best) illustrations you will find in this document.<a href="#fn1" class="footnoteRef" id="fnref1"><sup>1</sup></a></li>
+</ul>
+<h3 id="Documentation"><span class="header-section-number">1.1.3</span> Documentation</h3>
+<ul>
+<li><a href="#zoo">How to use the Zoo</a></li>
+<li><a href="http://zoo.cs.yale.edu/help/">Zoo help</a></li>
+<li><a href="#codingHints">Coding hints</a></li>
+<li>GNU <a href="http://www.delorie.com/gnu/docs/">Online Documentation</a>,
+<ul>
+<li><a href="http://www.delorie.com/gnu/docs/gcc/gcc_toc.html">gcc</a></li>
+<li><a href="http://www.delorie.com/gnu/docs/gdb/gdb_toc.html">gdb</a></li>
+<li><a href="http://www.gnu.org/software/ddd/manual/html_mono/ddd.html">ddd</a></li>
+<li><a href="http://www.delorie.com/gnu/docs/emacs/emacs_toc.html">emacs</a></li>
+</ul></li>
+<li>Frequently-asked questions (FAQ) for
+<ul>
+<li><a href="http://www.eskimo.com/%7Escs/C-faq.top.html">C</a></li>
+<li><a href="http://www.faqs.org/faqs/C-faq/abridged/">C (abridged)</a></li>
+<li><a href="http://www.faqs.org/faqs/unix-faq/faq/">Unix</a></li>
+<li><a href="http://www.faqs.org/faqs/GNU-Emacs-FAQ/">Emacs</a></li>
+</ul></li>
+<li><a href="http://www.lysator.liu.se/c/">Programming in C</a></li>
+<li><a href="http://valgrind.org/docs/">Valgrind documentation</a></li>
+<li><a href="http://www.mcsr.olemiss.edu/unixhelp/">UNIXhelp for Users</a></li>
+</ul>
+<h3 id="questions-and-comments"><span class="header-section-number">1.1.4</span> Questions and comments</h3>
+<p>Please feel free to send questions or comments on the class or anything connected to it to <a href="mailto:james.aspnes@gmail.com">james.aspnes@gmail.com</a>.</p>
+<p>For questions about individual assignments, you may be able to get a faster response using <a href="http://piazza.com/yale/spring2015/cpsc223">Piazza</a>.
+ Note that questions you ask there are visible to other students if not
+specifically marked private, so be careful about broadcasting your draft
+ solutions.</p>
+<h2 id="schedule"><span class="header-section-number">1.2</span> Lecture schedule</h2>
+<h3 id="topics-by-date"><span class="header-section-number">1.2.1</span> Topics by date</h3>
+<dl>
+<dt>2015-01-12</dt>
+<dd>Introduction. What the course is about. Getting started with C: running the compiler, the <code>main</code> function, integer data types, a few simple programs. Readings: <a href="#courseAdministration">Course administration</a>, <a href="#zoo">The Zoo and the Zoo Annex</a>, <a href="#linux">The Linux programming environment</a>, <a href="#CProgramStructure">Structure of a C program</a>, <a href="#basicIntegerTypes">Basic integer types</a>; Kernighan and Ritchie §§1.1 and 1.2.
+</dd>
+<dt>2015-01-14</dt>
+<dd>Arithmetic in C. Readings: <a href="#integerConstants">Integer constants</a>, <a href="#integerOperators">Integer operators</a>, <a href="#operatorPrecedence">Operator precedence</a>
+ (we didn't actually do the full operator precedence table in class, but
+ it's a nice summary of what operators are available, and if you don't
+like typing parentheses everywhere it's useful to have an idea of how
+precedence and associativity work); K&amp;R §§1.4, 2.2, 2.3, 2.5, 2.6,
+2.8, 2.9, and 2.12.
+</dd>
+<dt>2015-01-16</dt>
+<dd>Local variables and assignment operators. The <code>++</code> and <code>--</code> operators. More specialized operators: <code>,</code> and <code>?:</code>. I/O using <code>getchar</code> and <code>putchar</code>. Control structures: <code>if</code>, <code>switch</code>, <code>while</code>, <code>for</code>. Readings: <a href="#variables">Variables</a>, <a href="#statements">Statements</a> through <a href="#forLoop">The <code>for</code> loop</a>; K&amp;R §1.3, 1.5, 2.1, 2.4, and 3.1–3.6.
+</dd>
+<dt>2015-01-21</dt>
+<dd>Goto-like control structures: <code>break</code>, <code>continue</code>, <code>goto</code>, and <code>return</code>. Functions. Readings: Rest of <a href="#statements">Statements</a>, <a href="#functions">Functions</a>;
+ K&amp;R §3.7, 3.8, 4.1, and 4.5. Examples from lecture (with a bit of
+after-the-fact sprucing up) can be found in the <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-01-21/">examples directory</a>.
+</dd>
+<dt>2015-01-26</dt>
+<dd>Start of pointers and arrays: pointer types and pointer variables. The <code>&amp;</code> and <code>*</code> operators. Using a pointer to get a value out of a function. Array declarations. Preventing array modification with <code>const</code>. Storage allocation: <code>malloc</code>, <code>free</code>, and <code>realloc</code>. Readings: <a href="#pointers">Pointers</a> up through <a href="#arraysAndFunctions">Arrays and functions</a>; K&amp;R §5.1–5.4. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-01-26/">Examples</a>.
+</dd>
+<dt>2015-01-28</dt>
+<dd>More on pointers and arrays: Multi-dimensional arrays, C99
+variable-length arrays. Function pointers. Finding storage allocation
+bugs using <a href="#valgrind"><code>valgrind</code></a>. Readings: Rest of <a href="#pointers">Pointers</a>, <a href="#valgrind">Valgrind</a>; K&amp;R §§5.6–5.9, 5.11. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-01-28/">Examples from lecture</a>: original array-of-pointers-to-arrays implementation <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-01-28/array2dOriginal.c">array2dOriginal.c</a>, <code>valgrind</code>-approved version after removing <code>printArray2D</code> on uninitialized data <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-01-28/array2d.c">array2d.c</a>, buggy version demonstrating how <code>valgrind</code> complains about writing off the end of an array and doing multiple <code>free</code>s of the same block <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-01-28/array2dBad.c">array2dBad.c</a>.
+ An issue that came up in lecture is that all of these implementations
+are a little bit inefficient in that they allocate a separate block for
+each row, which means extra overhead for <code>malloc</code>'s data and the possibility that the rows may be scattered throughout memory, causing issues with virtual memory paging. Here is <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-01-28/array2dPacked.c">yet another implementation</a> that gets space for both the array of row pointers and all rows with a single call to <code>malloc</code> and then does pointer arithmetic to slice the space up.<a href="#fn2" class="footnoteRef" id="fnref2"><sup>2</sup></a> There is also a <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/pointers/malloc2d.c">generic version</a> of the simple approach in the section on <a href="#multidimensionalArrays">multidimensional arrays</a>.
+</dd>
+<dt>2015-02-02</dt>
+<dd>Strings in C: null-terminated strings. What goes wrong if you forget to put on the null. Various library functions from <code>&lt;string.h&gt;</code> and how one might implement them. The perils of <code>gets</code> and bounded-size buffers, and how to use <code>malloc</code> and <code>realloc</code> to avoid them. Meaning of <code>argv</code>. Readings: <a href="#strings">Strings</a>; K&amp;R §§5.5, 5.10, B2. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-02-02/">Examples from lecture</a>.
+</dd>
+<dt>2015-02-04</dt>
+<dd>Structured data types: <code>struct</code>s, <code>union</code>s, and <code>enum</code>s. Type aliases using <code>typedef</code>. Opaque <code>struct</code> definitions and separating interface from implementation. Readings: <a href="#structuredDataTypes">Structured data types</a>, <a href="#typedef"><code>typedef</code></a>; K&amp;R Chapter 6, §2.5 (for <code>enum</code>s). <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-02-04/">Examples from lecture</a>, plus a <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-02-04/intArray/">working version of the intArray implementation</a> together with the <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-02-04/intArray/stubs/intArrayStub.c">originally stubby intArray.c</a> and the <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-02-04/intArray/stubs/intArrayPartial.c">partial version from the end of lecture</a>.
+</dd>
+<dt>2015-02-09</dt>
+<dd>More C stuff: Makefiles. Floating-point arithmetic and the math
+library. Timing code with a profiler. Basics of file I/O. Readings: <a href="#make">Make</a>, <a href="#floatingPointTypes">Floating point types</a>, <a href="#performanceTuning">Performance tuning</a>, <a href="#IO">Input and output</a>; K&amp;R §§2.7 and 7.5, Appendix B4. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-02-09/">Examples from lecture</a>.
+</dd>
+<dt>2015-02-11</dt>
+<dd>Start of data structures: efficiency of different data structures, linked lists. Readings: <a href="#asymptoticNotation">Asymptotic notation</a>, <a href="#linkedLists">Linked lists</a>. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-02-11/">Examples from lecture</a> including improved <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-02-11/stack.c">stack.c</a> and working <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-02-11/queue.c">queue.c</a>.
+</dd>
+<dt>2015-02-16</dt>
+<dd>Invariants and representation functions for abstract data types. The
+ deque type, with two implementations: a circular doubly-linked list and
+ a ring buffer. Readings: <a href="#abstractDataTypes">Abstract data types</a>, <a href="#deques">deques</a>. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-02-16/">Examples from lecture</a>.
+</dd>
+<dt>2015-02-18</dt>
+<dd>Hash Wednesday: set and map data types, hash tables. Readings: <a href="#hashTables">Hash tables</a>. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-02-18/">Examples from lecture</a>.
+</dd>
+<dt>2015-02-23</dt>
+<dd>Various C preprocessor tricks: macros with arguments, string
+processing in macros, non-syntactic macros, conditional compilation.
+Readings: <a href="#macros">Macros</a>; K&amp;R Appendix A12.3. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-02-23/">Examples from lecture</a>.
+</dd>
+<dt>2015-02-25</dt>
+<dd>Polymorphism in C: generic containers and object-oriented
+programming (of a sort) using structs full of function pointers. Example
+ of using <code>git</code> for version control (you will not be tested on version control). Readings: <a href="#genericContainers">Generic containers</a>, <a href="#versionControl">version control</a>. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-02-23/">Examples from lecture</a>. Unlike the actual example from lecture, this version really works, instead of just inserting 0 over and over again.
+</dd>
+<dt>2015-03-02</dt>
+<dd>Recursion. Readings: <a href="#recursion">Recursion</a>. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-03-02">Examples from lecture</a>.
+</dd>
+<dt>2015-03-04</dt>
+<dd><strong>Exam 1</strong>. This took place at the usual class time
+(1:00–2:15), and was a closed-book test potentially covering all
+material discussed in lecture prior to the exam. Sample exams from
+previous years: <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2005/exam1.pdf">2005</a>, <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2012/exam1.pdf">2012</a>. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/exams/exam1.pdf">Sample solutions</a>.
+</dd>
+<dt>2015-03-23</dt>
+<dd>Binary trees and heaps. Readings: <a href="#binaryTrees">Binary trees</a>, <a href="#heaps">Heaps</a>. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-03-23">Examples from lecture</a>.
+</dd>
+<dt>2015-03-25</dt>
+<dd>Binary search tree implementation basics: insertion, search, deletion, various kinds of traversals. Readings: <a href="#binarySearchTrees">Binary search trees</a>. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-03-25">Examples from lecture</a>.
+</dd>
+<dt>2015-03-30</dt>
+<dd>Balanced binary search trees: augmented trees and AVL trees. We also saw a little bit about <a href="#redBlackTrees">red-black trees</a>, but not enough to actually be useful. Readings: <a href="#augmentedTrees">Augmented trees</a>, <a href="#AVLtrees">AVL trees</a>. Example from lecture: see <a href="#avlTreeImplementation">AVL tree implementation</a>.
+</dd>
+<dt>2015-04-01</dt>
+<dd>Self-adjusting binary search trees: splay trees, a little bit about scapegoat trees. Readings: <a href="#splayTrees">Splay trees</a>. There was no new code in lecture but we did spend a while looking at a pre-prepared <a href="#splayTreeImplementation">splay tree implementation</a>.
+</dd>
+<dt>2015-04-06</dt>
+<dd>Graphs: structure of a graph, graph representations, basic ideas of graph search. Readings: <a href="#graphs">Graphs</a> up to start of <a href="#graphSearch">graph search</a>.
+</dd>
+<dt>2015-04-08</dt>
+<dd>More graphs: depth-first and breadth-first search. Minimum spanning trees and shortest paths. Readings: Rest of <a href="#graphs">graphs</a>. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-04-08/">BFS example from lecture</a>. The program I was using in lecture to make nice pictures of graphs was <code>dot</code>, specifically <code>dot -Tpng</code>. Thanks to the good folks at ITS, this is now installed in the Zoo along with the rest of the <a href="http://graphviz.org/">GraphViz</a> tools.
+</dd>
+<dt>2015-04-13</dt>
+<dd>Dynamic programming: all-pairs shortest paths, longest increasing subsequence, longest common subsequence. Readings: <a href="#dynamicProgramming">Dynamic programming</a>. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-04-13">Examples from lecture</a>.
+</dd>
+<dt>2015-04-15</dt>
+<dd>Randomized data structures. Readings: <a href="#randomization">Randomization</a>. The <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/lecture/2015-04-15/devurandom.c">devurandom.c</a> example from lecture.
+</dd>
+<dt>2015-04-20</dt>
+<dd>Data structures for strings: tries, TSTs, and variants; radix sort. Readings: <a href="#stringProcessing">String processing</a>.
+</dd>
+<dt>2015-04-22</dt>
+<dd><strong>Exam 2</strong>. This took place at the usual class time
+(1:00–2:15), and was a closed-book test potentially covering all material
+discussed in lecture during the semester. Sample exams from previous
+years: <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2005/exam2.pdf">2005</a>, <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2012/exam2.pdf">2012</a>. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/exams/exam2.pdf">Sample solutions</a>.
+</dd>
+</dl>
+<h3 id="topics-not-covered-in-2015"><span class="header-section-number">1.2.2</span> Topics not covered in 2015</h3>
+<p>Here are some topics that were not covered specifically this semester but can be found in the notes.</p>
+<ul>
+<li><a href="#iterators">Iterators</a></li>
+<li><a href="#suffixArrays">Suffix arrays</a></li>
+<li><a href="#testingDuringDevelopment">Testing during development</a></li>
+<li><a href="#algorithmDesignTechniques">Algorithm design techniques</a></li>
+<li><a href="#bitManipulation">Bit manipulation</a></li>
+<li><a href="#persistence">Persistence</a></li>
+<li><a href="#cplusplus">C++</a></li>
+<li><a href="#whatNext">What next</a></li>
+</ul>
+<h2 id="syllabus"><span class="header-section-number">1.3</span> Syllabus</h2>
+<p>Syllabus for Computer Science 223b, Data Structures and Programming Techniques. Instructor: James Aspnes.</p>
+<h3 id="On-line_course_information"><span class="header-section-number">1.3.1</span> On-line course information</h3>
+<p>On-line information about the course, including the lecture schedule,
+ lecture notes, and information about assignments, can be found at <a href="http://www.cs.yale.edu/homes/aspnes/classes/223/notes.html" class="uri">http://www.cs.yale.edu/homes/aspnes/classes/223/notes.html</a>. This document will be updated frequently during the semester, and is also available in <a href="http://www.cs.yale.edu/homes/aspnes/classes/223/notes.pdf">PDF</a> format.</p>
+<h3 id="Meeting_times"><span class="header-section-number">1.3.2</span> Meeting times</h3>
+<p>Lectures are MW 13:00–14:15 in WLH 201 (Sudler Hall). The <a href="#schedule">lecture schedule</a> can be found in the course notes. A <a href="http://www.cs.yale.edu/homes/aspnes/#calendar">calendar</a> is also available.</p>
+<h3 id="Synopsis_of_the_course"><span class="header-section-number">1.3.3</span> Synopsis of the course</h3>
+<p>Topics include programming in C; data structures (arrays, stacks,
+queues, lists, trees, heaps, graphs); sorting and searching; storage
+allocation and management; data abstraction; programming style; testing
+and debugging; writing efficient programs.</p>
+<h3 id="Prerequisites"><span class="header-section-number">1.3.4</span> Prerequisites</h3>
+<p>CPSC 201, or equivalent background. See me if you aren't sure.</p>
+<h3 id="Textbook"><span class="header-section-number">1.3.5</span> Textbook</h3>
+<p>The textbook for this course is:</p>
+<ul>
+<li><em>The C Programming Language (2nd Edition),</em> by Brian W.
+Kernighan and Dennis M. Ritchie. Prentice Hall, 1988. ISBN 0131103628.
+The definitive introduction to C. You should memorize this book.</li>
+</ul>
+<p>If you are on the Yale campus or are using VPN to get to Yale's network, you can access this book at <a href="http://proquest.safaribooksonline.com/book/programming/c/9780133086249" class="uri">http://proquest.safaribooksonline.com/book/programming/c/9780133086249</a>. You do not need to buy a physical copy of this book unless you want to.</p>
+<h3 id="Course_requirements"><span class="header-section-number">1.3.6</span> Course requirements</h3>
+<p>Nine weekly homework assignments, and two in-class exams. Assignments
+ will be weighted equally in computing the final grade. Each exam will
+count as three assignments.</p>
+<h3 id="staff"><span class="header-section-number">1.3.7</span> Staff</h3>
+<p>See the <a href="http://www.cs.yale.edu/homes/aspnes/#calendar">calendar</a> for open office hours.</p>
+<h4 id="instructor"><span class="header-section-number">1.3.7.1</span> Instructor</h4>
+<p>James Aspnes (<a href="mailto:james.aspnes@gmail.com">james.aspnes@gmail.com</a>, <a href="http://www.cs.yale.edu/homes/aspnes/" class="uri">http://www.cs.yale.edu/homes/aspnes/</a>). Office: AKW 401. If my open office hours don't work for you, please send email to make an appointment.</p>
+<h4 id="teaching-fellows"><span class="header-section-number">1.3.7.2</span> Teaching Fellows</h4>
+<ul>
+<li>Yujia Hu <a href="mailto:yujia.hu@yale.edu">yujia.hu@yale.edu</a></li>
+<li>Joshua Lockerman <a href="mailto:joshua.lockerman@yale.edu">joshua.lockerman@yale.edu</a></li>
+<li>Junaid Nomani <a href="mailto:junaid.nomani@yale.edu">junaid.nomani@yale.edu</a></li>
+</ul>
+<h4 id="peer-tutors"><span class="header-section-number">1.3.7.3</span> Peer tutors</h4>
+<ul>
+<li>Iulia Tamas <a href="mailto:iulia.tamas@yale.edu">iulia.tamas@yale.edu</a></li>
+<li>Dylan Visher <a href="mailto:dylan.visher@yale.edu">dylan.visher@yale.edu</a></li>
+<li>Lining Wang <a href="mailto:lining.wang@yale.edu">lining.wang@yale.edu</a></li>
+<li>Blake Woodworth <a href="mailto:blake.woodworth@yale.edu">blake.woodworth@yale.edu</a></li>
+</ul>
+<h3 id="Use_of_outside_help"><span class="header-section-number">1.3.8</span> Use of outside help</h3>
+<p>Students are free to discuss homework problems and course material
+with each other, and to consult with the instructor or a TA. Solutions
+handed in, however, should be the student's own work. If a student
+benefits substantially from hints or solutions received from fellow
+students or from outside sources, then the student should hand in their
+solution but acknowledge the outside sources, and we will apportion
+credit accordingly. Using outside resources in solving a problem is
+acceptable but plagiarism is not.</p>
+<h3 id="Clarifications_for_homework_assignments"><span class="header-section-number">1.3.9</span> Clarifications for homework assignments</h3>
+<p>From time to time, ambiguities and errors may creep into homework
+assignments. Questions about the interpretation of homework assignments
+should be sent to the instructor at <a href="mailto:james.aspnes@gmail.com">james.aspnes@gmail.com</a>. Clarifications will appear in the on-line version of the assignment.</p>
+<h3 id="Late_assignments"><span class="header-section-number">1.3.10</span> Late assignments</h3>
+<p>Assignments submitted after the deadline without a Dean's Excuse are automatically assessed a 2%/hour penalty.</p>
+<h2 id="introduction"><span class="header-section-number">1.4</span> Introduction</h2>
+<p>There are two purposes to this course: to teach you to program in the
+ C programming language, and to teach you how to choose, implement, and
+use data structures and standard programming techniques.</p>
+<h3 id="whyC"><span class="header-section-number">1.4.1</span> Why should you learn to program in C?</h3>
+<ul>
+<li>It is the <em>de facto</em> substandard of programming languages.
+<ul>
+<li>C runs on everything.</li>
+<li>C lets you write programs that use very few resources.</li>
+<li>C gives you near-total control over the system, down to the level of pushing around individual bits with your bare hands.</li>
+<li>C imposes very few constraints on programming style: unlike
+higher-level languages, C doesn't have much of an ideology. There are
+very few programs you can't write in C.</li>
+<li>Many of the programming languages people actually use (Visual Basic,
+ perl, python, ruby, PHP, etc.) are executed by interpreters written in C
+ (or <a href="#cplusplus">C++</a>, an extension to C).</li>
+</ul></li>
+<li>You will learn discipline.
+<ul>
+<li>C makes it easy to shoot yourself in the foot.</li>
+<li>You can learn to avoid this by being careful about where you point it.</li>
+<li>Pain is a powerful teacher of caution.</li>
+</ul></li>
+<li>You will fail CS323 if you don't learn C really well in CS223 (CS majors only).</li>
+</ul>
+<p>On the other hand, there are many reasons why you might not want to
+use C later in life. It's missing a lot of features of modern programming
+languages, including:</p>
+<ul>
+<li>A garbage collector.</li>
+<li>Minimal programmer-protection features like array bounds-checking or a strong type system.</li>
+<li>Non-trivial built-in data structures.</li>
+<li>Language support for exceptions, namespaces, object-oriented programming, etc.</li>
+</ul>
+<p>For most problems where minimizing programmer time and maximizing
+robustness are more important than minimizing runtime, other languages
+are a better choice. But for this class, we'll be using C.</p>
+<p>If you want to read a lot of flaming about what C is or is not good for, see <a href="http://c2.com/cgi/wiki?CeeLanguage" class="uri">http://c2.com/cgi/wiki?CeeLanguage</a>.</p>
+<h3 id="why-should-you-learn-about-data-structures-and-programming-techniques"><span class="header-section-number">1.4.2</span> Why should you learn about data structures and programming techniques?</h3>
+<p>For small programs, you don't need much in the way of data
+structures. But as soon as you are representing reasonably complicated
+data, you need some place to store it. Thinking about how you want to
+store and organize this data can be a good framework for organizing the
+rest of your program.</p>
+<p>Many programming environments will give you a rich collection of
+built-in data structures as part of their standard library. C does not:
+unless you use third-party libraries, any data structure you want in C
+you will have to build yourself. For most data structures this will
+require an understanding of pointers and storage allocation, mechanisms
+often hidden in other languages. Understanding these concepts will give
+you a deeper understanding of how computers actually work, and will both
+ let you function in minimalist environments where you don't have a lot
+of support and let you understand what more convenient environments are
+doing under their abstraction barriers.</p>
+<p>The same applies to the various programming techniques we will
+discuss in this class. While some of the issues that come up are
+specific to C and similar low-level languages (particularly issues
+involving disciplined management of storage), some techniques will apply
+ no matter what kinds of programs you are writing and all will help in
+understanding what your computer systems are doing even if some of the
+details are hidden.</p>
+<h1 id="zoo"><span class="header-section-number">2</span> The Zoo and the Zoo Annex</h1>
+<p>The main undergraduate computing facility for Computer Science is the
+ Zoo, located on the third floor of AKW. The Zoo contains a large number
+ of Linux workstations.</p>
+<p>You don't need to do your work for this class in the Zoo, but that is
+ where your assignments will be submitted and tested, so if you do
+development elsewhere, you will need to copy your files over and make
+sure that they work there as well.</p>
+<p>The "Zoo Annex" is the informal name for 17HH room 111, which is
+reserved for CS student use from 19:00 to 23:59 Sundays through
+Thursdays. The machines in 17HH 111 run Windows, but once logged in, you
+ can create a Linux desktop remotely from a Zoo machine using a program
+called <a href="#fastX">FastX</a>. You can also download and use FastX
+from your own machine to get access to the full Zoo environment if you
+are running Windows or OSX.</p>
+<p>The best place for information about the Zoo is at <a href="http://zoo.cs.yale.edu/" class="uri">http://zoo.cs.yale.edu/</a>. Below are some points that are of particular relevance for CS223 students.</p>
+<h2 id="Getting_an_account"><span class="header-section-number">2.1</span> Getting an account</h2>
+<p>To get an account in the Zoo, follow the instructions at <a href="http://zoo.cs.yale.edu/accounts.html" class="uri">http://zoo.cs.yale.edu/accounts.html</a>. You will need your NetID and password to sign up for an account.</p>
+<p>Even if you already have an account, you still need to use this form
+to register as a CS 223 student, or you will not be able to submit
+assignments.</p>
+<h2 id="Getting_into_the_room"><span class="header-section-number">2.2</span> Getting into the room</h2>
+<p>The Zoo is located on the third floor of Arthur K Watson Hall, toward
+ the front of the building. If you are a Yale student, your ID should
+get you into the building and the room. If you are not a student, you
+will need to get your ID validated in AKW 008a to get in after hours.</p>
+<h2 id="Remote_use"><span class="header-section-number">2.3</span> Remote use</h2>
+<p>There are several options for remote use of the Zoo. <a href="#fastX">FastX</a> is the most straightforward if you want to replicate the Zoo experience elsewhere. I personally tend to connect using <a href="#zooSSH">ssh</a>.</p>
+<h3 id="fastX"><span class="header-section-number">2.3.1</span> Access using FastX</h3>
+<p>These instructions are adapted from Stan Eisenstat's documentation for CS323.</p>
+<p>To use FastX, you will need an installation key. If you are
+downloading the Windows or OSX version from the Yale Software Library,
+this will appear on the download page. Otherwise, see the instructions
+in the following section.</p>
+<h4 id="fastXLicense"><span class="header-section-number">2.3.1.1</span> Getting a license key</h4>
+<p>In order to use FastX, you will need a license key.</p>
+<ol style="list-style-type: decimal">
+<li>Go to the <a href="http://software.yale.edu/Library/Windows/DT-Starnet-FastX-W">Yale Software Library page for FastX (Windows version)</a>.</li>
+<li>Log in with your NetID and password if required.</li>
+<li>Copy the installation key that appears below the "Download Now" button.</li>
+</ol>
+<h4 id="fastXZooAnnex"><span class="header-section-number">2.3.1.2</span> FastX in the Zoo Annex</h4>
+<p>Using FastX you can turn a window on a Windows machine in the Zoo
+Annex (aka, the 17 Hillhouse, Room 111, cluster) into the same Linux
+desktop that you see when you log into a Zoo node.</p>
+<p>Do the following:</p>
+<ol style="list-style-type: decimal">
+<li>If you are not logged in:
+<ul>
+<li>Press CTRL + ALT + DELETE. A new screen with a Usage Agreement will appear.</li>
+<li>Click "OK". A new screen with a NetID and Password box will appear.</li>
+<li>Enter your NetID and Password and click "Continue". Windows will log you in.</li>
+</ul></li>
+<li>Click on the Windows icon in the lower left-hand corner of the screen. A new box will appear.</li>
+<li>Mouse over the "Search programs and files" box in the new window and
+ type "fastx" (but do not hit return). A list of "Programs" will appear.</li>
+<li>Click on "FastX" under "Programs" to launch FastX. (If a "Licensing" window appears asking for an activation key, enter <a href="#fastXLicense">the installation key from the download page</a>, click "OK", and click "Close" to dismiss the new window that appears.)</li>
+<li>If there is no entry named "Zoo" in the FastX window:
+<ul>
+<li>Click on the green + sign in the upper left-hand corner. A new window will appear.</li>
+<li>Enter "Zoo" in the Name field.</li>
+<li>Enter "node.zoo.cs.yale.edu" in the Host field.</li>
+<li>Do not change the Port or User field.</li>
+<li>Click "Save" to save the configuration.</li>
+</ul></li>
+<li>Double click on the "Zoo" entry. A "Login" box should appear. If a "Password" box appears instead:
+<ul>
+<li>Click "Cancel".</li>
+<li>Click on the green pencil in the upper left-hand corner of the FastX window.</li>
+<li>Delete the contents of the User field. A grayed out "Joe" will appear.</li>
+<li>Click "Save" to save the configuration.</li>
+<li>Double click on the "Zoo" entry again. A "Login" box will appear.</li>
+</ul></li>
+<li>Enter your Yale netID in the "Login" box (it should already be
+there) and click "Continue". (If a warning appears asking whether you
+want to accept a new key, click on "Accept" to dismiss it.) A "Password"
+ box will appear.</li>
+<li>Enter your password in the "Password" box and click on "Continue". A
+ new "Start new session" entry will appear in the right-hand side of the
+ FastX window.</li>
+<li>Click on "Start new session" to produce a pulldown menu and click on
+ either "Gnome" or "KDE" to choose a window manager (Gnome is the
+default in the Zoo) or "Xterm" to open a terminal window.<br>
+<strong>WARNING:</strong> If you click on "Gnome" but the desktop that
+appears does not have the usual "Applications" and "Places" in the upper
+ left-hand corner, then:
+<ol style="list-style-type: decimal">
+<li>Type ALT-F2 (that is, press the ALT and F2 keys simultaneously) to open a command field.</li>
+<li>Enter the command "gnome-shell --mode=classic -r" (without the surrounding quotes) and press the "Enter" key.</li>
+</ol></li>
+<li>A new window with a Zoo Linux desktop should open. You may resize it
+ by dragging on the lower left-hand corner if you prefer a different
+size.</li>
+<li>When you log out of the Linux desktop, close the "FastX" window by clicking the red X in the upper right-hand corner.</li>
+<li>Do not forget to log out of Windows when you are done.</li>
+</ol>
+<h4 id="fastXWindows"><span class="header-section-number">2.3.1.3</span> Using FastX from Windows</h4>
+<p>Using FastX you can turn a window on Windows into the same Linux desktop that you see when you log into a Zoo node.</p>
+<p>To install the software on your own Windows machine:</p>
+<ol style="list-style-type: decimal">
+<li>Go to the <a href="http://software.yale.edu/">Yale Software Library</a> and click on "Windows".</li>
+<li>Scroll through the list and click on "FastX".</li>
+<li>Copy the installation key that appears below the "Download Now" button.</li>
+<li>Click on the "Download Now" button and follow the instructions for installing the software.</li>
+<li>Launch FastX.</li>
+<li>Click on the green + sign in the upper left-hand corner of the FastX window. In the new window that appears:
+<ul>
+<li>Enter "Zoo" in the Name field.</li>
+<li>Enter "node.zoo.cs.yale.edu" in the Host field.</li>
+<li>Do not change the Port field.</li>
+<li>Enter your Yale netID in the User field.</li>
+<li>Click "Save" to save the configuration.</li>
+</ul></li>
+<li>Double click on the "Zoo" entry.</li>
+<li>When the "Licensing" window appears asking for an activation key, enter <a href="#fastXLicense">the number copied above</a> and click "OK". A new window will appear. Click "Close" to dismiss it.</li>
+<li>Quit FastX.</li>
+</ol>
+<p>If you run into difficulties up to this point, seek help from a student tech or one of the instructional staff.</p>
+<p>Once you have installed the software, do the following to run FastX:</p>
+<ol style="list-style-type: decimal">
+<li>Launch FastX.</li>
+<li>Double click on the "Zoo" entry. A "Password" box will appear.</li>
+<li>Enter the password for your Yale netID and click "OK". A "Start a new session" entry will appear in the FastX window.</li>
+<li>Click on "Start new session" to produce a pulldown menu and click on
+ either "Gnome" or "KDE" to choose a window manager (Gnome is the
+default in the Zoo) or "Xterm" to open a terminal window.<br>
+<strong>WARNING:</strong> If you click on "Gnome" but the desktop that
+appears does not have the usual "Applications" and "Places" in the upper
+ left-hand corner, then:
+<ol style="list-style-type: decimal">
+<li>Type ALT-F2 (that is, press the ALT and F2 keys simultaneously) to open a command field.</li>
+<li>Enter the command "gnome-shell --mode=classic -r" (without the surrounding quotes) and press the "Enter" key.</li>
+</ol></li>
+<li>A new window with a Zoo Linux desktop should open. You may resize it
+ by dragging on the lower left-hand corner if you prefer a different
+size.</li>
+<li>When you log out of the Linux desktop, close the "FastX" window by clicking the red X in the upper right-hand corner.</li>
+</ol>
+<h4 id="fastXOSX"><span class="header-section-number">2.3.1.4</span> Using FastX from OSX</h4>
+<p>Using FastX you can turn a window on a Mac into the same Linux desktop that you see when you log into a Zoo node.</p>
+<p>To install the software on your own Mac:</p>
+<ol style="list-style-type: decimal">
+<li>Go to the <a href="http://software.yale.edu/">Yale Software Library</a> and click on "Mac".</li>
+<li>Scroll through the list and click on "FastX".</li>
+<li>Copy the installation key that appears below the "Download Now" button.</li>
+<li>Click on the "Download Now" button and save the downloaded file as "fastx_1016.dmg" (for version 1.0.16).</li>
+<li>Click on the downloaded file "fastx_1016.dmg" to open the disk
+image. A "FastX" disk icon will appear on your desktop, and a folder
+containing a "FastX" icon will appear.</li>
+<li>Drag the "FastX" icon to the folder where you would like to save it, and launch FastX by clicking on it.</li>
+<li>Click on the green + sign in the upper left-hand corner of the FastX window. In the new window that appears:
+<ul>
+<li>Enter "Zoo" in the Name field.</li>
+<li>Enter "node.zoo.cs.yale.edu" in the Host field.</li>
+<li>Do not change the Port field.</li>
+<li>Enter your Yale netID in the User field.</li>
+<li>Click "Save" to save the configuration.</li>
+</ul></li>
+<li>Double click on the "Zoo" entry.</li>
+<li>When the "Licensing" window appears asking for an activation key, enter <a href="#fastXLicense">the number copied above</a> and click "OK". A new window will appear. Click "Close" to dismiss it.</li>
+<li>Quit FastX.</li>
+<li>Drag the "FastX" disk icon and then the "fastx_1016.dmg" icon to the Trash to clean up.</li>
+</ol>
+<p>If you run into difficulties up to this point, seek help from a student tech or one of the instructional staff.</p>
+<p>Once you have installed the software, do the following to run FastX:</p>
+<ol style="list-style-type: decimal">
+<li>Launch FastX from the folder where you saved it.</li>
+<li>Double click on "Zoo". A "Password" box will appear, possibly behind
+ the FastX window. (If it does not, click on the "FastX" icon that is
+flashing in your dock.)</li>
+<li>Enter the password for your Yale netID and click "OK". A "Start a new session" entry will appear in the FastX window.</li>
+<li>Click on "Start new session" to produce a pulldown menu and click on
+ either "Gnome" or "KDE" to choose a window manager (Gnome is the
+default in the Zoo) or "Xterm" to open a terminal window.<br>
+<strong>WARNING:</strong> If you click on "Gnome" but the desktop that
+appears does not have the usual "Applications" and "Places" in the upper
+ left-hand corner, then:
+<ol style="list-style-type: decimal">
+<li>Type ALT-F2 (that is, press the ALT and F2 keys simultaneously) to open a command field.</li>
+<li>Enter the command "gnome-shell --mode=classic -r" (without the surrounding quotes) and press the "Enter" key.</li>
+</ol></li>
+<li>A new window with a Zoo Linux desktop should open behind the FastX
+window. You may resize it by dragging on the lower left-hand corner if
+you prefer a different size.</li>
+<li>When you log out of the Linux desktop, close the "FastX" window by clicking the red dot in the upper left-hand corner.</li>
+</ol>
+<h3 id="zooSSH"><span class="header-section-number">2.3.2</span> Terminal access</h3>
+<pre><code>Date: Mon, 13 Dec 2004 14:34:19 -0500 (EST)
+From: Jim Faulkner &lt;james.faulkner@yale.edu&gt;
+Subject: Accessing the Zoo
+
+Hello all,
+
+I've been asked to write up a quick guide on how to access the Linux
+computers in the Zoo. For those who need this information, please read
+on.
+
+There are 2 ways of accessing the Zoo nodes, by walking up to one and
+logging in on the console (the computers are located on the 3rd floor of
+AKW), or by connecting remotely via SSH. Telnet access is not allowed.
+SSH clients for various operating systems are available here:
+
+http://www.yale.edu/software/
+
+Mac OSX comes with an SSH client by default. A good choice for an SSH
+client if you run Microsoft Windows is PuTTY:
+
+http://www.chiark.greenend.org.uk/~sgtatham/putty/
+
+With the exception of a few legacy accounts, the Zoo uses your campus-wide
+NetID and password for login access. However, you must sign up for a Zoo
+account before access is allowed. To sign up for a Zoo account, go to
+this web page:
+
+http://zoo.cs.yale.edu/accounts.html
+
+Then login with your campus-wide NetID and password. You may choose a
+different shell, or set up your account to be enrolled in a class if that
+is appropriate for you, but neither is necessary. Just click "Submit".
+Within an hour, your Zoo account will be created, and you will receive
+more information via e-mail about how to access the Zoo.
+
+Users cannot log into zoo.cs.yale.edu (the central file server) directly,
+they must log into one of the Zoo nodes. Following is the list of Zoo
+nodes:
+
+aphid.zoo.cs.yale.edu lion.zoo.cs.yale.edu
+bumblebee.zoo.cs.yale.edu macaw.zoo.cs.yale.edu
+cardinal.zoo.cs.yale.edu monkey.zoo.cs.yale.edu
+chameleon.zoo.cs.yale.edu newt.zoo.cs.yale.edu
+cicada.zoo.cs.yale.edu peacock.zoo.cs.yale.edu
+cobra.zoo.cs.yale.edu perch.zoo.cs.yale.edu
+cricket.zoo.cs.yale.edu python.zoo.cs.yale.edu
+frog.zoo.cs.yale.edu rattlesnake.zoo.cs.yale.edu
+gator.zoo.cs.yale.edu rhino.zoo.cs.yale.edu
+giraffe.zoo.cs.yale.edu scorpion.zoo.cs.yale.edu
+grizzly.zoo.cs.yale.edu swan.zoo.cs.yale.edu
+hare.zoo.cs.yale.edu termite.zoo.cs.yale.edu
+hippo.zoo.cs.yale.edu tick.zoo.cs.yale.edu
+hornet.zoo.cs.yale.edu tiger.zoo.cs.yale.edu
+jaguar.zoo.cs.yale.edu tucan.zoo.cs.yale.edu
+koala.zoo.cs.yale.edu turtle.zoo.cs.yale.edu
+ladybug.zoo.cs.yale.edu viper.zoo.cs.yale.edu
+leopard.zoo.cs.yale.edu zebra.zoo.cs.yale.edu
+
+If you have already created an account, you can SSH directly to one of
+the above computers and log in with your campus-wide NetID and
+password. You can also SSH to node.zoo.cs.yale.edu, which will connect
+you to a random Zoo node.
+
+Feel free to contact me if you have any questions about the Zoo.
+
+thanks,
+Jim Faulkner
+Zoo Systems Administrator</code></pre>
+<h3 id="GUI_access"><span class="header-section-number">2.3.3</span> GUI access</h3>
+<p>(These notes from Debayan Gupta.)</p>
+<p>For Mac or Linux users, typing "ssh -X netID@node.zoo.cs.yale.edu"
+into a terminal and then running "nautilus" will produce an X window
+interface.</p>
+<p>When on Windows, I usually use Xming (I've included a step-by-step guide at the end of this mail).</p>
+<p>For transferring files, I use CoreFTP (<a href="http://www.coreftp.com/" class="uri">http://www.coreftp.com</a>). FileZilla (<a href="https://filezilla-project.org/" class="uri">https://filezilla-project.org/</a>) is another option.</p>
+<p>Step-by-step guide to Xming:</p>
+<p>You can download Xming from here: <a href="http://sourceforge.net/projects/xming/" class="uri">http://sourceforge.net/projects/xming/</a></p>
+<p>Download and install. Do NOT launch Xming at the end of your installation.</p>
+<p>Once you've installed Xming, go to your start menu and find XLaunch (it should be in the same folder as Xming).</p>
+<ol style="list-style-type: decimal">
+<li><p>Start XLaunch, and select "Multiple Windows". Leave "Display Number" as its default value. Click next.</p></li>
+<li><p>Select "Start a program". Click next.</p></li>
+<li><p>Type "nautilus" (or "terminal", if you want a terminal) into the "Start Program" text area. Select "Using PuTTY (plink.exe)".</p></li>
+<li><p>Type in the name of the computer (use "node.zoo.cs.yale.edu") in the "Connect to computer" text box.</p></li>
+<li><p>Type in your netID in the "Login as user" text box (you can leave the password blank). Click next.</p></li>
+<li><p>Make sure "Clipboard" is ticked. Leave everything else blank. Click next.</p></li>
+<li><p>Click "Save Configuration". When saving, make sure your filename
+ends with ".xlaunch" - this will let you connect with a click (you won't
+ need to do all this every time you connect).</p></li>
+<li><p>Click Finish.</p></li>
+<li><p>You will be prompted for your password - enter it. Ignore any security warnings.</p></li>
+<li><p>You now have a remote connection to the Zoo.</p></li>
+</ol>
+<p>For more options and information, you can go to: <a href="http://www.straightrunning.com/XmingNotes/" class="uri">http://www.straightrunning.com/XmingNotes/</a></p>
+<h2 id="compiling"><span class="header-section-number">2.4</span> How to compile and run programs</h2>
+<p>See the chapter on <a href="#zoo">how to use the Zoo</a> for details of particular commands. The basic steps are</p>
+<ul>
+<li>Creating the program with a text editor of your choosing. (I like <code class="backtick">vim</code> for long programs and <code class="backtick">cat</code> for very short ones.)</li>
+<li>Compiling it with <code class="backtick">gcc</code>.</li>
+<li>Running it.</li>
+</ul>
+<p>If any of these steps fail, the next step is debugging. We'll talk about debugging elsewhere.</p>
+<h3 id="Creating_the_program"><span class="header-section-number">2.4.1</span> Creating the program</h3>
+<p>Use your favorite text editor. The program file should have a name of the form <code class="backtick">foo.c</code>; the <code class="backtick">.c</code> at the end tells the C compiler the contents are C source code. Here is a typical C program:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="co">/* print the numbers from 1 to 10 */</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> i;
+
+ puts(<span class="st">"Now I will count from 1 to 10"</span>);
+ <span class="kw">for</span>(i = <span class="dv">1</span>; i &lt;= <span class="dv">10</span>; i++) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, i);
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/count.c" class="uri">examples/count.c</a>
+</div>
+<h3 id="Compiling_and_running_a_program"><span class="header-section-number">2.4.2</span> Compiling and running a program</h3>
+<p>Here's what happens when I compile and run it on the Zoo:</p>
+<pre><code>$ c99 -g3 -o count count.c
+$ ./count
+Now I will count from 1 to 10
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+$</code></pre>
+<p>The first line is the command to compile the program. The dollar sign is my <strong>prompt</strong>, which is printed by the system to tell me it is waiting for a command. The command calls <code>gcc</code> as <code>c99</code> with arguments <code>-g3</code> (enable maximum debugging info), <code>-o</code> (specify executable file name, otherwise defaults to <code>a.out</code>), <code>count</code> (the actual executable file name), and <code>count.c</code> (the source file to compile). This tells <code>gcc</code> that we should compile <code>count.c</code> to <code>count</code> in C99 mode with maximum debugging info included in the executable file.</p>
+<p>The second line runs the output file <code class="backtick">count</code>. Calling it <code class="backtick">./count</code>
+ is necessary because by default the shell (the program that interprets
+what you type) only looks for programs in certain standard system
+directories. To make it run a program in the current directory, we have
+to include the directory name.</p>
+<h3 id="Some_notes_on_what_the_program_does"><span class="header-section-number">2.4.3</span> Some notes on what the program does</h3>
+<p>Noteworthy features of this program include:</p>
+<ul>
+<li>The <code class="backtick">#include&nbsp;&lt;stdio.h&gt;</code> in
+line 1. This is standard C boilerplate, and will appear in any program
+you see that does input or output. The meaning is to tell the compiler
+to include the text of the file <code class="backtick">/usr/include/stdio.h</code>
+ in your program as if you had typed it there yourself. This particular
+file contains declarations for the standard I/O library functions like <code class="backtick">puts</code> (put string) and <code class="backtick">printf</code> (print formatted), as used in the program. If you don't put it in, your program may or may not still compile. Do it anyway.</li>
+<li>Line 3 is a comment; its beginning and end are marked by the <code class="backtick">/*</code> and <code class="backtick">*/</code>
+ characters. Comments are ignored by the compiler but can be helpful for
+ other programmers looking at your code (including yourself, after
+you've forgotten why you wrote something).</li>
+<li>Lines 5 and 6 declare the <code class="backtick">main</code> function. Every C program has to have a <code class="backtick">main</code> function declared in exactly this way—it's what the operating system calls when you execute the program. The <code class="backtick">int</code> on Line 5 says that main returns a value of type <code class="backtick">int</code> (we'll describe this in more detail later in the chapter on <a href="#functions">functions</a>), and that it takes two arguments: <code class="backtick">argc</code> of type <code class="backtick">int</code>, the number of arguments passed to the program from the command line, and <code class="backtick">argv</code>, of a <a href="#pointers">pointer</a>
+ type that we will get to eventually, which is an array of the arguments
+ (essentially all the words on the command line, including the program
+name). Note that it would also work to do this as one line (as K&amp;R
+typically does); the C compiler doesn't care about whitespace, so you
+can format things however you like, subject to the constraint that
+consistency will make it easier for people to read your code.</li>
+<li><p>Everything inside the curly braces is the body of the <code class="backtick">main</code> function. This includes</p>
+<ul>
+<li>The declaration <code class="backtick">int&nbsp;i;</code>, which says that <code class="backtick">i</code> will be a variable that holds an <code class="backtick">int</code> (see the chapter on <a href="#integerTypes">Integer Types</a>).</li>
+<li>Line 10, which prints an informative message using <code class="backtick">puts</code> (discussed in the chapter on <a href="#IO">input and output</a>).</li>
+<li>The <code class="backtick">for</code> loop on Lines 11–13, which executes its body for each value of <code class="backtick">i</code> from 1 to 10. We'll explain how <code class="backtick">for</code> loops work <a href="#forLoop">later</a>. Note that the body of the loop is enclosed in curly braces just like the body of the <code class="backtick">main</code> function. The only statement in the body is the call to <code class="backtick">printf</code> on Line 12; this includes a format string that specifies that we want a decimal-formatted integer followed by a newline (the <code class="backtick">\n</code>).</li>
+<li>The <code class="backtick">return&nbsp;0;</code> on Line 15 tells
+the operating system that the program worked (the convention in Unix is
+that 0 means success). If the program didn't work for some reason, we
+could have returned something else to signal an error.</li>
+</ul></li>
+</ul>
+<h1 id="linux"><span class="header-section-number">3</span> The Linux programming environment</h1>
+<p>The Zoo runs a Unix-like operating system called Linux. Most people run Unix with a command-line interface provided by a <strong>shell</strong>.
+ Each line typed to the shell tells it what program to run (the first
+word in the line) and what arguments to give it (remaining words). The
+interpretation of the arguments is up to the program.</p>
+<h2 id="the-shell"><span class="header-section-number">3.1</span> The shell</h2>
+<p>When you sign up for an account in the Zoo, you are offered a choice
+of possible shell programs. The examples below assume you have chosen <code>bash</code>, the <a href="http://www.gnu.org/software/bash/">Bourne-again shell</a> written by the GNU project. Other shells behave similarly for basic commands.</p>
+<h3 id="Getting_a_shell_prompt_in_the_Zoo"><span class="header-section-number">3.1.1</span> Getting a shell prompt in the Zoo</h3>
+<p>When you log in to a Zoo node directly, you may not automatically get
+ a shell window. If you use the default login environment (which puts
+you into the KDE window manager), you need to click on the picture of
+the display with a shell in front of it in the toolbar at the bottom of
+the screen. If you run Gnome instead (you can change your startup
+environment using the popup menu in the login box), you can click on the
+ foot in the middle of the toolbar. Either approach will pop up a
+terminal emulator from which you can run emacs, gcc, and so forth.</p>
+<p>The default login shell in the Zoo is <code class="backtick">bash</code>, and all examples of shell command lines given in these notes will assume <code class="backtick">bash</code>.
+ You can choose a different login shell on the account sign-up page if
+you want to, but you are probably best off just learning to like <code class="backtick">bash</code>.</p>
+<h3 id="The_Unix_filesystem"><span class="header-section-number">3.1.2</span> The Unix filesystem</h3>
+<p>Most of what one does with Unix programs is manipulate the
+filesystem. Unix files are unstructured blobs of data whose names are
+given by paths consisting of a sequence of directory names separated by
+slashes: for example <code>/home/accts/some-user/cs223/hw1.c</code>. At any time you are in a current working directory (type <code>pwd</code> to find out what it is and <code>cd&nbsp;new-directory</code>
+ to change it). You can specify a file below the current working
+directory by giving just the last part of the pathname. The special
+directory names <code class="backtick">.</code> and <code class="backtick">..</code> can also be used to refer to the current directory and its parent. So <code class="backtick">/home/accts/some-user/cs223/hw1.c</code> is just <code class="backtick">hw1.c</code> or <code class="backtick">./hw1.c</code> if your current working directory is <code class="backtick">/home/accts/some-user/cs223</code>, <code class="backtick">cs223/hw1.c</code> if your current working directory is <code class="backtick">/home/accts/some-user</code>, and <code class="backtick">../cs223/hw1.c</code> if your current working directory is <code class="backtick">/home/accts/some-user/illegal-downloads</code>.</p>
+<p>All Zoo machines share a common filesystem, so any files you create
+or change on one Zoo machine will show up in the same place on all the
+others.</p>
+<h3 id="Unix_command-line_programs"><span class="header-section-number">3.1.3</span> Unix command-line programs</h3>
+<p>Here are some handy Unix commands:</p>
+<dl>
+<dt>man</dt>
+<dd><p><code>man</code>&nbsp;<em>program</em> will show you the on-line documentation (the <em>man page</em>) for a program (e.g., try <code>man&nbsp;man</code> or <code>man&nbsp;ls</code>). Handy if you want to know what a program does. On Linux machines like the ones in the Zoo you can also get information using <code>info&nbsp;program</code>, which has an Emacs-like interface.</p>
+<p>You can also use <code>man</code> <em>function</em> to see documentation for standard library functions. The command <code>man -k</code> <em>string</em> will search for man pages whose titles contain <em>string</em>.</p>
+<p>Sometimes there is more than one man page with the same name. In this case <code>man -k</code> will distinguish them by different manual section numbers, e.g., <code>printf (1)</code> (a shell command) vs. <code>printf (3)</code> (a library routine). To get a man page from a specific section, use <code>man</code> <em>section</em> <em>name</em>, e.g. <code>man 3 printf</code>.</p>
+</dd>
+<dt>ls</dt>
+<dd><p><code>ls</code> lists all the files in the current directory. Some useful variants:</p>
+<ul>
+<li><code>ls&nbsp;/some/other/dir</code>; list files in that directory instead.</li>
+<li><code>ls&nbsp;-l</code>; long output format showing modification dates and owners.</li>
+</ul>
+</dd>
+<dt>mkdir</dt>
+<dd><code>mkdir&nbsp;dir</code> will create a new directory in the current directory named <code>dir</code>.
+</dd>
+<dt>rmdir</dt>
+<dd><code>rmdir&nbsp;dir</code> deletes a directory. It only works on directories that contain no files.
+</dd>
+<dt>cd</dt>
+<dd><code>cd&nbsp;dir</code> changes the current working directory. With no arguments, <code>cd</code> changes back to your home directory.
+</dd>
+<dt>pwd</dt>
+<dd><code>pwd</code> ("print working directory") shows what your current directory is.
+</dd>
+<dt>mv</dt>
+<dd><code>mv&nbsp;old-name&nbsp;new-name</code> changes the name of a file. You can also use this to move files between directories.
+</dd>
+<dt>cp</dt>
+<dd><code>cp&nbsp;old-name&nbsp;new-name</code> makes a copy of a file.
+</dd>
+<dt>rm</dt>
+<dd><code>rm&nbsp;file</code> deletes a file. Deleted files cannot be recovered. Use this command carefully.
+</dd>
+<dt>chmod</dt>
+<dd><p><code>chmod</code> changes the permissions on a file or directory. See the man page for the full details of how this works. Here are some common <code>chmod</code>'s:</p>
+<ul>
+<li><code>chmod&nbsp;644&nbsp;file</code>; owner can read or write the file, others can only read it.</li>
+<li><code>chmod&nbsp;600&nbsp;file</code>; owner can read or write the file, others can't do anything with it.</li>
+<li><code>chmod&nbsp;755&nbsp;file</code>; owner can read, write, or
+execute the file, others can read or execute it. This is typically used
+for programs or for directories (where the execute bit has the special
+meaning of letting somebody find files in the directory).</li>
+<li><code>chmod&nbsp;700&nbsp;file</code>; owner can read, write, or execute the file, others can't do anything with it.</li>
+</ul>
+</dd>
+<dt><code>emacs</code>, <code>gcc</code>, <code>make</code>, <code>gdb</code>, <code>git</code></dt>
+<dd>See corresponding sections.
+</dd>
+</dl>
+<h3 id="Stopping_and_interrupting_programs"><span class="header-section-number">3.1.4</span> Stopping and interrupting programs</h3>
+<p>Sometimes you may have a running program that won't die. Aside from
+costing you the use of your terminal window, this may be annoying to
+other Zoo users, especially if the process won't die even if you close
+the terminal window or log out.</p>
+<p>There are various control-key combinations you can type at a terminal window to interrupt or stop a running program.</p>
+<dl>
+<dt>ctrl-C</dt>
+<dd>Interrupt the process. Many processes (including any program you write unless you trap SIGINT using the <code class="backtick">sigaction</code> system call) will die instantly when you do this. Some won't.
+</dd>
+<dt>ctrl-Z</dt>
+<dd>Suspend the process. This will leave a stopped process lying around. Type <code class="backtick">jobs</code> to list all your stopped processes, <code class="backtick">fg</code> to restart the last process (or <code class="backtick">fg&nbsp;%1</code> to start process <code class="backtick">%1</code> etc.), <code class="backtick">bg</code> to keep running the stopped process in the background, <code class="backtick">kill&nbsp;%1</code> to kill process <code class="backtick">%1</code> politely, <code class="backtick">kill&nbsp;-KILL&nbsp;%1</code> to kill process <code class="backtick">%1</code> whether it wants to die or not.
+</dd>
+<dt>ctrl-D</dt>
+<dd>Send end-of-file to the process. Useful if you are typing test input
+ to a process that expects to get EOF eventually or writing programs
+using <code class="backtick">cat&nbsp;&gt;&nbsp;program.c</code> (not really recommended). For test input, you are often better off putting it into a file and using input redirection (<code class="backtick">./program&nbsp;&lt;&nbsp;test-input-file</code>); this way you can redo the test after you fix the bugs it reveals.
+</dd>
+<dt>ctrl-\</dt>
+<dd>Quit the process. Sends a SIGQUIT, which asks a process to quit and dump core. Mostly useful if ctrl-C and ctrl-Z don't work.
+</dd>
+</dl>
+<p>If you have a runaway process that you can't get rid of otherwise, you can use <code class="backtick">ps&nbsp;g</code> to get a list of all your processes and their process ids. The <code class="backtick">kill</code> command can then be used on the offending process, e.g. <code class="backtick">kill&nbsp;-KILL&nbsp;6666</code> if your evil process has process id 6666. Sometimes the <code class="backtick">killall</code> command can simplify this procedure, e.g. <code class="backtick">killall&nbsp;-KILL&nbsp;evil</code> kills all processes with command name <code class="backtick">evil</code>.</p>
+<h3 id="Running_your_own_programs"><span class="header-section-number">3.1.5</span> Running your own programs</h3>
+<p>If you compile your own program, you will need to prefix it with <code class="backtick">./</code> on the command line to tell the shell that you want to run a program in the current directory (called '<code class="backtick">.</code>') instead of one of the standard system directories. So for example, if I've just built a program called <code class="backtick">count</code>, I can run it by typing</p>
+<pre><code>$ ./count</code></pre>
+<p>Here the "<code class="backtick">$&nbsp;</code>" is standing in for whatever your prompt looks like; you should not type it.</p>
+<p>Any words after the program name (separated by <strong>whitespace</strong>—spaces
+ and/or tabs) are passed in as arguments to the program. Sometimes you
+may wish to pass more than one word as a single argument. You can do so
+by wrapping the argument in single quotes, as in</p>
+<pre><code>$ ./count 'this is the first argument' 'this is the second argument'</code></pre>
+<h3 id="shellRedirects"><span class="header-section-number">3.1.6</span> Redirecting input and output</h3>
+<p>Some programs take input from <strong>standard input</strong>
+(typically the terminal). If you are doing a lot of testing, you will
+quickly become tired of typing test input at your program. You can tell
+the shell to <strong>redirect</strong> standard input from a file by putting the file name after a <code class="backtick">&lt;</code> symbol, like this:</p>
+<pre><code>$ ./count &lt; huge-input-file</code></pre>
+<p>A '&gt;' symbol is used to redirect <strong>standard output</strong>, in case you don't want to read it as it flies by on your screen:</p>
+<pre><code>$ ./count &lt; huge-input-file &gt; huger-output-file</code></pre>
+<p>A useful file for both input and output is the special file <code class="backtick">/dev/null</code>. As input, it looks like an empty file. As output, it eats any characters sent to it:</p>
+<pre><code>$ ./sensory-deprivation-experiment &lt; /dev/null &gt; /dev/null</code></pre>
+<p>You can also <strong>pipe</strong> programs together, connecting the output of one to the input of the next. Good programs to put at the end of a pipe are <code class="backtick">head</code> (eats all but the first ten lines), <code class="backtick">tail</code> (eats all but the last ten lines), <code class="backtick">more</code> (lets you page through the output by hitting the space bar), and <code class="backtick">tee</code> (shows you the output but also saves a copy to a file). A typical command might be something like <code class="backtick">./spew&nbsp;|&nbsp;more</code> or <code class="backtick">./slow-but-boring&nbsp;|&nbsp;tee&nbsp;boring-output</code>.
+ Pipes can consist of a long train of programs, each of which processes
+the output of the previous one and supplies the input to the next. A
+typical case might be:</p>
+<pre><code>$ ./do-many-experiments | sort | uniq -c | sort -nr</code></pre>
+<p>which, if <code class="backtick">./do-many-experiments</code> gives
+the output of one experiment on each line, produces a list of distinct
+experimental outputs sorted by decreasing frequency. Pipes like this can
+ often substitute for hours of real programming.</p>
+<h2 id="editing"><span class="header-section-number">3.2</span> Text editors</h2>
+<p>To write your programs, you will need to use a text editor,
+preferably one that knows enough about C to provide tools like automatic
+ indentation and syntax highlighting. There are three reasonable choices
+ for this in the Zoo: <code class="backtick">kate</code>, <code class="backtick">emacs</code>, and <code class="backtick">vim</code> (which can also be run as <code class="backtick">vi</code>).
+ Kate is a GUI-style editor that comes with the KDE window system; it
+plays nicely with the mouse, but Kate skills will not translate well
+into other environments. <a href="http://en.wikipedia.org/wiki/Emacs" title="WikiPedia">Emacs</a> and <a href="http://en.wikipedia.org/wiki/Vi" title="WikiPedia">Vi</a> have been the two contenders for the <a href="http://en.wikipedia.org/wiki/Editor_war" title="WikiPedia">One True Editor</a>
+ since the 1970s—if you learn one (or both) you will be able to use the
+resulting skills everywhere. My personal preference is to use Vi, but
+Emacs has the advantage of using the same editing commands as the shell
+and <code class="backtick">gdb</code> command-line interfaces.</p>
+<h3 id="Writing_C_programs_with_Emacs"><span class="header-section-number">3.2.1</span> Writing C programs with Emacs</h3>
+<p>To start Emacs, type <code>emacs</code> at the command line. If you
+are actually sitting at a Zoo node it should put up a new window. If
+not, Emacs will take over the current window. If you have never used
+Emacs before, you should immediately type <code>C-h&nbsp;t</code> (this means hold down the Control key, type <code>h</code>, then type <code>t</code> without holding down the Control key). This will pop you into the Emacs built-in tutorial.</p>
+<h4 id="My_favorite_Emacs_commands"><span class="header-section-number">3.2.1.1</span> My favorite Emacs commands</h4>
+<p>General note: <code>C-x</code> means hold down Control and press <code>x</code>; <code>M-x</code> means hold down Alt (Emacs calls it "Meta") and press <code>x</code>. For <code>M-x</code> you can also hit Esc and then <code>x</code>.</p>
+<dl>
+<dt>C-h</dt>
+<dd>Get help. Everything you could possibly want to know about Emacs is available through this command. Some common versions: <code>C-h&nbsp;t</code> puts up the tutorial, <code>C-h&nbsp;b</code> lists every command available in the current mode, <code>C-h&nbsp;k</code> tells you what a particular sequence of keystrokes does, and <code>C-h&nbsp;l</code>
+ tells you what the last 50 or so characters you typed were (handy if
+Emacs just garbled your file and you want to know what command to avoid
+in the future).
+</dd>
+<dt>C-x u</dt>
+<dd>Undo. Undoes the last change you made to the current buffer. Type it
+ again to undo more things. A lifesaver. Note that it can only undo back
+ to the time you first loaded the file into Emacs—if you want to be able
+ to back out of bigger changes, use <code class="backtick">git</code> (described below).
+</dd>
+<dt>C-x C-s</dt>
+<dd>Save. Saves changes to the current buffer out to its file on disk.
+</dd>
+<dt>C-x C-f</dt>
+<dd>Edit a different file.
+</dd>
+<dt>C-x C-c</dt>
+<dd>Quit out of Emacs. This will ask you if you want to save any buffers that have been modified. You probably want to answer yes (<code>y</code>) for each one, but you can answer no (<code>n</code>) if you changed some file inside Emacs but want to throw the changes away.
+</dd>
+<dt>C-f</dt>
+<dd>Go forward one character.
+</dd>
+<dt>C-b</dt>
+<dd>Go back one character.
+</dd>
+<dt>C-n</dt>
+<dd>Go to the next line.
+</dd>
+<dt>C-p</dt>
+<dd>Go to the previous line.
+</dd>
+<dt>C-a</dt>
+<dd>Go to the beginning of the line.
+</dd>
+<dt>C-k</dt>
+<dd>Kill the rest of the line starting with the current position. Useful Emacs idiom: <code>C-a&nbsp;C-k</code>.
+</dd>
+<dt>C-y</dt>
+<dd>"Yank." Get back what you just killed.
+</dd>
+<dt>TAB</dt>
+<dd>Re-indent the current line. In C mode this will indent the line according to Emacs's notion of how C should be indented.
+</dd>
+<dt>M-x compile</dt>
+<dd>Compile a program. This will ask you if you want to save out any
+unsaved buffers and then run a compile command of your choice (see the
+section on compiling programs below). The exciting thing about <code>M-x&nbsp;compile</code> is that if your program has errors in it, you can type <code>C-x&nbsp;`</code> to jump to the next error, or at least where <code>gcc</code> thinks the next error is.
+</dd>
+</dl>
+<h3 id="Using_Vi_instead_of_Emacs"><span class="header-section-number">3.2.2</span> Using Vi instead of Emacs</h3>
+<p>If you don't find yourself liking Emacs very much, you might want to
+try Vim instead. Vim is a vastly enhanced reimplementation of the
+classic <code>vi</code> editor, which I personally find easier to use than Emacs. Type <code>vimtutor</code> to run the tutorial.</p>
+<p>One annoying feature of Vim is that it is hard to figure out how to
+quit. If you don't mind losing all of your changes, you can always get
+out by hitting the Escape key a few times and then typing <code class="backtick">:qa!</code>.</p>
+<p>To run Vim, type <code class="backtick">vim</code> or <code class="backtick">vim&nbsp;filename</code> from the command line. Or you can use the graphical version <code class="backtick">gvim</code>, which pops up its own window.</p>
+<p>Vim is a <em>modal</em> editor, meaning that at any time you are in
+one of several modes (normal mode, insert mode, replace mode,
+operator-pending mode, etc.), and the interpretation of keystrokes
+depends on which mode you are in. So typing <code class="backtick">jjjj</code> in normal mode moves the cursor down four lines, while typing <code class="backtick">jjjj</code> in insert mode inserts the string <code class="backtick">jjjj</code>
+ at the current position. Most of the time you will be in either normal
+mode or insert mode. There is also a command mode entered by hitting <code class="backtick">:</code> that lets you type longer commands, similar to the Unix command-line or M-x in Emacs.</p>
+<h4 id="My_favorite_Vim_commands"><span class="header-section-number">3.2.2.1</span> My favorite Vim commands</h4>
+<h5 id="Normal_mode"><span class="header-section-number">3.2.2.1.1</span> Normal mode</h5>
+<dl>
+<dt>:h</dt>
+<dd>Get help. (Hit Enter at the end of any command that starts with a colon.)
+</dd>
+<dt>Escape</dt>
+<dd>Get out of whatever strange mode you are in and go back to normal
+mode. You will need to use this whenever you are done typing code and
+want to get back to typing commands.
+</dd>
+<dt>i</dt>
+<dd>Enter insert mode. You will need to do this to type anything. The command <code class="backtick">a</code> also enters insert mode, but puts new text after the current cursor position instead of before it.
+</dd>
+<dt>u</dt>
+<dd><p>Undo. Undoes the last change you made to the current buffer. Type
+ it again to undo more things. If you undid something by mistake, c-<code class="backtick">R</code> (control <code class="backtick">R</code>) will redo the last undo (and can also be repeated).</p>
+</dd>
+<dt>:w</dt>
+<dd><p>Write the current file to disk. Use <code class="backtick">:w&nbsp;filename</code> to write it to <code class="backtick">filename</code>. Use <code class="backtick">:wa</code> to write all files that you have modified. The command <code class="backtick">ZZ</code> does the same thing without having to hit Enter at the end.</p>
+</dd>
+<dt>:e filename</dt>
+<dd>Edit a different file.
+</dd>
+<dt>:q</dt>
+<dd>Quit. Vi will refuse to do this if you have unwritten files. See <code class="backtick">:wa</code> for how to fix this, or use <code class="backtick">:q!</code> if you want to throw away your changes and quit anyway. The shortcuts <code class="backtick">:x</code> and <code class="backtick">:wq</code> do a write of the current file followed by quitting.
+</dd>
+<dt>h, j, k, l</dt>
+<dd>Move the cursor left, down, up, or right. You can also use the arrow keys (in both normal mode and insert mode).
+</dd>
+<dt>x</dt>
+<dd>Delete the current character.
+</dd>
+<dt>D</dt>
+<dd>Delete to end of line.
+</dd>
+<dt>dd</dt>
+<dd>Delete all of the current line. This is a special case of a more general <code class="backtick">d</code> command. If you precede it with a number, you can delete multiple lines: <code class="backtick">5dd</code> deletes the next 5 lines. If you replace the second <code class="backtick">d</code> with a motion command, you delete until wherever you land: <code class="backtick">d$</code> deletes to end of line (<code class="backtick">D</code> is faster), <code class="backtick">dj</code> deletes this line and the line after it, <code class="backtick">d%</code> deletes the next matching group of parentheses/braces/brackets and whatever is between them, <code class="backtick">dG</code> deletes to end of file—there are many possibilities. All of these save what you deleted into register <code class="backtick">""</code> so you can get them back with <code class="backtick">p</code>.
+</dd>
+<dt>yy</dt>
+<dd>Like <code class="backtick">dd</code>, but only saves the line to register <code class="backtick">""</code> and doesn't delete it. (Think <em>copy</em>). All the variants of <code class="backtick">dd</code> work with <code class="backtick">yy</code>: <code class="backtick">5yy</code>, <code class="backtick">y$</code>, <code class="backtick">yj</code>, <code class="backtick">y%</code>, etc.
+</dd>
+<dt>p</dt>
+<dd>Pull whatever is in register <code class="backtick">""</code>. (Think <em>paste</em>).
+</dd>
+<dt>&lt;&lt; and &gt;&gt;</dt>
+<dd>Outdent or indent the current line one tab stop.
+</dd>
+<dt>:make</dt>
+<dd>Run <code class="backtick">make</code> in the current directory. You can also give it arguments, e.g., <code class="backtick">:make&nbsp;myprog</code>, <code class="backtick">:make&nbsp;test</code>. Use <code class="backtick">:cn</code> to go to the next error if you get errors.
+</dd>
+<dt>:!</dt>
+<dd>Run a command, e.g., <code class="backtick">:!&nbsp;echo&nbsp;hello&nbsp;world</code> or <code class="backtick">:!&nbsp;gdb&nbsp;myprogram</code>.
+ Returns to Vim when the command exits (control-C can sometimes be
+helpful if your command isn't exiting when it should). This works best
+if you ran Vim from a shell window; it doesn't work very well if Vim is
+running in its own window.
+</dd>
+</dl>
+<h5 id="Insert_mode"><span class="header-section-number">3.2.2.1.2</span> Insert mode</h5>
+<dl>
+<dt>control-P and control-N</dt>
+<dd>These are completion commands that attempt to expand a partial word
+to something it matches elsewhere in the buffer. So if you are a good
+person and have named a variable <code class="backtick">informativeVariableName</code> instead of <code class="backtick">ivn</code>, you can avoid having to type the entire word by typing <code class="backtick">inf</code>&lt;control-P&gt; if it's the only word in your buffer that starts with <code class="backtick">inf</code>.
+</dd>
+<dt>control-O and control-I</dt>
+<dd>Jump to the last cursor position before a big move / back to the place you jumped from.
+</dd>
+<dt>ESC</dt>
+<dd>Get out of insert mode!
+</dd>
+</dl>
+<h4 id="Settings"><span class="header-section-number">3.2.2.2</span> Settings</h4>
+<p>Unlike Emacs, Vim's default settings are not very good for editing C programs. You can fix this by creating a file called <code class="backtick">.vimrc</code> in your home directory with the following commands:</p>
+<div>
+<pre class="vim"><code>set shiftwidth=4
+set autoindent
+set backup
+set cindent
+set hlsearch
+set incsearch
+set showmatch
+set number
+syntax on
+filetype plugin on
+filetype indent on
+</code></pre>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/sample.vimrc" class="uri">examples/sample.vimrc</a>
+</div>
+<p>(You can download this file by clicking on the link.)</p>
+<p>In Vim, you can type e.g. <code class="backtick">:help&nbsp;backup</code> to find out what each setting does. Note that because <code class="backtick">.vimrc</code> starts with a <code class="backtick">.</code>, it won't be visible to <code class="backtick">ls</code> unless you use <code class="backtick">ls&nbsp;-a</code> or <code class="backtick">ls&nbsp;-A</code>.</p>
+<h2 id="compilationTools"><span class="header-section-number">3.3</span> Compilation tools</h2>
+<h3 id="gcc"><span class="header-section-number">3.3.1</span> The GNU C compiler <code>gcc</code></h3>
+<p>A C program will typically consist of one or more files whose names end with <code>.c</code>. To compile <code>foo.c</code>, you can type <code>gcc&nbsp;foo.c</code>. Assuming <code>foo.c</code> contains no errors egregious enough to be detected by the extremely forgiving C compiler, this will produce a file named <code>a.out</code> that you can then execute by typing <code>./a.out</code>.</p>
+<p>If you want to debug your program using <code>gdb</code> or give it a different name, you will need to use a longer command line. Here's one that compiles <code>foo.c</code> to <code>foo</code> (run it using <code>./foo</code>) and includes the information that <code>gdb</code> needs: <code>gcc&nbsp;-g3&nbsp;-o&nbsp;foo&nbsp;foo.c</code></p>
+<p>If you want to use C99 features, you will need to tell <code>gcc</code> to use C99 instead of its own default dialect of C. You can do this either by adding the argument <code>-std=c99</code> as in <code>gcc -std=c99 -o foo foo.c</code> or by calling <code>gcc</code> as <code>c99</code> as in <code>c99 -o foo foo.c</code>.</p>
+<p>By default, gcc doesn't check everything that might be wrong with
+your program. But if you give it a few extra arguments, it will warn you
+ about many (but not all) potential problems: <code>c99 -g3&nbsp;-Wall&nbsp;-pedantic&nbsp;-o&nbsp;foo&nbsp;foo.c</code>.</p>
+<h3 id="make"><span class="header-section-number">3.3.2</span> Make</h3>
+<p>For complicated programs involving multiple source files, you are probably better off using <code>make</code> than calling <code>gcc</code>
+ directly. Make is a "rule-based expert system" that figures out how to
+compile programs given a little bit of information about their
+components.</p>
+<p>For example, if you have a file called <code>foo.c</code>, try typing <code>make&nbsp;foo</code> and see what happens.</p>
+<p>In general you will probably want to write a <code>Makefile</code>, which is named <code>Makefile</code> or <code>makefile</code> and tells <code>make</code> how to compile programs in the same directory. Here's a typical Makefile:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode makefile"><code class="sourceCode makefile"><span class="co"># Any line that starts with a sharp is a comment and is ignored</span>
+<span class="co"># by Make.</span>
+
+<span class="co"># These lines set variables that control make's default rules.</span>
+<span class="co"># We STRONGLY recommend putting "-Wall -pedantic -g3" in your CFLAGS.</span>
+<span class="dt">CC</span><span class="ch">=</span><span class="st">gcc</span>
+<span class="dt">CFLAGS=-std</span><span class="ch">=</span><span class="st">c99 -Wall -pedantic -g3</span>
+
+<span class="co"># The next line is a dependency line.</span>
+<span class="co"># It says that if somebody types "make all"</span>
+<span class="co"># make must first make "hello-world".</span>
+<span class="co"># By default the left-hand-side of the first dependency is what you</span>
+<span class="co"># get if you just type "make" with no arguments.</span>
+<span class="dv">all:</span><span class="dt"> hello-world</span>
+
+<span class="co"># How do we make hello-world?</span>
+<span class="co"># The dependency line says you need to first make hello-world.o</span>
+<span class="co"># and hello-library.o</span>
+<span class="dv">hello-world:</span><span class="dt"> hello-world.o hello-library.o</span>
+ <span class="co"># Subsequent lines starting with a TAB character give</span>
+ <span class="co"># commands to execute.</span>
+ <span class="co"># This command uses make built-in variables to avoid</span>
+ <span class="co"># retyping (and getting things wrong):</span>
+ <span class="co"># $@ = target hello-world</span>
+ <span class="co"># $^ = dependencies hello-world.o and hello-library.o</span>
+ <span class="ch">$(</span><span class="dt">CC</span><span class="ch">)</span> <span class="ch">$(</span><span class="dt">CFLAGS</span><span class="ch">)</span> -o <span class="ch">$@</span> <span class="ch">$^</span>
+ <span class="co"># You can put whatever commands you want.</span>
+ echo <span class="st">"I just built hello-world! Hooray!"</span>
+
+<span class="co"># Here we are saying that hello-world.o and hello-library.o</span>
+<span class="co"># should be rebuilt whenever their corresponding source file</span>
+<span class="co"># or hello-library.h changes.</span>
+<span class="co"># There are no commands attached to these dependency lines, so</span>
+<span class="co"># make will have to figure out how to do that somewhere else</span>
+<span class="co"># (probably from the builtin .c -&gt; .o rule).</span>
+<span class="dv">hello-world.o:</span><span class="dt"> hello-world.c hello-library.h</span>
+<span class="dv">hello-library.o:</span><span class="dt"> hello-library.c hello-library.h</span>
+
+<span class="co"># Command lines can do more than just build things. For example,</span>
+<span class="co"># "make test" will rebuild hello-world (if necessary) and then run it.</span>
+<span class="dv">test:</span><span class="dt"> hello-world</span>
+ ./hello-world
+
+<span class="co"># This lets you type "make clean" and get rid of anything you can</span>
+<span class="co"># rebuild. The $(RM) variable is predefined to "rm -f"</span>
+<span class="dv">clean:</span>
+ <span class="ch">$(</span><span class="dt">RM</span><span class="ch">)</span> hello-world *.o</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/usingMake/Makefile" class="uri">examples/usingMake/Makefile</a>
+</div>
+<p>Given a Makefile, make looks at each dependency line and asks: (a)
+does the target on the left hand side exist, and (b) is it older than
+the files it depends on. If so, it looks for a set of commands for
+rebuilding the target, after first rebuilding any of the files it
+depends on; the commands it runs will be underneath some dependency line
+ where the target appears on the left-hand side. It has built-in rules
+for doing common tasks like building <code>.o</code> files (which contain machine code) from <code>.c</code> files (which contain C source code). If you have a fake target like <code>all</code> above, it will try to rebuild everything <code>all</code> depends on because there is no file named <code>all</code> (one hopes).</p>
+<h4 id="Make_gotchas"><span class="header-section-number">3.3.2.1</span> Make gotchas</h4>
+<p>Make really really cares that the command lines start with a TAB
+character. TAB looks like eight spaces in Emacs and other editors, but
+it isn't the same thing. If you put eight spaces in (or a space and a
+TAB), Make will get horribly confused and give you an incomprehensible
+error message about a "missing separator". This misfeature is so scary
+that I avoided using make for years because I didn't understand what was
+ going on. Don't fall into that trap—make really is good for you,
+especially if you ever need to recompile a huge program when only a few
+source files have changed.</p>
+<p>If you use GNU Make (on a zoo node), note that beginning with version
+ 3.78, GNU Make prints a message that hints at a possible SPACEs-vs-TAB
+problem, like this:</p>
+<pre><code>$ make
+Makefile:23:*** missing separator (did you mean TAB instead of 8 spaces?). Stop.</code></pre>
+<p>If you need to repair a Makefile that uses spaces, one way of converting leading spaces into TABs is to use the <code>unexpand</code> program:</p>
+<pre><code>$ mv Makefile Makefile.old
+$ unexpand Makefile.old &gt; Makefile</code></pre>
+<h2 id="debugging"><span class="header-section-number">3.4</span> Debugging tools</h2>
+<p>The standard debugger on the Zoo is <code class="backtick">gdb</code>. Also useful is the memory error checker <code>valgrind</code>. Below are some notes on debugging in general and using these programs in particular.</p>
+<h3 id="Debugging_in_general"><span class="header-section-number">3.4.1</span> Debugging in general</h3>
+<p>Basic method of all debugging:</p>
+<ol style="list-style-type: decimal">
+<li>Know what your program is supposed to do.</li>
+<li>Detect when it doesn't.</li>
+<li>Fix it.</li>
+</ol>
+<p>A tempting mistake is to skip step 1, and just try randomly tweaking
+things until the program works. Better is to see what the program is
+doing internally, so you can see exactly where and when it is going
+wrong. A second temptation is to attempt to intuit where things are
+going wrong by staring at the code or the program's output. Avoid this
+temptation as well: let the computer tell you what it is really doing
+inside your program instead of guessing.</p>
+<h3 id="Assertions"><span class="header-section-number">3.4.2</span> Assertions</h3>
+<p>Every non-trivial C program should include <code class="backtick">&lt;assert.h&gt;</code>, which gives you the <code class="backtick">assert</code> macro (see Appendix B6 of K&amp;R). The <code class="backtick">assert</code> macro tests if a condition is true and halts your program with an error message if it isn't:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ assert(<span class="dv">2+2</span> == <span class="dv">5</span>);
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/debugging/no.c" class="uri">examples/debugging/no.c</a>
+</div>
+<p>Compiling and running this program produces the following output:</p>
+<pre><code>$ gcc -o no no.c
+$ ./no
+no: no.c:6: main: Assertion `2+2 == 5' failed.</code></pre>
+<p>Line numbers and everything, even if you compile with the optimizer
+turned on. Much nicer than a mere segmentation fault, and if you run it
+under the debugger, the debugger will stop exactly on the line where the
+ <code class="backtick">assert</code> failed so you can poke around and see why.</p>
+<h3 id="gdb"><span class="header-section-number">3.4.3</span> The GNU debugger <code>gdb</code></h3>
+<p>The standard debugger on Linux is called <code class="backtick">gdb</code>. This lets you run your program under remote control, so that you can stop it and see what is going on inside.</p>
+<p>You can also use <code>ddd</code>, which is a graphical front-end for <code>gdb</code>. There is an <a href="http://www.gnu.org/software/ddd/manual/html_mono/ddd.html">extensive tutorial</a> available for <code>ddd</code>, so we will concentrate on the command-line interface to <code>gdb</code> here.</p>
+<p>We'll look at a contrived example. Suppose you have the following program <code>bogus.c</code>:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="co">/* Print the sum of the integers from 1 to 1000 */</span>
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> sum;
+
+ sum = <span class="dv">0</span>;
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i -= <span class="dv">1000</span>; i++) {
+ sum += i;
+ }
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, sum);
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/debugging/bogus.c" class="uri">examples/debugging/bogus.c</a>
+</div>
+<p>Let's compile and run it and see what happens. Note that we include the flag <code>-g3</code> to tell the compiler to include debugging information. This allows <code>gdb</code> to translate machine addresses back into identifiers and line numbers in the original program for us.</p>
+<pre><code>$ c99 -g3 -o bogus bogus.c
+$ ./bogus
+-34394132
+$</code></pre>
+<p>That doesn't look like the sum of 1 to 1000. So what went wrong? If
+we were clever, we might notice that the test in the for loop is using
+the mysterious <code>-=</code> operator instead of the <code>&lt;=</code>
+ operator that we probably want. But let's suppose we're not so clever
+right now—it's four in the morning, we've been working on <code>bogus.c</code> for twenty-nine straight hours, and there's a <code>-=</code>
+ up there because in our befuddled condition we know in our bones that
+it's the right operator to use. We need somebody else to tell us that we
+ are deluding ourselves, but nobody is around this time of night. So
+we'll have to see what we can get the computer to tell us.</p>
+<p>The first thing to do is fire up <code>gdb</code>, the debugger. This
+ runs our program in stop-motion, letting us step through it a piece at a
+ time and watch what it is actually doing. In the example below gdb is
+run from the command line. You can also run it directly from Emacs with <code>M-x&nbsp;gdb</code>,
+ which lets Emacs track and show you where your program is in the source
+ file with a little arrow, or (if you are logged in directly on a Zoo
+machine) by running <code class="backtick">ddd</code>, which wraps <code class="backtick">gdb</code> in a graphical user interface.</p>
+<pre><code>$ gdb bogus
+GNU gdb 4.17.0.4 with Linux/x86 hardware watchpoint and FPU support
+Copyright 1998 Free Software Foundation, Inc.
+GDB is free software, covered by the GNU General Public License, and you are
+welcome to change it and/or distribute copies of it under certain conditions.
+Type "show copying" to see the conditions.
+There is absolutely no warranty for GDB. Type "show warranty" for details.
+This GDB was configured as "i386-redhat-linux"...
+(gdb) run
+Starting program: /home/accts/aspnes/tmp/bogus
+-34394132
+
+Program exited normally.</code></pre>
+<p>So far we haven't learned anything. To see our program in action, we
+need to slow it down a bit. We'll stop it as soon as it enters <code>main</code>, and step through it one line at a time while having it print out the values of the variables.</p>
+<pre><code>(gdb) break main
+Breakpoint 1 at 0x8048476: file bogus.c, line 9.
+(gdb) run
+Starting program: /home/accts/aspnes/tmp/bogus
+
+Breakpoint 1, main (argc=1, argv=0xbffff9ac) at bogus.c:9
+9 sum = 0;
+(gdb) display sum
+1: sum = 1
+(gdb) n
+10 for(i = 0; i -= 1000; i++)
+1: sum = 0
+(gdb) display i
+2: i = 0
+(gdb) n
+11 sum += i;
+2: i = -1000
+1: sum = 0
+(gdb) n
+10 for(i = 0; i -= 1000; i++)
+2: i = -1000
+1: sum = -1000
+(gdb) n
+11 sum += i;
+2: i = -1999
+1: sum = -1000
+(gdb) n
+10 for(i = 0; i -= 1000; i++)
+2: i = -1999
+1: sum = -2999
+(gdb) quit
+The program is running. Exit anyway? (y or n) y
+$</code></pre>
+<p>Here we are using <code>break&nbsp;main</code> to tell the program to stop as soon as it enters <code>main</code>, <code>display</code> to tell it to show us the value of the variables <code>i</code> and <code>sum</code> whenever it stops, and <code>n</code> (short for <code>next</code>) to execute the program one line at a time.</p>
+<p>When stepping through a program, gdb displays the line it will execute <em>next</em> as well as any variables you've told it to display. This means that any changes you see in the variables are the result of the <em>previous</em> displayed line. Bearing this in mind, we see that <code>i</code> drops from 0 to -1000 the very first time we hit the top of the <code>for</code> loop and drops to -1999 the next time. So something bad is happening in the top of that <code>for</code> loop, and if we squint at it a while we might begin to suspect that <code>i&nbsp;-=&nbsp;1000</code> is not the nice simple test we might have hoped it was.</p>
+<h4 id="My_favorite_gdb_commands"><span class="header-section-number">3.4.3.1</span> My favorite gdb commands</h4>
+<dl>
+<dt>help</dt>
+<dd>Get a description of gdb's commands.
+</dd>
+<dt>run</dt>
+<dd>Runs your program. You can give it arguments that get passed in to
+your program just as if you had typed them to the shell. Also used to
+restart your program from the beginning if it is already running.
+</dd>
+<dt>quit</dt>
+<dd>Leave gdb, killing your program if necessary.
+</dd>
+<dt>break</dt>
+<dd>Set a breakpoint, which is a place where gdb will automatically stop your program. Some examples: - <code>break&nbsp;somefunction</code> stops before executing the first line of <code>somefunction</code>. - <code>break&nbsp;117</code> stops before executing line number 117.
+</dd>
+<dt>list</dt>
+<dd>Show part of your source file with line numbers (handy for figuring out where to put breakpoints). Examples: - <code>list&nbsp;somefunc</code> lists the lines around the beginning of <code>somefunc</code>. - <code>list&nbsp;117,123</code> lists lines 117 through 123.
+</dd>
+<dt>next</dt>
+<dd>Execute the next line of the program, including completing any procedure calls in that line.
+</dd>
+<dt>step</dt>
+<dd>Execute the next step of the program, which is either the next line
+if it contains no procedure calls, or the entry into the called
+procedure.
+</dd>
+<dt>finish</dt>
+<dd>Continue until you get out of the current procedure (or hit a
+breakpoint). Useful for getting out of something you stepped into that
+you didn't want to step into.
+</dd>
+<dt>cont</dt>
+<dd>(Or <code>continue</code>). Continue until (a) the end of the
+program, (b) a fatal error like a Segmentation Fault or Bus Error, or
+(c) a breakpoint. If you give it a numeric argument (e.g., <code>cont&nbsp;1000</code>) it will skip over that many breakpoints before stopping.
+</dd>
+<dt>print</dt>
+<dd>Print the value of some expression, e.g. <code>print&nbsp;i</code>.
+</dd>
+<dt>display</dt>
+<dd>Like <code>print</code>, but runs automatically every time the program stops. Useful for watching values that change often.
+</dd>
+<dt>set disable-randomization off</dt>
+<dd>Not something you will need every day, but you should try this
+before running your program if it is producing segmentation faults
+outside of <code>gdb</code> but not inside. Normally the Linux kernel
+randomizes the position of bits of your program before running it, to
+make its response to buffer overflow attacks less predictable. By
+default, <code>gdb</code> turns this off so that the behavior of your
+program is consistent from one execution to the next. But sometimes this
+ means that a pointer that had been bad with address randomization
+(causing a segmentation fault) turns out not to be bad without. This
+option will restore the standard behavior outside <code>gdb</code> and give you some hope of finding what went wrong.
+</dd>
+</dl>
+<h4 id="Debugging_strategies"><span class="header-section-number">3.4.3.2</span> Debugging strategies</h4>
+<p>In general, the idea behind debugging is that a bad program starts
+out sane, but after executing for a while it goes bananas. If you can
+find the exact moment in its execution where it first starts acting up,
+you can see exactly what piece of code is causing the problem and have a
+ reasonably good chance of being able to fix it. So a typical debugging
+strategy is to put in a breakpoint (using <code>break</code>) somewhere before the insanity hits, "instrument" the program (using <code>display</code>) so that you can watch it going insane, and step through it (using <code>next</code>, <code>step</code>, or breakpoints and <code>cont</code>) until you find the point of failure. Sometimes this process requires restarting the program (using <code>run</code>) if you skip over this point without noticing it immediately.</p>
+<p>For large or long-running programs, it often makes sense to do binary
+ search to find the point of failure. Put in a breakpoint somewhere
+(say, on a function that is called many times or at the top of a major
+loop) and see what the state of the program is after going through the
+breakpoint 1000 times (using something like <code>cont&nbsp;1000</code>).
+ If it hasn't gone bonkers yet, try restarting and going through 2000
+times. Eventually you bracket the error as occurring (for example)
+somewhere between the 4000th and 8000th occurrence of the breakpoint.
+Now try stepping through 6000 times; if the program is looking good, you
+ know the error occurs somewhere between the 6000th and 8000th
+breakpoint. A dozen or so more experiments should be enough to isolate the
+bug to a specific line of code.</p>
+<p><em>The key to all debugging</em> is knowing what your code is
+supposed to do. If you don't know this, you can't tell the lunatic who
+thinks he's Napoleon from the lunatic who really is Napoleon. If you're
+confused about what your code is supposed to be doing, you need to
+figure out what exactly you want it to do. If you can figure that out,
+often it will be obvious what is going wrong. If it isn't obvious, you
+can always go back to <code>gdb</code>.</p>
+<h4 id="common-applications-of-gdb"><span class="header-section-number">3.4.3.3</span> Common applications of <code>gdb</code></h4>
+<p>Here are some typical classes of bugs and how to squish them with <code>gdb</code>. (The same instructions usually work for <code>ddd</code>.)</p>
+<h5 id="watching-your-program-run"><span class="header-section-number">3.4.3.3.1</span> Watching your program run</h5>
+<ol style="list-style-type: decimal">
+<li>Compile your program with the <code>-g3</code> flag. You can still run <code>gdb</code> if you don't do this, but it won't be able to show you variable names or source lines.</li>
+<li>Run <code>gdb</code> with <code>gdb</code> <em>programname</em>.</li>
+<li>Type <code>break main</code> to stop at the start of the <code>main</code> routine.</li>
+<li>Run your program with <code>run</code> <em>arguments</em>. The <code>run</code> command stands in for the program name. You can also redirect input as in the shell with <code>run</code> <em>arguments</em> &lt; <em>filename</em>.</li>
+<li>When the program stops, you can display variables in the current function or expressions involving these variables using <code>display</code>, as in <code>display x</code>, <code>display a[i]</code>, <code>display z+17</code>. In <code>ddd</code>, double-clicking on a variable name will have the same effect. Use <code>undisplay</code> to get rid of any displays you don't want.</li>
+<li>To step through your program, use <code>next</code> (always goes to next line in the current function, not dropping down into function calls), <code>step</code> (go to the next executed line, even if it is inside a called function), <code>finish</code> (run until the current function returns), and <code>cont</code> (run until the end of the program or the next breakpoint).</li>
+</ol>
+<p>This can be handy if you don't particularly know what is going on in your program and want to see.</p>
+<h5 id="dealing-with-failed-assertions"><span class="header-section-number">3.4.3.3.2</span> Dealing with failed assertions</h5>
+<p>Run the program as described above. When you hit the bad <code>assert</code>, you will stop several functions deep from where it actually happened. Use <code>up</code> to get up to the function that has the call to <code>assert</code> then use <code>print</code> or <code>display</code> to figure out what is going on.</p>
+<p>Example program:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> x;
+
+ x = <span class="dv">3</span>;
+
+ assert(x+x == <span class="dv">4</span>);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/debugging/assertFailed.c" class="uri">examples/debugging/assertFailed.c</a>
+</div>
+<p>With <code>gdb</code> in action:</p>
+<pre><code>$ gcc -g3 -o assertFailed assertFailed.c
+22:59:39 (Sun Feb 15) zeniba aspnes ~/g/classes/223/notes/examples/debugging
+$ gdb assertFailed
+GNU gdb (Ubuntu 7.7.1-0ubuntu5~14.04.2) 7.7.1
+Copyright (C) 2014 Free Software Foundation, Inc.
+License GPLv3+: GNU GPL version 3 or later &lt;http://gnu.org/licenses/gpl.html&gt;
+This is free software: you are free to change and redistribute it.
+There is NO WARRANTY, to the extent permitted by law. Type "show copying"
+and "show warranty" for details.
+This GDB was configured as "i686-linux-gnu".
+Type "show configuration" for configuration details.
+For bug reporting instructions, please see:
+&lt;http://www.gnu.org/software/gdb/bugs/&gt;.
+Find the GDB manual and other documentation resources online at:
+&lt;http://www.gnu.org/software/gdb/documentation/&gt;.
+For help, type "help".
+Type "apropos word" to search for commands related to "word"...
+Reading symbols from assertFailed...done.
+(gdb) run
+Starting program: /home/aspnes/g/classes/223/notes/examples/debugging/assertFailed
+assertFailed: assertFailed.c:12: main: Assertion `x+x == 4' failed.
+
+Program received signal SIGABRT, Aborted.
+0xb7fdd416 in __kernel_vsyscall ()
+(gdb) up
+#1 0xb7e43577 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
+56 ../nptl/sysdeps/unix/sysv/linux/raise.c: No such file or directory.
+(gdb) up
+#2 0xb7e469a3 in __GI_abort () at abort.c:89
+89 abort.c: No such file or directory.
+(gdb) up
+#3 0xb7e3c6c7 in __assert_fail_base (fmt=0xb7f7a8b4 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n",
+ assertion=assertion@entry=0x804850f "x+x == 4", file=file@entry=0x8048500 "assertFailed.c",
+ line=line@entry=12, function=function@entry=0x8048518 &lt;__PRETTY_FUNCTION__.2355&gt; "main") at assert.c:92
+92 assert.c: No such file or directory.
+(gdb) up
+#4 0xb7e3c777 in __GI___assert_fail (assertion=0x804850f "x+x == 4", file=0x8048500 "assertFailed.c", line=12,
+ function=0x8048518 &lt;__PRETTY_FUNCTION__.2355&gt; "main") at assert.c:101
+101 in assert.c
+(gdb) up
+#5 0x0804845d in main (argc=1, argv=0xbffff434) at assertFailed.c:12
+12 assert(x+x == 4);
+(gdb) print x
+$1 = 3</code></pre>
+<p>Here we see that <code>x</code> has value 3, which may or may not be the right value, but certainly violates the assertion.</p>
+<h5 id="dealing-with-segmentation-faults"><span class="header-section-number">3.4.3.3.3</span> Dealing with segmentation faults</h5>
+<p>Very much like the previous case. Run <code>gdb</code> until the segmentation fault hits, then look around for something wrong.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> a[<span class="dv">1000</span>];
+ <span class="dt">int</span> i;
+
+ i = -<span class="dv">1771724</span>;
+
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, a[i]);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/debugging/segmentationFault.c" class="uri">examples/debugging/segmentationFault.c</a>
+</div>
+<pre><code>$ gcc -g3 -o segmentationFault segmentationFault.c
+23:04:18 (Sun Feb 15) zeniba aspnes ~/g/classes/223/notes/examples/debugging
+$ gdb segmentationFault
+GNU gdb (Ubuntu 7.7.1-0ubuntu5~14.04.2) 7.7.1
+[...]
+Reading symbols from segmentationFault...done.
+(gdb) run
+Starting program: /home/aspnes/g/classes/223/notes/examples/debugging/segmentationFault
+
+Program received signal SIGSEGV, Segmentation fault.
+0x08048435 in main (argc=1, argv=0xbffff434) at segmentationFault.c:13
+13 printf("%d\n", a[i]);
+(gdb) print a[i]
+$1 = 0
+(gdb) print i
+$2 = -1771724</code></pre>
+<p>Curiously, <code>gdb</code> has no problem coming up with a value for <code>a[i]</code>. But <code>i</code> looks pretty suspicious.</p>
+<h5 id="dealing-with-infinite-loops"><span class="header-section-number">3.4.3.3.4</span> Dealing with infinite loops</h5>
+<p>Run <code>gdb</code>, wait a while, then hit control-C. This will stop <code>gdb</code>
+ wherever it is. If you have an infinite loop, it's likely that you will
+ be in it, and that the index variables will be doing something
+surprising. Use <code>display</code> to keep an eye on them and do <code>next</code> a few times.</p>
+<pre><code>
+$ gcc -g3 -o infiniteLoop infiniteLoop.c
+23:08:05 (Sun Feb 15) zeniba aspnes ~/g/classes/223/notes/examples/debugging
+$ gdb infiniteLoop
+GNU gdb (Ubuntu 7.7.1-0ubuntu5~14.04.2) 7.7.1
+[...]
+Reading symbols from infiniteLoop...done.
+(gdb) run
+Starting program: /home/aspnes/g/classes/223/notes/examples/debugging/infiniteLoop
+^C
+Program received signal SIGINT, Interrupt.
+main (argc=1, argv=0xbffff434) at infiniteLoop.c:11
+11 i *= 37;
+(gdb) display i
+1: i = 0
+(gdb) n
+10 for(i = 0; i &lt; 10; i += 0) {
+1: i = 0
+(gdb) n
+11 i *= 37;
+1: i = 0
+(gdb) n
+10 for(i = 0; i &lt; 10; i += 0) {
+1: i = 0
+(gdb) n
+11 i *= 37;
+1: i = 0
+(gdb) n
+10 for(i = 0; i &lt; 10; i += 0) {
+1: i = 0
+(gdb) n
+11 i *= 37;
+1: i = 0</code></pre>
+<h5 id="mysterious-variable-changes"><span class="header-section-number">3.4.3.3.5</span> Mysterious variable changes</h5>
+<p>Sometimes pointer botches don't manifest as good, honest segmentation
+ faults but instead as mysterious changes to seemingly unrelated
+variables. You can catch these in the act using conditional breakpoints.
+ The downside is that you can only put conditional breakpoints on
+particular lines.</p>
+<p>Here's a program that violates array bounds (which C doesn't detect):</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> x;
+ <span class="dt">int</span> a[<span class="dv">10</span>];
+ <span class="dt">int</span> i;
+
+ x = <span class="dv">5</span>;
+
+ <span class="kw">for</span>(i = -<span class="dv">1</span>; i &lt; <span class="dv">11</span>; i++) {
+ a[i] = <span class="dv">37</span>;
+ }
+
+ assert(x == <span class="dv">5</span>);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/debugging/mysteryChange.c" class="uri">examples/debugging/mysteryChange.c</a>
+</div>
+<p>In the debugging session below, it takes a couple of attempts to catch the change in <code>x</code> before hitting the failed assertion.</p>
+<pre><code>$ gcc -g3 -o mysteryChange mysteryChange.c
+23:15:41 (Sun Feb 15) zeniba aspnes ~/g/classes/223/notes/examples/debugging
+$ gdb mysteryChange
+GNU gdb (Ubuntu 7.7.1-0ubuntu5~14.04.2) 7.7.1
+[...]
+Reading symbols from mysteryChange...done.
+(gdb) run
+Starting program: /home/aspnes/g/classes/223/notes/examples/debugging/mysteryChange
+mysteryChange: mysteryChange.c:18: main: Assertion `x == 5' failed.
+
+Program received signal SIGABRT, Aborted.
+0xb7fdd416 in __kernel_vsyscall ()
+(gdb) list main
+2 #include &lt;stdlib.h&gt;
+3 #include &lt;assert.h&gt;
+4
+5 int
+6 main(int argc, char **argv)
+7 {
+8 int x;
+9 int a[10];
+10 int i;
+11
+(gdb) list
+12 x = 5;
+13
+14 for(i = -1; i &lt; 11; i++) {
+15 a[i] = 37;
+16 }
+17
+18 assert(x == 5);
+19
+20 return 0;
+21 }
+(gdb) break 14 if x != 5
+Breakpoint 1 at 0x804842e: file mysteryChange.c, line 14.
+(gdb) run
+The program being debugged has been started already.
+Start it from the beginning? (y or n) y
+
+Starting program: /home/aspnes/g/classes/223/notes/examples/debugging/mysteryChange
+mysteryChange: mysteryChange.c:18: main: Assertion `x == 5' failed.
+
+Program received signal SIGABRT, Aborted.
+0xb7fdd416 in __kernel_vsyscall ()
+(gdb) break 15 if x != 5
+Breakpoint 2 at 0x8048438: file mysteryChange.c, line 15.
+(gdb) run
+The program being debugged has been started already.
+Start it from the beginning? (y or n) y
+
+Starting program: /home/aspnes/g/classes/223/notes/examples/debugging/mysteryChange
+
+Breakpoint 2, main (argc=1, argv=0xbffff434) at mysteryChange.c:15
+15 a[i] = 37;
+(gdb) print i
+$1 = 0
+(gdb) print a[0]
+$2 = 134520832
+(gdb) print a[-1]
+$3 = 37
+(gdb) print x
+$4 = 37</code></pre>
+<p>One thing to note is that a breakpoint stops before the line it is on executes. So when we hit the breakpoint on line 15 (<code>gdb</code> having observed that <code>x != 5</code> is true), <code>i</code> has the value 0, but the damage happened in the previous iteration when <code>i</code>
+ was -1. If we want to see exactly what happened then, we'd need to go
+back in time. We can't do this, but we could set an earlier breakpoint
+and run the program again.</p>
+<h3 id="valgrind"><span class="header-section-number">3.4.4</span> Valgrind</h3>
+<p>The <code class="backtick">valgrind</code> program can be used to
+detect some (but not all) common errors in C programs that use pointers
+and dynamic storage allocation. On the Zoo, you can run <code class="backtick">valgrind</code> on your program by putting <code class="backtick">valgrind</code> at the start of the command line:</p>
+<pre><code>valgrind ./my-program arg1 arg2 &lt; test-input</code></pre>
+<p>This will run your program and produce a report of any allocations
+and de-allocations it did. It will also warn you about common errors
+like using uninitialized memory, dereferencing pointers to strange places,
+ writing off the end of blocks allocated using <code class="backtick">malloc</code>, or failing to free blocks.</p>
+<p>You can suppress all of the output except errors using the <code class="backtick">-q</code> option, like this:</p>
+<pre><code>valgrind -q ./my-program arg1 arg2 &lt; test-input</code></pre>
+<p>You can also turn on more tests, e.g.</p>
+<pre><code>valgrind -q --tool=memcheck --leak-check=yes ./my-program arg1 arg2 &lt; test-input</code></pre>
+<p>See <code class="backtick">valgrind&nbsp;--help</code> for more information about the (many) options, or look at the documentation at <a href="http://valgrind.org/" class="uri">http://valgrind.org/</a> for detailed information about what the output means. For some common <code class="backtick">valgrind</code> messages, see the examples section below.</p>
+<p>If you want to run <code>valgrind</code> on your own machine, you may be able to find a version that works at <a href="http://valgrind.org/" class="uri">http://valgrind.org</a>.
+ Unfortunately, this is only likely to work if you are running a
+Unix-like operating system (which includes Linux and Mac OSX, but not
+Windows).</p>
+<h4 id="Compilation_flags"><span class="header-section-number">3.4.4.1</span> Compilation flags</h4>
+<p>You can run <code class="backtick">valgrind</code> on any program (try <code class="backtick">valgrind&nbsp;ls</code>); it does not require special compilation. However, the output of <code class="backtick">valgrind</code> will be more informative if you compile your program with debugging information turned on using the <code class="backtick">-g</code> or <code class="backtick">-g3</code> flags (this is also useful if you plan to watch your program running using <code class="backtick">gdb</code>).</p>
+<h4 id="Automated_testing"><span class="header-section-number">3.4.4.2</span> Automated testing</h4>
+<p>Unless otherwise specified, automated testing of your program will be done using the script in <code class="backtick">/c/cs223/bin/vg</code>; this runs <code class="backtick">/c/cs223/bin/valgrind</code> with the <code class="backtick">--tool=memcheck</code>, <code class="backtick">--leak-check=yes</code>, and <code class="backtick">-q</code> options, throws away your program's output, and replaces it with <code class="backtick">valgrind</code>'s output. If you have a program named <code class="backtick">./prog</code>, running <code class="backtick">/c/cs223/bin/vg&nbsp;./prog</code> should produce no output.</p>
+<h4 id="Examples_of_some_common_valgrindErrors"><span class="header-section-number">3.4.4.3</span> Examples of some common valgrind errors</h4>
+<p>Here are some examples of <code class="backtick">valgrind</code> output. In each case the example program is compiled with <code class="backtick">-g3</code> so that <code class="backtick">valgrind</code> can report line numbers from the source code.</p>
+<h5 id="Uninitialized_values"><span class="header-section-number">3.4.4.3.1</span> Uninitialized values</h5>
+<p>Consider this unfortunate program, which attempts to compare two strings, one of which we forgot to ensure was null-terminated:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">char</span> a[<span class="dv">2</span>];
+
+ a[<span class="dv">0</span>] = 'a';
+
+ <span class="kw">if</span>(!strcmp(a, <span class="st">"a"</span>)) {
+ puts(<span class="st">"a is </span><span class="ch">\"</span><span class="st">a</span><span class="ch">\"</span><span class="st">"</span>);
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/valgrindErrors/uninitialized.c" class="uri">examples/valgrindErrors/uninitialized.c</a>
+</div>
+<p>Run without valgrind, we see no errors, because we got lucky and it turned out our hand-built string was null-terminated anyway:</p>
+<pre><code>$ ./uninitialized
+a is "a"</code></pre>
+<p>But <code class="backtick">valgrind</code> is not fooled:</p>
+<pre><code>$ valgrind -q ./uninitialized
+==4745== Conditional jump or move depends on uninitialised value(s)
+==4745== at 0x4026663: strcmp (mc_replace_strmem.c:426)
+==4745== by 0x8048435: main (uninitialized.c:10)
+==4745==
+==4745== Conditional jump or move depends on uninitialised value(s)
+==4745== at 0x402666C: strcmp (mc_replace_strmem.c:426)
+==4745== by 0x8048435: main (uninitialized.c:10)
+==4745==
+==4745== Conditional jump or move depends on uninitialised value(s)
+==4745== at 0x8048438: main (uninitialized.c:10)
+==4745== </code></pre>
+<p>Here we get a lot of errors, but they are all complaining about the same call to <code class="backtick">strcmp</code>. Since it's unlikely that <code class="backtick">strcmp</code>
+ itself is buggy, we have to assume that we passed some uninitialized
+location into it that it is looking at. The fix is to add an assignment <code class="backtick">a[1]&nbsp;=&nbsp;'\0'</code> so that no such location exists.</p>
+<h5 id="Bytes_definitely_lost"><span class="header-section-number">3.4.4.3.2</span> Bytes definitely lost</h5>
+<p>Here is a program that calls <code class="backtick">malloc</code> but not <code class="backtick">free</code>:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">char</span> *s;
+
+ s = malloc(<span class="dv">26</span>);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/valgrindErrors/missing_free.c" class="uri">examples/valgrindErrors/missing_free.c</a>
+</div>
+<p>With no extra arguments, <code class="backtick">valgrind</code> will not look for this error. But if we turn on <code class="backtick">--leak-check=yes</code>, it will complain:</p>
+<pre><code>$ valgrind -q --leak-check=yes ./missing_free
+==4776== 26 bytes in 1 blocks are definitely lost in loss record 1 of 1
+==4776== at 0x4024F20: malloc (vg_replace_malloc.c:236)
+==4776== by 0x80483F8: main (missing_free.c:9)
+==4776== </code></pre>
+<p>Here the stack trace in the output shows where the bad block was allocated: inside <code class="backtick">malloc</code> (specifically the paranoid replacement <code class="backtick">malloc</code> supplied by <code class="backtick">valgrind</code>), which was in turn called by <code class="backtick">main</code> in line 9 of <code class="backtick">missing_free.c</code>.
+ This lets us go back and look at what block was allocated in that line
+and try to trace forward to see why it wasn't freed. Sometimes this is
+as simple as forgetting to include a <code class="backtick">free</code>
+statement anywhere, but in more complicated cases it may be because I
+somehow lose the pointer to the block by overwriting the last variable
+that points to it or by embedding it in some larger structure whose
+components I forget to free individually.</p>
+<h5 id="Invalid_write_or_read_operations"><span class="header-section-number">3.4.4.3.3</span> Invalid write or read operations</h5>
+<p>These are usually operations that you do off the end of a block from <code class="backtick">malloc</code> or on a block that has already been freed.</p>
+<p>An example of the first case:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">char</span> *s;
+
+ s = malloc(<span class="dv">1</span>);
+ s[<span class="dv">0</span>] = 'a';
+ s[<span class="dv">1</span>] = '\<span class="dv">0</span>';
+
+ puts(s);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/valgrindErrors/invalid_operations.c" class="uri">examples/valgrindErrors/invalid_operations.c</a>
+</div>
+<pre><code>==7141== Invalid write of size 1
+==7141== at 0x804843B: main (invalid_operations.c:12)
+==7141== Address 0x419a029 is 0 bytes after a block of size 1 alloc'd
+==7141== at 0x4024F20: malloc (vg_replace_malloc.c:236)
+==7141== by 0x8048428: main (invalid_operations.c:10)
+==7141==
+==7141== Invalid read of size 1
+==7141== at 0x4026063: __GI_strlen (mc_replace_strmem.c:284)
+==7141== by 0x409BCE4: puts (ioputs.c:37)
+==7141== by 0x8048449: main (invalid_operations.c:14)
+==7141== Address 0x419a029 is 0 bytes after a block of size 1 alloc'd
+==7141== at 0x4024F20: malloc (vg_replace_malloc.c:236)
+==7141== by 0x8048428: main (invalid_operations.c:10)
+==7141== </code></pre>
+<p>An example of the second:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">char</span> *s;
+
+ s = malloc(<span class="dv">2</span>);
+ free(s);
+
+ s[<span class="dv">0</span>] = 'a';
+ s[<span class="dv">1</span>] = '\<span class="dv">0</span>';
+
+ puts(s);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/valgrindErrors/freed_block.c" class="uri">examples/valgrindErrors/freed_block.c</a>
+</div>
+<pre><code>==7144== Invalid write of size 1
+==7144== at 0x804846D: main (freed_block.c:13)
+==7144== Address 0x419a028 is 0 bytes inside a block of size 2 free'd
+==7144== at 0x4024B3A: free (vg_replace_malloc.c:366)
+==7144== by 0x8048468: main (freed_block.c:11)
+==7144==
+==7144== Invalid write of size 1
+==7144== at 0x8048477: main (freed_block.c:14)
+==7144== Address 0x419a029 is 1 bytes inside a block of size 2 free'd
+==7144== at 0x4024B3A: free (vg_replace_malloc.c:366)
+==7144== by 0x8048468: main (freed_block.c:11)
+==7144==
+==7144== Invalid read of size 1
+==7144== at 0x4026058: __GI_strlen (mc_replace_strmem.c:284)
+==7144== by 0x409BCE4: puts (ioputs.c:37)
+==7144== by 0x8048485: main (freed_block.c:16)
+[... more lines of errors deleted ...]</code></pre>
+<p>In both cases the problem is that we are operating on memory that is
+not guaranteed to be allocated to us. For short programs like these, we
+might get lucky and have the program work anyway. But we still want to
+avoid bugs like this because we might not get lucky.</p>
+<p>How do we know which case is which? If I write off the end of an existing block, I'll see something like <code class="backtick">Address&nbsp;0x419a029&nbsp;is&nbsp;0&nbsp;bytes&nbsp;after&nbsp;a&nbsp;block&nbsp;of&nbsp;size&nbsp;1&nbsp;alloc'd</code>,
+ telling me that I am working on an address after a block that is still
+allocated. When I try to write to a freed block, the message changes to <code class="backtick">Address&nbsp;0x419a029&nbsp;is&nbsp;1&nbsp;bytes&nbsp;inside&nbsp;a&nbsp;block&nbsp;of&nbsp;size&nbsp;2&nbsp;free'd</code>, where the <code class="backtick">free'd</code>
+ part tells me I freed something I probably shouldn't have. Fixing the
+first class of bugs is usually just a matter of allocating a bigger
+block (but don't just do this without figuring out <em>why</em> you need
+ a bigger block, or you'll just be introducing random mutations into
+your code that may cause other problems elsewhere). Fixing the second
+class of bugs usually involves figuring out why you freed this block
+prematurely. In some cases you may need to re-order what you are doing
+so that you don't free a block until you are completely done with it.</p>
+<h3 id="Not_recommended:_debugging_output"><span class="header-section-number">3.4.5</span> Not recommended: debugging output</h3>
+<p>A tempting but usually bad approach to debugging is to put lots of <code class="backtick">printf</code> statements in your code to show what is going on. The problem with this compared to using <code class="backtick">assert</code> is that there is no built-in test to see if the output is actually what you'd expect. The problem compared to <code class="backtick">gdb</code>
+ is that it's not flexible: you can't change your mind about what is
+getting printed out without editing the code. A third problem is that
+the output can be misleading: in particular, <code class="backtick">printf</code>
+ output is usually buffered, which means that if your program dies
+suddenly there may be output still in the buffer that is never flushed
+to <code class="backtick">stdout</code>. This can be very confusing, and can lead you to believe that your program fails earlier than it actually does.</p>
+<p>If you really need to use <code class="backtick">printf</code> or something like it for debugging output, here are a few rules of thumb to follow to mitigate the worst effects:</p>
+<ol style="list-style-type: decimal">
+<li>Use <code class="backtick">fprintf(stderr,&nbsp;...)</code> instead of <code class="backtick">printf(...)</code>;
+ this allows you to redirect your program's regular output somewhere
+that keeps it separate from the debugging output (but beware of
+misleading interleaving of the two streams—buffering may mean that
+output to <code class="backtick">stdout</code> and <code class="backtick">stderr</code> appears to arrive out of order). It also helps that output to <code class="backtick">stderr</code> is usually unbuffered, avoiding the problem of lost output.</li>
+<li>If you must output to <code class="backtick">stdout</code>, put <code class="backtick">fflush(stdout)</code> after any output operation you suspect is getting lost in the buffer. The <code class="backtick">fflush</code> function forces any buffered output to be emitted immediately.</li>
+<li>Keep all arguments passed to <code class="backtick">printf</code> as simple as possible and beware of faults in your debugging code itself. If you write <code class="backtick">printf("a[key]&nbsp;==&nbsp;%d\n",&nbsp;a[key])</code> and <code class="backtick">key</code> is some bizarre value, you will never see the result of this <code class="backtick">printf</code> because your program will segfault while evaluating <code class="backtick">a[key]</code>. Naturally, this is more likely to occur if the argument is <code class="backtick">a[key]-&gt;size[LEFTOVERS].cleanupFunction(a[key])</code> than if it's just <code class="backtick">a[key]</code>,
+ and if it happens it will be harder to figure out where in this complex
+ chain of array indexing and pointer dereferencing the disaster
+happened. Better is to wait for your program to break in <code class="backtick">gdb</code>, and use the <code class="backtick">print</code>
+ statement on increasingly large fragments of the offending expression
+to see where the bogus array index or surprising null pointer is hiding.</li>
+<li>Wrap your debugging output in an <code>#ifdef</code> so you can turn it on and off easily.</li>
+</ol>
+<p>Bearing in mind that this is a bad idea, here is an example of how one might do it as well as possible:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="co">/* initialize the application */</span>
+<span class="dt">void</span>
+init(<span class="dt">void</span>)
+{
+ <span class="dt">int</span> x;
+
+ x = *((<span class="dt">int</span> *) <span class="bn">0xbad1dea</span>); <span class="co">/* if we are lucky, maybe the optimizer will remove it? */</span>
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ init();
+
+<span class="ot">#ifdef DEBUGGING_OUTPUT</span>
+ <span class="co">/*</span>
+<span class="co"> * this type of debugging output is not recommended</span>
+<span class="co"> * but if you do it anyway:</span>
+<span class="co"> *</span>
+<span class="co"> * 1. Use stderr, which flushes automatically.</span>
+<span class="co"> * 2. Be wary of buffered data on stdout.</span>
+<span class="co"> * 3. Wrap your debugging statement in an #ifdef,</span>
+<span class="co"> * so it is not active by default.</span>
+<span class="co"> */</span>
+ fputs(<span class="st">"Returned from init() in main()</span><span class="ch">\n</span><span class="st">"</span>, stderr);
+<span class="ot">#endif</span>
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/debugging/usingPrintf.c" class="uri">examples/debugging/usingPrintf.c</a>
+</div>
+<p>Note that we get much more useful information if we run this under <code>gdb</code> (which will stop exactly on the bad line in <code>init</code>), but not seeing the result of the <code>fputs</code> at least tells us something.</p>
+<h2 id="performanceTuning"><span class="header-section-number">3.5</span> Performance tuning</h2>
+<p>Chapter 7 of Kernighan and Pike, <em>The Practice of Programming</em>
+ (Addison-Wesley, 1998) gives an excellent overview of performance
+tuning. This page will be limited to some Linux-specific details and an
+example.</p>
+<h3 id="Timing_under_Linux"><span class="header-section-number">3.5.1</span> Timing under Linux</h3>
+<p>Use <code class="backtick">time</code>, e.g.</p>
+<pre><code>$ time wc /usr/share/dict/words
+ 45378 45378 408865 /usr/share/dict/words
+
+real 0m0.010s
+user 0m0.006s
+sys 0m0.004s</code></pre>
+<p>This measures "real time" (what it sounds like), "user time" (the
+amount of time the program runs), and "system time" (the amount of time
+the operating system spends supporting your program, e.g. by loading it
+from disk and doing I/O). Real time need not be equal to the sum of user
+ time and system time, since the operating system may be simultaneously
+running other programs.</p>
+<p>Particularly for fast programs, times can vary from one execution to the next, e.g.</p>
+<pre><code>$ time wc /usr/share/dict/words
+ 45378 45378 408865 /usr/share/dict/words
+
+real 0m0.009s
+user 0m0.008s
+sys 0m0.001s
+$ time wc /usr/share/dict/words
+ 45378 45378 408865 /usr/share/dict/words
+
+real 0m0.009s
+user 0m0.007s
+sys 0m0.002s</code></pre>
+<p>This arises because of measurement errors and variation in how long
+different operations take. But usually the variation will not be much.</p>
+<p>Note also that <code class="backtick">time</code> is often a builtin operation of your shell, so the output format may vary depending on what shell you use.</p>
+<h3 id="profiling"><span class="header-section-number">3.5.2</span> Profiling with gprof</h3>
+<p>The problem with <code class="backtick">time</code> is that it only
+tells you how much time your whole program took, but not where it spent
+its time. This is similar to looking at a program without a debugger:
+you can't see what's happening inside. If you want to see where your
+program is spending its time, you need to use a profiler.</p>
+<p>For example, here's a short but slow program for calculating the number of primes less than some limit passed as <code class="backtick">argv[1]</code>:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="co">/* return 1 if n is prime, 0 otherwise */</span>
+<span class="dt">int</span>
+isPrime(<span class="dt">int</span> n)
+{
+ <span class="dt">int</span> factor;
+
+ <span class="kw">if</span>(n &lt; <span class="dv">2</span>) <span class="kw">return</span> <span class="dv">0</span>;
+ <span class="co">/* else */</span>
+ <span class="kw">for</span>(factor = <span class="dv">2</span>; factor &lt; n; factor++) {
+ <span class="kw">if</span>(n % factor == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+ }
+ <span class="co">/* else */</span>
+ <span class="kw">return</span> <span class="dv">1</span>;
+}
+
+<span class="co">/* return number of primes &lt; n */</span>
+<span class="dt">int</span>
+countPrimes(<span class="dt">int</span> n)
+{
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> count;
+
+ count = <span class="dv">0</span>;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ <span class="kw">if</span>(isPrime(i)) count++;
+ }
+
+ <span class="kw">return</span> count;
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="kw">if</span>(argc != <span class="dv">2</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s n</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, countPrimes(atoi(argv[<span class="dv">1</span>])));
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/profiling/countPrimes.c" class="uri">examples/profiling/countPrimes.c</a>
+</div>
+<p>And now we'll time <code class="backtick">countPrimes&nbsp;100000</code>:</p>
+<pre><code>$ c99 -g3 -o countPrimes countPrimes.c
+$ time ./countPrimes 100000
+9592
+
+real 0m4.711s
+user 0m4.608s
+sys 0m0.004s</code></pre>
+<p>This shows that the program took just under five seconds of real
+time, of which most was spent in user mode and a very small fraction was
+ spent in kernel (sys) mode. The user-mode part corresponds to the code
+we wrote and any library routines we call that don't require special
+privileges from the operating system. The kernel-mode part will mostly
+be I/O (not much in this case). Real time is generally less useful than
+CPU time, because it depends on how loaded the CPU is. Also, none of
+these times are especially precise, because the program only gets
+charged for time on a context switch (when it switches between user and
+kernel mode or some other program takes over the CPU for a bit) or when
+the kernel decides to see what it is up to (typically every 10
+milliseconds).</p>
+<p>The overall cost is not too bad, but the reason I picked 100000 and
+not some bigger number was that it didn't terminate fast enough for
+larger inputs. We'd like to see why it is taking so long, to have some
+idea what to try to speed up. So we'll compile it with the <code class="backtick">-pg</code> option to <code class="backtick">gcc</code>, which inserts <strong>profiling</strong> code that counts how many times each function is called and how long (on average) each call takes.</p>
+<p>Because the profiler is not very smart about shared libraries, we also include the <code>--static</code>
+ option to force the resulting program to be statically linked. This
+means that all the code that is used by the program is baked into the
+executable instead of being linked in at run-time. (Normally we don't do
+ this because it makes for big executables and big running programs,
+since statically-linked libraries can't be shared between more than one
+running program.)</p>
+<pre><code>$ c99 -pg --static -g3 -o countPrimes countPrimes.c
+$ time ./countPrimes 100000
+9592
+
+real 0m4.723s
+user 0m4.668s
+sys 0m0.000s</code></pre>
+<p>Hooray! We've made the program slightly slower. But we also just produced a file <code class="backtick">gmon.out</code> that we can read with <code class="backtick">gprof</code>. Note that we have to pass the name of the program so that <code>gprof</code> can figure out which executable generated <code>gmon.out</code>.</p>
+<pre><code>$ gprof countPrimes
+Flat profile:
+
+Each sample counts as 0.01 seconds.
+ % cumulative self self total
+ time seconds seconds calls s/call s/call name
+100.00 4.66 4.66 100000 0.00 0.00 isPrime
+ 0.00 4.66 0.00 1 0.00 4.66 countPrimes
+ 0.00 4.66 0.00 1 0.00 4.66 main
+
+[...much explanatory text deleted]</code></pre>
+<p>It looks like we are spending all of our time in <code>isPrime</code>, at least if we read the columns on the left. The per-call columns are not too helpful because of granularity: <code>isPrime</code>
+ is too fast for the profiler to wake up and detect how long it runs
+for. The total columns are less suspicious because they are obtained by
+sampling: from time to time, the profiler looks and sees what function
+it's in, and charges each function a fraction of the total CPU time
+proportional to how often it gets sampled. So we probably aren't really
+spending zero time in <code>countPrimes</code> and <code>main</code>, but the amount of time we do spend is small enough not to be detected.</p>
+<p>This is handy because it means we don't need to bother trying to
+speed up the rest of the program. We have two things we can try:</p>
+<ol style="list-style-type: decimal">
+<li>Call <code>isPrime</code> less.</li>
+<li>Make <code>isPrime</code> faster.</li>
+</ol>
+<p>Let's start by seeing if we can make <code>isPrime</code> faster.</p>
+<p>What <code class="backtick">isPrime</code> is doing is testing if a number <code class="backtick">n</code> is prime by the most direct way possible: dividing by all numbers less than <code class="backtick">n</code> until it finds a factor. That's a lot of divisions: if <code class="backtick">n</code> is indeed prime, it's linear in <code class="backtick">n</code>. Since division is a relatively expensive operation, the first thing to try is to get rid of some.</p>
+<p>Here's a revised version of <code class="backtick">isPrime</code>:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* return 1 if n is prime, 0 otherwise */</span>
+<span class="dt">int</span>
+isPrime(<span class="dt">int</span> n)
+{
+ <span class="dt">int</span> factor;
+
+ <span class="kw">if</span>(n &lt; <span class="dv">2</span>) { <span class="kw">return</span> <span class="dv">0</span>; }
+ <span class="kw">if</span>(n % <span class="dv">2</span> == <span class="dv">0</span>) {
+ <span class="co">/* special case for the only even prime */</span>
+ <span class="kw">return</span> n == <span class="dv">2</span>;
+ }
+ <span class="co">/* else */</span>
+ <span class="kw">for</span>(factor = <span class="dv">3</span>; factor &lt; n; factor+=<span class="dv">2</span>) {
+ <span class="kw">if</span>(n % factor == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+ }
+ <span class="co">/* else */</span>
+ <span class="kw">return</span> <span class="dv">1</span>;
+}</code></pre></div>
+<p><a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/profiling/countPrimesSkipEvenFactors.c" class="uri">examples/profiling/countPrimesSkipEvenFactors.c</a></p>
+<p>The trick is to check first if <code class="backtick">n</code> is divisible by <code class="backtick">2</code>,
+ and only test odd potential factors thereafter. This requires some
+extra work to handle 2, but maybe the extra code complexity will be
+worth it.</p>
+<p>Let's see how the timing goes:</p>
+<pre><code>$ c99 -pg --static -g3 -o countPrimes ./countPrimesSkipEvenFactors.c
+$ time ./countPrimes 100000
+9592
+
+real 0m2.608s
+user 0m2.400s
+sys 0m0.004s
+$ gprof countPrimes
+Flat profile:
+
+Each sample counts as 0.01 seconds.
+ % cumulative self self total
+ time seconds seconds calls s/call s/call name
+100.00 2.29 2.29 100000 0.00 0.00 isPrime
+ 0.00 2.29 0.00 1 0.00 2.29 countPrimes
+ 0.00 2.29 0.00 1 0.00 2.29 main
+
+[...]</code></pre>
+<p>Twice as fast! And the answer is still the same, too—this is important.</p>
+<p>Can we test even fewer factors? Suppose <code class="backtick">n</code> has a non-trivial factor <code class="backtick">x</code>. Then <code class="backtick">n</code> equals <code class="backtick">x*y</code> for some <code class="backtick">y</code> which is also nontrivial. One of <code class="backtick">x</code> or <code class="backtick">y</code> will be no bigger than the square root of <code class="backtick">n</code>. So perhaps we can stop when we reach the square root of <code class="backtick">n</code>.</p>
+<p>Let's try it:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;math.h&gt;</span>
+
+<span class="co">/* return 1 if n is prime, 0 otherwise */</span>
+<span class="dt">int</span>
+isPrime(<span class="dt">int</span> n)
+{
+ <span class="dt">int</span> factor;
+
+ <span class="kw">if</span>(n &lt; <span class="dv">2</span>) { <span class="kw">return</span> <span class="dv">0</span>; }
+ <span class="kw">if</span>(n % <span class="dv">2</span> == <span class="dv">0</span>) {
+ <span class="co">/* special case for the only even prime */</span>
+ <span class="kw">return</span> n == <span class="dv">2</span>;
+ }
+ <span class="co">/* else */</span>
+ <span class="kw">for</span>(factor = <span class="dv">3</span>; factor &lt; sqrt(n)+<span class="dv">1</span>; factor+=<span class="dv">2</span>) {
+ <span class="kw">if</span>(n % factor == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+ }
+ <span class="co">/* else */</span>
+ <span class="kw">return</span> <span class="dv">1</span>;
+}</code></pre></div>
+<p><a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/profiling/countPrimesSqrt.c" class="uri">examples/profiling/countPrimesSqrt.c</a></p>
+<p>I added <code class="backtick">+1</code> to the return value of <code class="backtick">sqrt</code> both to allow for <code>factor</code> to be equal to the square root of <code>n</code>, and because the output of <code class="backtick">sqrt</code> is not exact, and it would be embarrassing if I announced that 25 was prime because I stopped at 4.9999999997.</p>
+<p>Using the math library not only requires including <code class="backtick">&lt;math.h&gt;</code> but also requires compiling with the <code class="backtick">-lm</code> flag after all <code class="backtick">.c</code> or <code class="backtick">.o</code> files, to link in the library routines:</p>
+<pre><code>$ c99 -pg --static -g3 -o countPrimes ./countPrimesSqrt.c -lm
+$ time ./countPrimes 1000000
+78498
+
+real 0m1.008s
+user 0m0.976s
+sys 0m0.000s
+$ gprof countPrimes
+Flat profile:
+
+Each sample counts as 0.01 seconds.
+ % cumulative self self total
+ time seconds seconds calls ms/call ms/call name
+ 50.00 0.02 0.02 100000 0.00 0.00 isPrime
+ 50.00 0.04 0.02 __sqrt_finite
+ 0.00 0.04 0.00 1 0.00 20.00 countPrimes
+ 0.00 0.04 0.00 1 0.00 20.00 main
+
+[...]</code></pre>
+<p>Whoosh!</p>
+<p>Can we optimize further? Let's see what happens on a bigger input:</p>
+<pre><code>$ time ./countPrimes 1000000
+78498
+
+real 0m0.987s
+user 0m0.960s
+sys 0m0.000s
+$ gprof countPrimes
+Flat profile:
+
+Each sample counts as 0.01 seconds.
+ % cumulative self self total
+ time seconds seconds calls ms/call ms/call name
+ 51.04 0.49 0.49 __sqrt_finite
+ 44.79 0.92 0.43 1000000 0.00 0.00 isPrime
+ 3.65 0.96 0.04 sqrt
+ 0.52 0.96 0.01 1 5.00 435.00 main
+ 0.00 0.96 0.00 1 0.00 430.00 countPrimes
+
+[...]</code></pre>
+<p>This is still very good, although we're spending a lot of time in <code>sqrt</code> (more specifically, its internal helper routine <code>__sqrt_finite</code>). Can we do better?</p>
+<p>Maybe moving the <code class="backtick">sqrt</code> out of the loop in <code class="backtick">isPrime</code> will make a difference:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* return 1 if n is prime, 0 otherwise */</span>
+<span class="dt">int</span>
+isPrime(<span class="dt">int</span> n)
+{
+ <span class="dt">int</span> factor;
+ <span class="dt">int</span> sqrtValue;
+
+ <span class="kw">if</span>(n &lt; <span class="dv">2</span>) { <span class="kw">return</span> <span class="dv">0</span>; }
+ <span class="kw">if</span>(n % <span class="dv">2</span> == <span class="dv">0</span>) {
+ <span class="co">/* special case for the only even prime */</span>
+ <span class="kw">return</span> n == <span class="dv">2</span>;
+ }
+ <span class="co">/* else */</span>
+ sqrtValue = sqrt(n) + <span class="dv">1</span>;
+ <span class="kw">for</span>(factor = <span class="dv">3</span>; factor &lt; sqrtValue; factor+=<span class="dv">2</span>) {
+ <span class="kw">if</span>(n % factor == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+ }
+ <span class="co">/* else */</span>
+ <span class="kw">return</span> <span class="dv">1</span>;
+}</code></pre></div>
+<p><a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/profiling/countPrimesSqrtOutsideLoop.c" class="uri">examples/profiling/countPrimesSqrtOutsideLoop.c</a></p>
+<pre><code>$ c99 -pg --static -g3 -o countPrimes ./countPrimesSqrtOutsideLoop.c -lm
+$ time ./countPrimes 1000000
+78498
+
+real 0m0.413s
+user 0m0.392s
+sys 0m0.000s
+$ gprof countPrimes
+Flat profile:
+
+Each sample counts as 0.01 seconds.
+ % cumulative self self total
+ time seconds seconds calls ms/call ms/call name
+ 97.44 0.38 0.38 1000000 0.00 0.00 isPrime
+ 2.56 0.39 0.01 1 10.00 390.00 countPrimes
+ 0.00 0.39 0.00 1 0.00 390.00 main
+
+[...]</code></pre>
+<p>This worked! We are now spending so little time in <code>sqrt</code> that the profiler doesn't notice it.</p>
+<p>What if we get rid of the call to <code class="backtick">sqrt</code> and test if <code class="backtick">factor&nbsp;*&nbsp;factor&nbsp;&lt;=&nbsp;n</code> instead? This way we could dump the math library:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* return 1 if n is prime, 0 otherwise */</span>
+<span class="dt">int</span>
+isPrime(<span class="dt">int</span> n)
+{
+ <span class="dt">int</span> factor;
+
+ <span class="kw">if</span>(n &lt; <span class="dv">2</span>) { <span class="kw">return</span> <span class="dv">0</span>; }
+ <span class="kw">if</span>(n % <span class="dv">2</span> == <span class="dv">0</span>) {
+ <span class="co">/* special case for the only even prime */</span>
+ <span class="kw">return</span> n == <span class="dv">2</span>;
+ }
+ <span class="co">/* else */</span>
+ <span class="kw">for</span>(factor = <span class="dv">3</span>; factor*factor &lt;= n; factor+=<span class="dv">2</span>) {
+ <span class="kw">if</span>(n % factor == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+ }
+ <span class="co">/* else */</span>
+ <span class="kw">return</span> <span class="dv">1</span>;
+}</code></pre></div>
+<p><a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/profiling/countPrimesSquaring.c" class="uri">examples/profiling/countPrimesSquaring.c</a></p>
+<pre><code>$ c99 -pg --static -g3 -o countPrimes ./countPrimesSquaring.c
+$ time ./countPrimes 1000000
+78498
+
+real 0m0.450s
+user 0m0.428s
+sys 0m0.000s</code></pre>
+<p>This is slower, but not much slower. We might need to decide how much
+ we care about avoiding floating-point computation in our program.</p>
+<p>At this point we could decide that <code class="backtick">countPrimes</code>
+ is fast enough, or maybe we could look for further improvements, say,
+by testing out many small primes at the beginning instead of just <code class="backtick">2</code>, calling <code class="backtick">isPrime</code> only on odd values of <code class="backtick">i</code>,
+ or reading a computational number theory textbook to find out how we
+ought to be doing this. A reasonable strategy for code for your own use
+is often to start running one version and make improvements on a
+separate copy while it's running. If the first version terminates before
+ you are done writing new code, it's probably fast enough.</p>
+<h4 id="effect-of-optimization-during-compilation"><span class="header-section-number">3.5.2.1</span> Effect of optimization during compilation</h4>
+<p>We didn't use any optimization flags for this example, because the
+optimizer can do a lot of rewriting that can make the output of the
+profiler confusing. For example, at high optimization levels, the
+compiler will often avoid function-call overhead by inserting the body
+of a helper function directly into its caller. But this can make a big
+difference in performance, so in real life you will want to compile with
+ optimization turned on. Here's how the performance of <code>countPrimes 100000</code> is affected by optimization level:</p>
+<table>
+<thead>
+<tr class="header">
+<th align="left">Version</th>
+<th align="right">No optimization</th>
+<th align="right">With -O1</th>
+<th align="right">With -O2</th>
+<th align="right">With -O3</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left"><code>countPrimes.c</code></td>
+<td align="right">4.600</td>
+<td align="right">4.060</td>
+<td align="right">3.928</td>
+<td align="right">3.944</td>
+</tr>
+<tr class="even">
+<td align="left"><code>countPrimesSkipEvenFactors.c</code></td>
+<td align="right">2.260</td>
+<td align="right">1.948</td>
+<td align="right">1.964</td>
+<td align="right">1.984</td>
+</tr>
+<tr class="odd">
+<td align="left"><code>countPrimesSqrt.c</code></td>
+<td align="right">0.036</td>
+<td align="right">0.028</td>
+<td align="right">0.028</td>
+<td align="right">0.028</td>
+</tr>
+<tr class="even">
+<td align="left"><code>countPrimesSqrtOutsideLoop.c</code></td>
+<td align="right">0.012</td>
+<td align="right">0.012</td>
+<td align="right">0.008</td>
+<td align="right">0.008</td>
+</tr>
+<tr class="odd">
+<td align="left"><code>countPrimesSquaring.c</code></td>
+<td align="right">0.012</td>
+<td align="right">0.012</td>
+<td align="right">0.008</td>
+<td align="right">0.012</td>
+</tr>
+</tbody>
+</table>
+<p>In each case, the reported time is the sum of user and system time in seconds.<a href="#fn3" class="footnoteRef" id="fnref3"><sup>3</sup></a></p>
+<p>For the smarter routines, more optimization doesn't necessarily help,
+ although some of this may be experimental error since I was too lazy to
+ get a lot of samples by running each program more than once, and the
+times for the faster programs are so small that granularity is going to
+be an issue.</p>
+<p>Here's the same table using <code>countPrimes 10000000</code> on the three fastest programs:</p>
+<table>
+<thead>
+<tr class="header">
+<th align="left">Version</th>
+<th align="right">No optimization</th>
+<th align="right">With -O1</th>
+<th align="right">With -O2</th>
+<th align="right">With -O3</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left"><code>countPrimesSqrt.c</code></td>
+<td align="right">24.236</td>
+<td align="right">18.840</td>
+<td align="right">18.720</td>
+<td align="right">18.564</td>
+</tr>
+<tr class="even">
+<td align="left"><code>countPrimesSqrtOutsideLoop.c</code></td>
+<td align="right">9.388</td>
+<td align="right">9.364</td>
+<td align="right">9.368</td>
+<td align="right">9.360</td>
+</tr>
+<tr class="odd">
+<td align="left"><code>countPrimesSquaring.c</code></td>
+<td align="right">9.748</td>
+<td align="right">9.248</td>
+<td align="right">9.236</td>
+<td align="right">9.160</td>
+</tr>
+</tbody>
+</table>
+<p>Again there are the usual caveats that I am a lazy person and should
+probably be doing more to deal with sampling and granularity issues, but
+ if you believe these numbers, we actually win by going to <code>countPrimesSquaring</code> once the optimizer is turned on. I suspect that it is benefiting from <a href="http://en.wikipedia.org/wiki/Strength_reduction">strength reduction</a>, which would generate the product <code>factor*factor</code> in <code>isPrime</code> incrementally using addition rather than multiplying from scratch each time.</p>
+<p>It's also worth noting that the optimizer works better if we leave a lot of easy optimization lying around. For <code>countPrimesSqrt.c</code>, my guess is that most of the initial gains are from avoiding function call overhead on <code>sqrt</code>
+ by compiling it in-line. But even the optimizer is not smart enough to
+recognize that we are computing the same value over and over again, so
+we still win by pulling <code>sqrt</code> out of the loop in <code>countPrimesSqrtOutsideLoop.c</code>.</p>
+<p>If I wanted to see if my guesses about the optimizer were correct, I could use <code>gcc -S</code> and look at the assembler code. But see earlier comments about laziness.</p>
+<h2 id="versionControl"><span class="header-section-number">3.6</span> Version control</h2>
+<p>When you are programming, you will make mistakes. If you program long
+ enough, these will eventually include true acts of boneheadedness like
+accidentally deleting all of your source files. You are also likely to
+spend some of your time trying out things that don't work, at the end of
+ which you'd like to go back to the last version of your program that
+did work. All these problems can be solved by using a <strong>version control system</strong>.</p>
+<p>There are six respectable version control systems installed on the Zoo: <code class="backtick">rcs</code>, <code class="backtick">cvs</code>, <code class="backtick">svn</code>, <code class="backtick">bzr</code>, <code class="backtick">hg</code>, and <code class="backtick">git</code>. If you are familiar with any of them, you should use that. If you have to pick one from scratch, I recommend using <code class="backtick">git</code>. A brief summary of <code>git</code> is given below. For more details, see the tutorials available at <a href="http://git-scm.com/" class="uri">http://git-scm.com</a>.</p>
+<h3 id="Setting_up_Git"><span class="header-section-number">3.6.1</span> Setting up Git</h3>
+<p>Typically you run <code class="backtick">git</code> inside a directory that holds some project you are working on (say, <code class="backtick">hw1</code>). Before you can do anything with <code class="backtick">git</code>, you will need to create the <em>repository</em>, which is a hidden directory <code class="backtick">.git</code> that records changes to your files:</p>
+<pre><code>$ mkdir git-demo
+$ cd git-demo
+$ git init
+Initialized empty Git repository in /home/classes/cs223/class/aspnes.james.ja54/git-demo/.git/</code></pre>
+<p>Now let's create a file and add it to the repository:</p>
+<pre><code>$ echo 'int main(int argc, char **argv) { return 0; }' &gt; tiny.c
+$ git add tiny.c</code></pre>
+<p>The <code class="backtick">git&nbsp;status</code> command will tell us that Git knows about <code class="backtick">tiny.c</code>, but hasn't committed the changes to the repository yet:</p>
+<pre><code>$ git status
+# On branch master
+#
+# Initial commit
+#
+# Changes to be committed:
+# (use "git rm --cached &lt;file&gt;..." to unstage)
+#
+# new file: tiny.c
+#</code></pre>
+<p>The <code class="backtick">git&nbsp;commit</code> command will commit
+ the actual changes, along with a message saying what you did. For short
+ messages, the easiest way to do this is to include the message on the
+command line:</p>
+<pre><code>$ git commit -a -m"add very short C program"
+[master (root-commit) 5393616] add very short C program
+ Committer: James Aspnes &lt;ja54@tick.zoo.cs.yale.edu&gt;
+Your name and email address were configured automatically based
+on your username and hostname. Please check that they are accurate.
+You can suppress this message by setting them explicitly:
+
+ git config --global user.name "Your Name"
+ git config --global user.email you@example.com
+
+If the identity used for this commit is wrong, you can fix it with:
+
+ git commit --amend --author='Your Name &lt;you@example.com&gt;'
+
+ 1 files changed, 1 insertions(+), 0 deletions(-)
+ create mode 100644 tiny.c</code></pre>
+<p>The <code class="backtick">-a</code> argument tells Git to include any changes I made to files it already knows about. The <code class="backtick">-m</code> argument sets the commit message.</p>
+<p>Because this is the first time I ever did a commit, and because I
+didn't tell Git who I was before, it complains that its guess for my
+name and email address may be wrong. It also tells me what to do to get
+it to shut up about this next time:</p>
+<pre><code>$ git config --global user.name "James Aspnes"
+$ git config --global user.email "aspnes@cs.yale.edu"
+$ git commit --amend --author="James Aspnes &lt;aspnes@cs.yale.edu&gt;" -m"add a very short C program"
+[master a44e1e1] add a very short C program
+ 1 files changed, 1 insertions(+), 0 deletions(-)
+ create mode 100644 tiny.c</code></pre>
+<p>Note that I repeated the <code class="backtick">-m</code> business to <code class="backtick">git&nbsp;commit&nbsp;--amend</code>; if I hadn't, it would have run the default editor (<code class="backtick">vim</code>) to let me edit my commit message. If I don't like <code class="backtick">vim</code>, I can change the default using <code class="backtick">git&nbsp;config&nbsp;--global&nbsp;core.editor</code>, e.g.:</p>
+<pre><code>$ git config --global core.editor "emacs -nw"</code></pre>
+<p>I can see what commits I've done so far using <code class="backtick">git&nbsp;log</code>:</p>
+<pre><code>$ git log
+commit a44e1e195de4ce785cd95cae3b93c817d598a9ee
+Author: James Aspnes &lt;aspnes@cs.yale.edu&gt;
+Date: Thu Dec 29 20:21:21 2011 -0500
+
+ add a very short C program</code></pre>
+<h3 id="Editing_files"><span class="header-section-number">3.6.2</span> Editing files</h3>
+<p>Suppose I edit <code class="backtick">tiny.c</code> using my favorite editor to turn it into the classic hello-world program:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ puts(<span class="st">"hello, world"</span>);
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<p>I can see what files have changed using <code class="backtick">git&nbsp;status</code>:</p>
+<pre><code>$ git status
+# On branch master
+# Changed but not updated:
+# (use "git add &lt;file&gt;..." to update what will be committed)
+# (use "git checkout -- &lt;file&gt;..." to discard changes in working directory)
+#
+# modified: tiny.c
+#
+no changes added to commit (use "git add" and/or "git commit -a")</code></pre>
+<p>Notice how Git reminds me to use <code class="backtick">git&nbsp;commit&nbsp;-a</code> to include these changes in my next commit. I can also do <code class="backtick">git&nbsp;add&nbsp;tiny.c</code> if I just want to include the changes to <code class="backtick">tiny.c</code> (maybe I made changes to a different file that I want to commit separately), but usually that's too much work.</p>
+<p>If I want to know the details of the changes since my last commit, I can do <code class="backtick">git&nbsp;diff</code>:</p>
+<pre><code>$ git diff
+diff --git a/tiny.c b/tiny.c
+index 0314ff1..f8d9dcd 100644
+--- a/tiny.c
++++ b/tiny.c
+@@ -1 +1,8 @@
+-int main(int argc, char **argv) { return 0; }
++#include &lt;stdio.h&gt;
++
++int
++main(int argc, char **argv)
++{
++ puts("hello, world");
++ return 0;
++}</code></pre>
+<p>Since I like these changes, I do a commit:</p>
+<pre><code>$ git commit -a -m"expand previous program to hello world"
+[master 13a73be] expand previous program to hello world
+ 1 files changed, 8 insertions(+), 1 deletions(-)</code></pre>
+<p>Now there are two commits in my log:</p>
+<pre><code>$ git log | tee /dev/null
+commit 13a73bedd3a48c173898d1afec05bd6fa0d7079a
+Author: James Aspnes &lt;aspnes@cs.yale.edu&gt;
+Date: Thu Dec 29 20:34:06 2011 -0500
+
+ expand previous program to hello world
+
+commit a44e1e195de4ce785cd95cae3b93c817d598a9ee
+Author: James Aspnes &lt;aspnes@cs.yale.edu&gt;
+Date: Thu Dec 29 20:21:21 2011 -0500
+
+ add a very short C program</code></pre>
+<h3 id="Renaming_files"><span class="header-section-number">3.6.3</span> Renaming files</h3>
+<p>You can rename a file with <code class="backtick">git&nbsp;mv</code>. This is just like regular <code class="backtick">mv</code>, except that it tells Git what you are doing.</p>
+<pre><code>$ git mv tiny.c hello.c
+$ git status
+# On branch master
+# Changes to be committed:
+# (use "git reset HEAD &lt;file&gt;..." to unstage)
+#
+# renamed: tiny.c -&gt; hello.c
+#</code></pre>
+<p>These changes don't get written to the repository unless you do another <code class="backtick">git&nbsp;commit</code>:</p>
+<pre><code>$ git commit -a -m"give better name to hello program"
+[master 6d2116c] give better name to hello program
+ 1 files changed, 0 insertions(+), 0 deletions(-)
+ rename tiny.c =&gt; hello.c (100%)</code></pre>
+<h3 id="Adding_and_removing_files"><span class="header-section-number">3.6.4</span> Adding and removing files</h3>
+<p>To add a file, create it and then call <code class="backtick">git&nbsp;add</code>:</p>
+<pre><code>$ cp hello.c goodbye.c
+$ git status
+# On branch master
+# Untracked files:
+# (use "git add &lt;file&gt;..." to include in what will be committed)
+#
+# goodbye.c
+nothing added to commit but untracked files present (use "git add" to track)
+$ git add goodbye.c
+$ git commit -a -m"we need a second program to say goodbye"
+[master 454b24c] we need a second program to say goodbye
+ 1 files changed, 8 insertions(+), 0 deletions(-)
+ create mode 100644 goodbye.c</code></pre>
+<p>To remove a file, use <code>git rm</code>:</p>
+<pre><code>$ git rm goodbye.c
+$ git status
+# On branch master
+# Changed but not updated:
+# (use "git add/rm &lt;file&gt;..." to update what will be committed)
+# (use "git checkout -- &lt;file&gt;..." to discard changes in working directory)
+#
+# deleted: goodbye.c
+#
+no changes added to commit (use "git add" and/or "git commit -a")
+$ git commit -a -m"no, goodbye.c was a bad idea"
+[master defa0e0] no, goodbye.c was a bad idea
+ 1 files changed, 0 insertions(+), 8 deletions(-)
+ delete mode 100644 goodbye.c</code></pre>
+<h3 id="Recovering_files_from_the_repository"><span class="header-section-number">3.6.5</span> Recovering files from the repository</h3>
+<p>If you make a mistake, you can back out using the repository. Here I will delete my <code class="backtick">hello.c</code> file and then get it back using <code class="backtick">git&nbsp;checkout&nbsp;--&nbsp;hello.c</code>:</p>
+<pre><code>$ rm hello.c
+$ ls
+$ git checkout -- hello.c
+$ ls
+hello.c</code></pre>
+<p>I can also get back old versions of files by putting the commit id before the <code class="backtick">--</code>:</p>
+<pre><code>$ git checkout a44e1 -- tiny.c
+$ ls
+hello.c tiny.c</code></pre>
+<p>The commit id can be any unique prefix of the ridiculously long hex name shown by <code class="backtick">git&nbsp;log</code>.</p>
+<p>Having recovered <code class="backtick">tiny.c</code>, I will keep it around by adding it to a new commit:</p>
+<pre><code>$ git commit -a -m"keep tiny.c around"
+[master 23d6219] keep tiny.c around
+ 1 files changed, 1 insertions(+), 0 deletions(-)
+ create mode 100644 tiny.c</code></pre>
+<h3 id="Undoing_bad_commits"><span class="header-section-number">3.6.6</span> Undoing bad commits</h3>
+<p>Suppose I commit a change that I didn't want to make. For example,
+let's suppose I decide to add some punctuation to the greeting in <code class="backtick">hello.c</code> but botch my edit:</p>
+<pre><code>$ vim hello.c
+$ git commit -a -m"add exclamation point"
+[master f40d8d3] add exclamation point
+ 1 files changed, 1 insertions(+), 1 deletions(-)</code></pre>
+<p>Only now does it occur to me to test my program:</p>
+<pre><code>$ c99 -o hello hello.c
+$ ./hello
+hello, wolrd!</code></pre>
+<p>Disaster!</p>
+<p>I can use <code class="backtick">git&nbsp;diff</code> to see what went wrong. The command below compares the current working directory to <code class="backtick">HEAD^</code>, the commit before the most recent commit:<a href="#fn4" class="footnoteRef" id="fnref4"><sup>4</sup></a></p>
+<pre><code>$ git diff HEAD^ | tee /dev/null
+diff --git a/hello.c b/hello.c
+index f8d9dcd..dc227a8 100644
+--- a/hello.c
++++ b/hello.c
+@@ -3,6 +3,6 @@
+ int
+ main(int argc, char **argv)
+ {
+- puts("hello, world");
++ puts("hello, wolrd!");
+ return 0;
+ }</code></pre>
+<p>And I see my mistake leaping out at me on the new line I added (which <code class="backtick">git&nbsp;diff</code> puts a <code class="backtick">+</code> in front of). But now what do I do? I already committed the change, which means that I can't get it out of the history.<a href="#fn5" class="footnoteRef" id="fnref5"><sup>5</sup></a></p>
+<p>Instead, I use <code class="backtick">git&nbsp;revert</code> on <code class="backtick">HEAD</code>, the most recent commit:</p>
+<pre><code>$ git revert HEAD
+[master fca3166] Revert "add exclamation point"
+ 1 files changed, 1 insertions(+), 1 deletions(-)</code></pre>
+<p>(Not shown here is where it popped up a <code class="backtick">vim</code> session to let me edit the commit message; I just hit <code class="backtick">:x</code>&lt;ENTER&gt; to get out of it without changing the default.)</p>
+<p>Now everything is back to the way it was before the bad commit:</p>
+<pre><code>$ ./hello
+hello, world</code></pre>
+<h3 id="Looking_at_old_versions"><span class="header-section-number">3.6.7</span> Looking at old versions</h3>
+<p>Running <code class="backtick">git&nbsp;log</code> will now show me the entire history of my project, newest commits first:</p>
+<pre><code>fca3166a697c6d72fb9e8aec913bb8e36fb5fe4e Revert "add exclamation point"
+f40d8d386890103abacd0bf4142ecad62eed5aeb add exclamation point
+23d6219c9380ba03d9be0672f0a7b25d18417731 keep tiny.c around
+defa0e0430293ca910f077d5dd19fccc47ab0521 no, goodbye.c was a bad idea
+454b24c307121b5a597375a99a37a825b0dc7e81 we need a second program to say goodbye
+6d2116c4c72a6ff92b8b276eb88ddb556d1b8fdd give better name to hello program
+13a73bedd3a48c173898d1afec05bd6fa0d7079a expand previous program to hello world
+a44e1e195de4ce785cd95cae3b93c817d598a9ee add a very short C program</code></pre>
+<p>If I want to look at an old version (say, after I created <code class="backtick">goodbye.c</code>), I can go back to it using <code class="backtick">git&nbsp;checkout</code>:</p>
+<pre><code>$ git checkout 454b2
+Note: checking out '454b2'.
+
+You are in 'detached HEAD' state. You can look around, make experimental
+changes and commit them, and you can discard any commits you make in this
+state without impacting any branches by performing another checkout.
+
+If you want to create a new branch to retain commits you create, you may
+do so (now or later) by using -b with the checkout command again. Example:
+
+ git checkout -b new_branch_name
+
+HEAD is now at 454b24c... we need a second program to say goodbye
+$ ls
+goodbye.c hello hello.c</code></pre>
+<p>Now I have both <code class="backtick">goodbye.c</code> and <code class="backtick">hello.c</code>, as well as my compiled program <code class="backtick">hello</code>,
+ since I didn't tell Git about it. Note that I also got lots of
+horrendous warnings about the fact that I am living in the past and
+shouldn't expect to make any permanent changes here.</p>
+<p>To go back to the last commit, use <code class="backtick">git&nbsp;checkout&nbsp;master</code>:</p>
+<pre><code>$ git checkout master
+Previous HEAD position was 454b24c... we need a second program to say goodbye
+Switched to branch 'master'
+$ ls
+hello hello.c tiny.c</code></pre>
+<h3 id="More_information_about_Git"><span class="header-section-number">3.6.8</span> More information about Git</h3>
+<p>All Git commands take a <code class="backtick">--help</code> argument that brings up their manual page. There is also extensive documentation at <a href="http://git-scm.com/">http://git-scm.com</a>.</p>
+<h2 id="submitScript"><span class="header-section-number">3.7</span> Submitting assignments</h2>
+<p>The submit command is found in <code>/c/cs223/bin</code> on the Zoo. Here is the documentation (adapted from comments in the script):</p>
+<pre><code>submit assignment-number file(s)
+unsubmit assignment-number file(s)
+check assignment-number
+makeit assignment-number [file]
+protect assignment-number file(s)
+unprotect assignment-number file(s)
+retrieve assignment-number file[s]
+testit assignment-number test
+
+The submit program can be invoked in eight different ways:
+
+ /c/cs223/bin/submit 1 Makefile tokenize.c unique.c time.log
+
+submits the named source files as your solution to Homework #1;
+
+ /c/cs223/bin/check 2
+
+lists the files that you have submitted for Homework #2;
+
+ /c/cs223/bin/unsubmit 3 error.submit bogus.solution
+
+deletes the named files that you had submitted previously for Homework #3
+(i.e., withdraws them from submission, which is useful if you accidentally
+submit the wrong file);
+
+ /c/cs223/bin/makeit 4 tokenize unique
+
+runs "make" on the files that you submitted previously for Homework #4;
+
+ /c/cs223/bin/protect 5 tokenize.c time.log
+
+protects the named files that you submitted previously for Homework #5 (so
+they cannot be deleted accidentally);
+
+ /c/cs223/bin/unprotect 6 unique.c time.log
+
+unprotects the named files that you submitted previously for Homework #6
+(so they can be deleted);
+
+ /c/cs223/bin/retrieve 7 Csquash.c
+
+retrieves copies of the named files that you submitted previously for Homework #7; and
+
+ /c/cs223/bin/testit 8 BigTest
+
+runs the test script /c/cs223/Hwk8/test.BigTest.</code></pre>
+<p>The <code>submit</code> program will only work if there is a directory with your name and login on it under <code>/c/cs223/class</code>.
+ If there is no such directory, you need to make sure that you have
+correctly signed up for CS223 using the web form. Note that it may take
+up to an hour for this directory to appear after you sign up.</p>
+<h1 id="c"><span class="header-section-number">4</span> The C programming language</h1>
+<p>The C programming language was developed at Bell Laboratories in the
+early 1970s as the system programming language for Unix, based on the
+earlier and even more primitive languages BCPL and B. When originally
+developed, it was targeted at machines that were extremely limited by
+modern standards: the first Unix implementation (and the B compiler that
+ supported it) ran on a DEC PDP-7 with only 8192 18-bit words of memory (<a href="http://cm.bell-labs.com/who/dmr/chist.html">Dennis
+ M. Ritchie, The development of the C language, in Thomas J. Bergin,
+Jr., and Richard G. Gibson, Jr., History of Programming Languages-II
+ed., ACM Press, 1996</a>). So using as few resources as possible, both in the compiler and in the resulting code, was a priority.</p>
+<p>This priority is reflected in the features (and lack of features) of
+C, and is partly responsible for its success. Programs written in C
+place almost no demands on the system they run on and give the
+programmer nearly complete control over their execution: this allows
+programs that were previously written in assembly language, like
+operating system kernels and device drivers, to be implemented in C. So C
+ is often the first language ported to any new architecture, and many
+higher-level languages are either executed using interpreters written in
+ C or use C as an intermediate language in the compilation process.</p>
+<p>Since its initial development, C has gone through four major versions:</p>
+<ul>
+<li>The original <strong>K&amp;R C</strong> defined in the 1978 first edition of Kernighan and Ritchie's book <em>The C Programming Language</em>;</li>
+<li><strong>ANSI C</strong>, from 1988, which fixed some oddities in the syntax and which is documented in the 1988 second edition of <em>The C Programming Language</em>;</li>
+<li><strong>C99</strong>, from 1999, the ISO/IEC 9899:1999 standard for
+C, which added some features from C++ and many new features for
+high-performance numerical computing;</li>
+<li><strong>C11</strong>, from 2011, the ISO/IEC 9899:2011 standard for
+C, which relaxed some of the requirements of C99 that most compilers
+hadn't bothered implementing and which added a few extra features.</li>
+</ul>
+<p>Unfortunately, C99 and C11 both exemplify the uselessness of standards committees in general and the <a href="http://www.iso.org/">ISO</a>
+ in particular. Because the ISO has no power to enforce standards on
+compiler writers, and because they will charge you CHF 198 just to look
+at the C11 standard, many compiler writers have ignored much of C99 and
+C11. In particular, Microsoft pretty much gave up on adding any features
+ after ANSI C, and support for C99 and C11 is spotty in <code>gcc</code> and <code>clang</code>,
+ the two dominant open source C compilers. So if you want to write
+portable C code, it is safest to limit yourself to features in ANSI C.</p>
+<p>For this class, we will permit you to use any feature of C99 that <code>gcc</code> supports, which also includes all features of ANSI C. You can compile with C99 support by using <code>gcc --std=c99</code> or by calling <code>gcc</code> as <code>c99</code>, as in <code>c99 -o hello hello.c</code>. Compiling with straight <code>gcc</code> will give you GNU's own peculiar dialect of C, which is basically ANSI C with some extras. For maximum portability when using <code>gcc</code>, it is safest to use <code>gcc -ansi -pedantic</code>, which expects straight ANSI C and will complain about any extensions.</p>
+<h2 id="CProgramStructure"><span class="header-section-number">4.1</span> Structure of a C program</h2>
+<p>A C program consists of one or more files (which act a little bit
+like modules in more structured programming languages), each of which
+typically contains <strong>definitions</strong> of <strong>functions</strong>, each of which consists of <strong>statements</strong>, which are either <strong>compound statements</strong> like <code>if</code>, <code>while</code>, etc. or <strong>expressions</strong> that typically perform some sort of arithmetic or call other functions. Files may also include <strong>declarations</strong>
+ of global variables (not recommended), and functions will often contain
+ declarations of local variables that can only be used inside that
+function.</p>
+<p>Here is a typical small C program that sums a range of integers.
+Since this is our first real program, it's a little heavy on the
+comments (shown between <code>/*</code> and <code>*/</code>).</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt; </span><span class="co">/* This is needed to get the declarations of fprintf and printf */</span>
+<span class="ot">#include &lt;stdlib.h&gt; </span><span class="co">/* This is needed to get the declaration of atoi */</span>
+
+<span class="co">/* Return the sum of all integers i</span>
+<span class="co"> * such that start &lt;= i and i &lt; end. */</span>
+<span class="dt">int</span>
+sumRange(<span class="dt">int</span> start, <span class="dt">int</span> end)
+{
+ <span class="dt">int</span> i; <span class="co">/* loop variable */</span>
+ <span class="dt">int</span> sum; <span class="co">/* sum of all values so far */</span>
+
+ <span class="co">/* a mathematician would use a formula for this,</span>
+<span class="co"> * but we are computer programmers! */</span>
+ sum = <span class="dv">0</span>;
+
+ <span class="co">/* The three parts of the header for this loop mean:</span>
+<span class="co"> * 1. Set i to start initially.</span>
+<span class="co"> * 2. Keep doing the loop as long as i is less than end.</span>
+<span class="co"> * 3. After each iteration, add 1 to i.</span>
+<span class="co"> */</span>
+ <span class="kw">for</span>(i = start; i &lt; end; i++) {
+ sum += i; <span class="co">/* This adds i to sum */</span>
+ }
+
+ <span class="co">/* This exits the function immediately,</span>
+<span class="co"> * sending the value of sum back to the caller. */</span>
+ <span class="kw">return</span> sum;
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> start; <span class="co">/* initial value in range */</span>
+ <span class="dt">int</span> end; <span class="co">/* one past the last value in the range */</span>
+
+ <span class="co">/* This tests for the wrong number of arguments.</span>
+<span class="co"> * The != operator returns true (1) if its arguments are not equal,</span>
+<span class="co"> * and false (0) otherwise.</span>
+<span class="co"> * Note that the program name itself counts as an argument</span>
+<span class="co"> * (which is why we want the argument count to be 3)</span>
+<span class="co"> * and appears in position 0 in the argument vector</span>
+<span class="co"> * (which means we can get it using argv[0]). */</span>
+ <span class="kw">if</span>(argc != <span class="dv">3</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s</span><span class="ch">\n</span><span class="st"> start end"</span>, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ <span class="co">/* Convert start and end positions from strings to ints */</span>
+ start = atoi(argv[<span class="dv">1</span>]);
+ end = atoi(argv[<span class="dv">2</span>]);
+
+ <span class="co">/* Call sumRange and print the result */</span>
+ printf(<span class="st">"sumRange(%d, %d) = %d</span><span class="ch">\n</span><span class="st">"</span>, start, end, sumRange(start, end));
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/sumRange.c" class="uri">examples/sumRange.c</a>
+</div>
+<p>This is what the program does if we compile and run it:</p>
+<pre><code>$ c99 -g -Wall -pedantic -o sumRange sumRange.c
+$ ./sumRange 1 100
+sumRange(1, 100) = 4950</code></pre>
+<p>The <code>sumRange.c</code> program contains two functions, <code>sumRange</code> and <code>main</code>. The <code>sumRange</code> function does the actual work, while <code>main</code>
+ is the main routine of the program that gets called with the
+command-line arguments when the program is run. Every C program must
+have a routine named <code>main</code> with these particular arguments.</p>
+<p>In addition, <code>main</code> may call three library functions, <code>fprintf</code> (which in this case is used to generate error messages), <code>printf</code> (which generates ordinary output), and <code>atoi</code>
+ (which is used to translate the command-line arguments into numerical
+values). These functions must all be declared before they can be used.
+In the case of <code>sumRange</code>, putting the definition of <code>sumRange</code> before the definition of <code>main</code> is enough. For the library routines, the <strong>include files</strong> <code>stdio.h</code> and <code>stdlib.h</code>
+ contain declarations of the functions that contain enough information
+about their return types and arguments that the compiler knows how to
+generate machine code to call them. These files are included in <code>sumRange.c</code> by the <strong>C preprocessor</strong>, which pastes in the contents of any file specified by the <code>#include</code> command, strips out any comments (delimited by <code>/*</code> and <code>*/</code>, or by <code>//</code>
+ and the end of the line if you are using C99), and does some other
+tricks that allow you to muck with the source code before the actual
+compiler sees it (see <a href="#macros">Macros</a>). You can see what the output of the preprocessor looks like by calling the C compiler with the <code>-E</code> option, as in <code>c99 -E sumRange.c</code>.</p>
+<p>The <strong>body</strong> of each function consists of some <strong>variable declarations</strong> followed by a sequence of <strong>statements</strong>
+ that tell the computer what to do. Unlike some languages, every
+variable used in a C program must be declared. A declaration specifies
+the <strong>type</strong> of a variable, which tells the compiler how
+much space to allocate for it and how to interpret some operations on
+its value. Statements may be <strong>compound statements</strong> like the <code>if</code> statement in <code>main</code> that executes its body only if the program is called with the wrong number of command-line arguments or the <code>for</code> loop in <code>sumRange</code> that executes its body as long as the test in its header remains true; or they may be <strong>simple statements</strong> that consist of a single <strong>expression</strong> followed by a semicolon.</p>
+<p>An <strong>expression</strong> is usually either a bare function call whose value is discarded (for example, the calls to <code>fprintf</code> and <code>printf</code> in <code>main</code>), or an arithmetic expression (which may include function calls, like the calls to <code>atoi</code> in <code>main</code>) whose value is assigned to some variable using the <strong>assignment operator</strong> <code>=</code> or sometimes variants like <code>+=</code> (which is shorthand for adding a value to an existing variable: <code>x += y</code> is equivalent to <code>x = x+y</code>).</p>
+<p>When you compile a C program, after running the preprocessor, the compiler generates <strong>assembly language</strong>
+ code that is a human-readable description of the ultimate machine code
+for your target CPU. Assembly language strips out all the human-friendly
+ features of your program and reduces it to simple instructions usually
+involving moving things from one place to the other or performing a
+single arithmetic operation. For example, the C line</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> x = y + <span class="dv">1</span>; <span class="co">/* add 1 to y, store result in x */</span></code></pre></div>
+<p>gets translated into x86 assembly as</p>
+<div class="sourceCode"><pre class="sourceCode gnuassembler"><code class="sourceCode gnuassembler"> movl -<span class="dv">24</span>(%rbp), %edi
+ addl $1, %edi
+ movl %edi, -<span class="dv">28</span>(%rbp)</code></pre></div>
+<p>These three operations copy the value of <code>y</code> into the CPU register <code>%edi</code>, add 1 to the <code>%edi</code> register, and then copy the value back into <code>x</code>. This corresponds directly to what you would have to do to evaluate <code>x = y + 1</code> if you could only do one very basic operation at a time and couldn't do arithmetic operations on memory locations: fetch <code>y</code>, add 1, store <code>x</code>. Note that the CPU doesn't know about the names <code>y</code> and <code>x</code>; instead, it computes their addresses by adding -24 and -28 respectively to the base pointer register <code>%rbp</code>. This is why it can be hard to debug compiled code unless you tell the compiler to keep around extra information.</p>
+<p>For an arbitrary C program, if you are using <code>gcc</code>, you can see what your code looks like in assembly language using the <code>-S</code> option. For example, <code>c99 -S sumRange.c</code> will create a file <code>sumRange.s</code> that looks like this:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode gnuassembler"><code class="sourceCode gnuassembler"> <span class="kw">.file</span> <span class="st">"sumRange.c"</span>
+ <span class="kw">.text</span>
+ <span class="kw">.globl</span> sumRange
+ <span class="kw">.type</span> sumRange, <span class="co">@function</span>
+<span class="kw">sumRange:</span>
+.LFB0<span class="kw">:</span>
+ .cfi_startproc
+ pushl %ebp
+ .cfi_def_cfa_offset <span class="dv">8</span>
+ .cfi_offset <span class="dv">5</span>, -<span class="dv">8</span>
+ movl %esp, %ebp
+ .cfi_def_cfa_register <span class="dv">5</span>
+ subl $16, %esp
+ movl $0, -<span class="dv">4</span>(%ebp)
+ movl <span class="dv">8</span>(%ebp), %eax
+ movl %eax, -<span class="dv">8</span>(%ebp)
+ jmp .L2
+.L3<span class="kw">:</span>
+ movl -<span class="dv">8</span>(%ebp), %eax
+ addl %eax, -<span class="dv">4</span>(%ebp)
+ addl $1, -<span class="dv">8</span>(%ebp)
+.L2<span class="kw">:</span>
+ movl -<span class="dv">8</span>(%ebp), %eax
+ cmpl <span class="dv">12</span>(%ebp), %eax
+ jl .L3
+ movl -<span class="dv">4</span>(%ebp), %eax
+ leave
+ .cfi_restore <span class="dv">5</span>
+ .cfi_def_cfa <span class="dv">4</span>, <span class="dv">4</span>
+ ret
+ .cfi_endproc
+.LFE0<span class="kw">:</span>
+ <span class="kw">.size</span> sumRange, .-sumRange
+ <span class="kw">.section</span> <span class="kw">.rodata</span>
+.LC0<span class="kw">:</span>
+ <span class="kw">.string</span> <span class="st">"Usage: %s</span><span class="ch">\n</span><span class="st"> start end"</span>
+.LC1<span class="kw">:</span>
+ <span class="kw">.string</span> <span class="st">"sumRange(%d, %d) = %d</span><span class="ch">\n</span><span class="st">"</span>
+ <span class="kw">.text</span>
+ <span class="kw">.globl</span> main
+ <span class="kw">.type</span> main, <span class="co">@function</span>
+<span class="kw">main:</span>
+.LFB1<span class="kw">:</span>
+ .cfi_startproc
+ pushl %ebp
+ .cfi_def_cfa_offset <span class="dv">8</span>
+ .cfi_offset <span class="dv">5</span>, -<span class="dv">8</span>
+ movl %esp, %ebp
+ .cfi_def_cfa_register <span class="dv">5</span>
+ andl $-<span class="dv">16</span>, %esp
+ subl $32, %esp
+ cmpl $3, <span class="dv">8</span>(%ebp)
+ je .L6
+ movl <span class="dv">12</span>(%ebp), %eax
+ movl (%eax), %edx
+ movl stderr, %eax
+ movl %edx, <span class="dv">8</span>(%esp)
+ movl $.LC0, <span class="dv">4</span>(%esp)
+ movl %eax, (%esp)
+ call fprintf
+ movl $1, %eax
+ jmp .L7
+.L6<span class="kw">:</span>
+ movl <span class="dv">12</span>(%ebp), %eax
+ addl $4, %eax
+ movl (%eax), %eax
+ movl %eax, (%esp)
+ call atoi
+ movl %eax, <span class="dv">24</span>(%esp)
+ movl <span class="dv">12</span>(%ebp), %eax
+ addl $8, %eax
+ movl (%eax), %eax
+ movl %eax, (%esp)
+ call atoi
+ movl %eax, <span class="dv">28</span>(%esp)
+ movl <span class="dv">28</span>(%esp), %eax
+ movl %eax, <span class="dv">4</span>(%esp)
+ movl <span class="dv">24</span>(%esp), %eax
+ movl %eax, (%esp)
+ call sumRange
+ movl %eax, <span class="dv">12</span>(%esp)
+ movl <span class="dv">28</span>(%esp), %eax
+ movl %eax, <span class="dv">8</span>(%esp)
+ movl <span class="dv">24</span>(%esp), %eax
+ movl %eax, <span class="dv">4</span>(%esp)
+ movl $.LC1, (%esp)
+ call printf
+ movl $0, %eax
+.L7<span class="kw">:</span>
+ leave
+ .cfi_restore <span class="dv">5</span>
+ .cfi_def_cfa <span class="dv">4</span>, <span class="dv">4</span>
+ ret
+ .cfi_endproc
+.LFE1<span class="kw">:</span>
+ <span class="kw">.size</span> main, .-main
+ <span class="kw">.ident</span> <span class="st">"GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"</span>
+ <span class="kw">.section</span> .note.GNU-stack,<span class="st">""</span>,<span class="co">@progbits</span></code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/sumRange.s" class="uri">examples/sumRange.s</a>
+</div>
+<p>You usually don't need to look at assembly language, but it can
+sometimes be enlightening to see what the compiler is doing with your
+code. One thing that I find interesting about this particular code
+(which is for the x86 architecture) is that most of the instructions are
+ <code>movl</code>, the x86 instruction for copying a 32-bit quantity
+from one location to another: most of what this program is doing is
+copying data into the places expected by the library functions it is
+calling. Also noteworthy is that the beautiful compound statements like <code>if</code> and <code>for</code> that so eloquently express the intent of the programmer get turned into a pile of jump (<code>jmp</code>) and conditional jump (<code>jl</code>, <code>je</code>) instructions, the machine code versions of the often dangerous and confusing <code>goto</code> statement. This is because CPUs are dumb: they don't know how to carry out an <code>if</code>
+ branch or a loop, and all they can do instead is be told to replace the
+ value of their program counter register with some new value instead of
+just incrementing it as they usually do.</p>
+<p>Assembly language is not the last stage in this process. The <strong>assembler</strong> (<code>as</code>) is a program that translates the assembly language in <code>sumRange.s</code> into machine code (which will be stored in <code>sumRange.o</code>
+ if we aren't compiling a single program all at once). Machine code is
+not human-readable, and is close to the raw stream of bytes that gets
+stored in the computer's memory to represent a running program. The
+missing parts are that the addresses of each function and global
+variables are generally left unspecified, so that they can be moved
+around to make room for other functions and variables coming from other
+files and from system libraries. The job of stitching all of these
+pieces together, putting everything in the right place, filling in any
+placeholder addresses, and generating the <strong>executable file</strong> <code>sumRange</code> that we can actually run is given to the <strong>linker</strong> <code>ld</code>.</p>
+<p>The whole process looks like this:</p>
+<pre><code>sumRange.c (source code)
+ |
+ v
+[preprocessor (cpp)]
+ |
+ v
+preprocessed version of sumRange.c
+ |
+ v
+[compiler (gcc)]
+ |
+ v
+sumRange.s (assembly code)
+ |
+ v
+[assembler (as)]
+ |
+ v
+sumRange.o (machine code)
+ |
+ v
+[linker (ld)] &lt;- system library (glibc.a)
+ |
+ v
+sumRange (executable)</code></pre>
+<p>The good news is, you don't actually have to run all of these steps yourself; instead, <code>gcc</code> (which you may be calling as <code>c99</code>) will take care of everything for you, particularly for simple programs like <code>sumRange.c</code> that fit in a single file.</p>
+<h2 id="numericTypes"><span class="header-section-number">4.2</span> Numeric data types</h2>
+<p>All data stored inside a computer is ultimately represented as a sequence of <strong>bits</strong>, 0 or 1 values, typically organized into <strong>words</strong> consisting of several 8-bit <strong>bytes</strong>.<a href="#fn6" class="footnoteRef" id="fnref6"><sup>6</sup></a></p>
+<p>A typical desktop computer might have enough RAM to store <span class="math inline">2<sup>32</sup></span> bytes (4 gigabytes); the Zoo machines store <span class="math inline">2<sup>35</sup></span> bytes (32 gigabytes). However, the <strong>address space</strong> of a process might be much larger: on a 64-bit machine, the address space is <span class="math inline">2<sup>64</sup></span> bytes. There's no way to store <span class="math inline">2<sup>64</sup></span> different addresses in <span class="math inline">2<sup>35</sup></span> bytes of RAM; instead, a <strong>memory mapper</strong>,
+ typically built in to the CPU, translates the large addresses of the
+parts of the address space that are actually used into smaller addresses
+ corresponding to actual RAM locations. In some cases, regions of memory
+ that have not been used in a while will be <strong>swapped out</strong> to disk, leaving more RAM free for other parts of the process (or other processes). This technique is known as <strong>virtual memory</strong>
+ and is usually invisible to the programmer. The use of virtual memory
+can increase the available space beyond the size of the RAM a little
+bit, but if you try to run a process that is actively using
+significantly more space than can be stored in RAM, it will slow down
+dramatically, because disk drives are roughly ten million times slower
+than memory.</p>
+<p>The most basic kind of data represents integer values from some bounded range. C supports several <strong>integer data types</strong>,
+ varying in their size (and thus range), and whether or not they are
+considered to be signed. These are described in more detail <a href="#integerTypes">below</a>.</p>
+<p>For numerical computation, integer data types can be inconvenient. So C also supports <strong>floating-point types</strong> that consist of a fixed-size <strong>mantissa</strong>, which is essentially an integer, together with an <strong>exponent</strong> that is used to multiply the mantissa by <span class="math inline">2<sup><em>x</em></sup></span> for some <span class="math inline"><em>x</em></span>.
+ These allow very small or very large values to be represented with
+small relative error, but do not allow exact computation because of the
+limited precision of the mantissa. Floating-point types are also
+described <a href="#floatingPointTypes">below</a>.</p>
+<p>All other data is represented by converting it to either integer or
+floating-point numbers. For example, text characters in C are
+represented as small integer values, so that the character constant <code>'z'</code> representing a lower-case "z" is exactly the same as the integer constant <code>122</code> (which is the <a href="http://en.wikipedia.org/wiki/ASCII">ASCII</a> code for "z"). A string like <code>"hi there"</code>
+ is represented by a sequence of 8-bit ASCII characters, with a special 0
+ character to mark the end of the string. Strings that go beyond the
+English characters available in the ASCII encoding are typically
+represented using <a href="http://www.unicode.org/">Unicode</a> and
+encoded as sequences of bytes using a particular representation called
+UTF-8. The color of a pixel in an image might be represented as three
+8-bit integers representing the intensity of red, green, and blue in the
+ color, while an image itself might be a long sequence of such 3-byte
+RGB values. At the bottom, every operation applied to these more complex
+ data types translates into a whole lot of copies and arithmetic
+operations on individual bytes and words.</p>
+<p>From the CPU's point of view, even much of this manipulation consists
+ of operating on integers that happen to represent addresses instead of
+data. So when a C program writes a zero to the 19th entry in a sequence
+of 4-byte integers, somewhere in the implementation of this operation
+the CPU will be adding <span class="math inline">4 ⋅ 19</span> to a base
+ address for the sequence to compute where to write this value. Unlike
+many higher-level languages, C allows the program direct access to
+address computations via <strong>pointer types</strong>, which are tricky enough to get <a href="#pointers">their own chapter</a>.
+ Indeed, most of the structured types that C provides for representing
+more complicated data can best be understood as a thin layer of
+abstraction on top of pointers. We will see examples of these in later
+chapters as well.</p>
+<p>For now, we concentrate on integer and floating-point types, and on the operations that can be applied to them.</p>
+<h3 id="integerTypes"><span class="header-section-number">4.2.1</span> Integer types in C</h3>
+<p>Most variables in C programs tend to hold integer values, and indeed
+most variables in C programs tend to be the default-width integer type <code>int</code>.
+ Declaring a variable to have a particular integer type controls how
+much space is used to store the variable (any values too big to fit will
+ be truncated) and specifies that the arithmetic on the variable is done
+ using integer operations.</p>
+<h4 id="basicIntegerTypes"><span class="header-section-number">4.2.1.1</span> Basic integer types</h4>
+<p>The standard C integer types are:</p>
+<table style="width:96%;">
+<colgroup>
+<col width="37%">
+<col width="25%">
+<col width="33%">
+</colgroup>
+<thead>
+<tr class="header">
+<th align="left">Name</th>
+<th align="left">Typical size</th>
+<th align="left">Signed by default?</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left"><p><code>char</code></p></td>
+<td align="left"><p>8 bits</p></td>
+<td align="left"><p>unspecified</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p><code>short</code></p></td>
+<td align="left"><p>16 bits</p></td>
+<td align="left"><p>signed</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p><code>int</code></p></td>
+<td align="left"><p>32 bits</p></td>
+<td align="left"><p>signed</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p><code>long</code></p></td>
+<td align="left"><p>32 bits</p></td>
+<td align="left"><p>signed</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p><code>long long</code></p></td>
+<td align="left"><p>64 bits</p></td>
+<td align="left"><p>signed</p></td>
+</tr>
+</tbody>
+</table>
+<p>The typical size is for 32-bit architectures like the Intel i386. Some 64-bit machines might have 64-bit <code class="backtick">int</code>s and <code class="backtick">long</code>s, and some microcontrollers have 16-bit <code class="backtick">int</code>s.
+ Particularly bizarre architectures might have even wilder sizes, but
+you are not likely to see this unless you program vintage 1970s
+supercomputers. The general convention is that <code class="backtick">int</code> is the most convenient size for whatever computer you are using and should be used by default.</p>
+<p>Many compilers also support a <code class="backtick">long&nbsp;long</code>
+ type that is usually twice the length of a long (e.g. 64 bits on i386
+machines). This type was not officially added to the C standard prior to
+ C99, so it may or may not be available if you insist on following the
+ANSI specification strictly.</p>
+<p>Each of these types comes in signed and unsigned variants.<br>
+This controls the interpretation of some operations (mostly comparisons
+and shifts) and determines the range of the type: for example, an <code>unsigned char</code> holds values in the range 0 through 255 while a <code>signed char</code> holds values in the range -128 through 127, and in general an unsigned <span class="math inline"><em>n</em></span>-bit type runs from 0 through <span class="math inline">2<sup><em>n</em></sup> − 1</span> while the signed version runs from <span class="math inline">−2<sup><em>n</em> − 1</sup></span> through <span class="math inline">2<sup><em>n</em> − 1</sup> − 1</span>. The representation of signed integers uses <strong>two's-complement</strong> notation, which means that a positive value <span class="math inline"><em>x</em></span> is represented as the unsigned value <span class="math inline"><em>x</em></span> while a negative value <span class="math inline">−<em>x</em></span> is represented as the unsigned value <span class="math inline">2<sup><em>n</em></sup> − <em>x</em></span>. For example, if we had a peculiar implementation of C that used 3-bit <code>int</code>s, the binary values and their interpretation as <code>int</code> or <code>unsigned int</code> would look like this:</p>
+<table style="width:71%;">
+<colgroup>
+<col width="9%">
+<col width="29%">
+<col width="31%">
+</colgroup>
+<thead>
+<tr class="header">
+<th align="left">bits</th>
+<th align="left">as <code>unsigned int</code></th>
+<th align="left">as <code>int</code></th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left"><p>000</p></td>
+<td align="left"><p>0</p></td>
+<td align="left"><p>0</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>001</p></td>
+<td align="left"><p>1</p></td>
+<td align="left"><p>1</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p>010</p></td>
+<td align="left"><p>2</p></td>
+<td align="left"><p>2</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>011</p></td>
+<td align="left"><p>3</p></td>
+<td align="left"><p>3</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p>100</p></td>
+<td align="left"><p>4</p></td>
+<td align="left"><p>-4</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>101</p></td>
+<td align="left"><p>5</p></td>
+<td align="left"><p>-3</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p>110</p></td>
+<td align="left"><p>6</p></td>
+<td align="left"><p>-2</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>111</p></td>
+<td align="left"><p>7</p></td>
+<td align="left"><p>-1</p></td>
+</tr>
+</tbody>
+</table>
+<p>The reason we get one extra negative value for a signed integer
+type is this allows us to interpret the first bit as the sign, which
+makes life a little easier for whoever is implementing our CPU. Two
+useful features of this representation are:</p>
+<ol style="list-style-type: decimal">
+<li>We can convert freely between signed and unsigned values as long as we are in the common range of both, and</li>
+<li>Addition and subtraction work exactly the same way for both signed
+and unsigned values. For example, on our hypothetical 3-bit machine, <span class="math inline">1 + 5</span> represented as <span class="math inline">001 + 101 = 110</span> gives the same answer as <span class="math inline">1 + ( − 3)=001 + 101 = 110</span>. In the first case we interpret <span class="math inline">110</span> as <span class="math inline">6</span>, while in the second we interpret it as <span class="math inline">−2</span>, but both answers are right in their respective contexts.</li>
+</ol>
+<p>Note that in order to make this work, we can't detect overflow: when
+the CPU adds two 3-bit integers, it doesn't know if we are adding <span class="math inline">7 + 6 = 111 + 110 = 1101 = 13</span> or <span class="math inline">( − 1)+(−2)=111 + 110 = 101 = ( − 3)</span>. In both cases the result is truncated to <span class="math inline">101</span>, which gives the incorrect answer <span class="math inline">5</span> when we are adding unsigned values.</p>
+<p>This can often lead to surprising uncaught errors in C programs,
+although using more than 3 bits will make overflow less likely. It is
+usually a good idea to pick a size for a variable that is substantially
+larger than the largest value you expect the variable to hold (although
+most people just default to <code>int</code>), unless you are very short
+ on space or time (larger values take longer to read and write to
+memory, and may make some arithmetic operations take longer).</p>
+<p>Taking into account signed and unsigned versions, the full collection of integer types looks like this:</p>
+<table>
+<colgroup>
+<col width="33%">
+<col width="33%">
+<col width="33%">
+</colgroup>
+<tbody>
+<tr class="odd">
+<td align="left"><p><code>char</code></p></td>
+<td align="left"><p><code>signed char</code></p></td>
+<td align="left"><p><code>unsigned char</code></p></td>
+</tr>
+<tr class="even">
+<td align="left"><p><code>short</code></p></td>
+<td align="left"></td>
+<td align="left"><p><code>unsigned short</code></p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p><code>int</code></p></td>
+<td align="left"></td>
+<td align="left"><p><code>unsigned int</code></p></td>
+</tr>
+<tr class="even">
+<td align="left"><p><code>long</code></p></td>
+<td align="left"></td>
+<td align="left"><p><code>unsigned long</code></p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p><code>long long</code></p></td>
+<td align="left"></td>
+<td align="left"><p><code>unsigned long long</code></p></td>
+</tr>
+</tbody>
+</table>
+<p>So these are all examples of declarations of integer variables:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">int</span> i;
+ <span class="dt">char</span> c;
+ <span class="dt">signed</span> <span class="dt">char</span> temperature; <span class="co">/* degrees Celsius, only valid for Earth's surface */</span>
+ <span class="dt">long</span> netWorthInPennies;
+ <span class="dt">long</span> <span class="dt">long</span> billGatesNetWorthInPennies;
+ <span class="dt">unsigned</span> <span class="dt">short</span> shaveAndAHaircutTwoBytes;</code></pre></div>
+<p>For <code class="backtick">char</code>s, whether the character is signed <span class="math inline">( − 128…127)</span> or unsigned <span class="math inline">(0…255)</span> is at the whim of the compiler. If it matters, declare your variables as <code class="backtick">signed&nbsp;char</code> or <code class="backtick">unsigned&nbsp;char</code>. For storing actual 8-bit characters that you aren't doing arithmetic on, it shouldn't matter.</p>
+<p>There is a slight gotcha for character processing with input functions like <code>getchar</code> and <code>getc</code>. These return the special value <code>EOF</code> (defined in <code>stdio.h</code> to be <span class="math inline">−1</span>) to indicate end of file. But <span class="math inline">255</span>, which represents <code>'ÿ'</code> in the ISO Latin-1 alphabet and in Unicode and which may also appear quite often in binary files, will map to <span class="math inline">−1</span> if you put it in a character. So you should store the output of these functions in an <code>int</code> if you need to test for end of file. Once you have done this test, it's OK to put the non-end-of-file character back in a <code>char</code>.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="co">/* right */</span>
+ <span class="dt">int</span> c;
+
+ <span class="kw">while</span>((c = getchar()) != EOF) {
+ putchar(c);
+ }</code></pre></div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="co">/* WRONG */</span>
+ <span class="dt">char</span> c;
+
+ <span class="kw">while</span>((c = getchar()) != EOF) { <span class="co">/* &lt;- DON'T DO THIS! */</span>
+ putchar(c);
+ }</code></pre></div>
+<h4 id="C99_fixed-width_types"><span class="header-section-number">4.2.1.2</span> C99 fixed-width types</h4>
+<p>C99 provides a <code>stdint.h</code> header file that defines integer types with known size independent of the machine architecture. So in C99, you can use <code>int8_t</code> instead of <code>signed&nbsp;char</code> to guarantee a signed type that holds exactly 8 bits, or <code>uint64_t</code> instead of <code>unsigned&nbsp;long&nbsp;long</code> to get a 64-bit unsigned integer type. The full set of types typically defined are <code>int8_t</code>, <code>int16_t</code>, <code>int32_t</code>, and <code>int64_t</code> for signed integers and <code>uint8_t</code>, <code>uint16_t</code>, <code>uint32_t</code>, and <code>uint64_t</code>
+ for unsigned integers. There are also types for integers that contain
+the fewest number of bits greater than some minimum (e.g., <code>int_least16_t</code>
+ is a signed type with at least 16 bits, chosen to minimize space) or
+that are the fastest type with at least the given number of bits (e.g., <code>int_fast16_t</code> is a signed type with at least 16 bits, chosen to minimize time). The <code>stdint.h</code> file also defines constants giving the minimum and maximum values of these types; for example, <code>INT32_MIN</code> and <code>INT32_MAX</code> give the smallest and largest values that can be stored in an <code>int32_t</code>. (The analogous constants for the standard integer types, such as <code>INT_MIN</code> and <code>INT_MAX</code> for <code>int</code>, are defined in <code>limits.h</code>.)</p>
+<p>All of these types are defined as aliases for standard integer types using <a href="#typedef"><code>typedef</code></a>; the main advantage of using <code>stdint.h</code> over defining them yourself is that if somebody ports your code to a new architecture, <code>stdint.h</code> should take care of choosing the right types automatically. The main disadvantage is that, like many C99 features, <code>stdint.h</code>
+ is not universally available on all C compilers. Also, because these
+fixed-width types are a late addition to the language, the built-in
+routines for printing and parsing integers, as well as the mechanisms
+for specifying the size of an integer constant, are not adapted to deal
+with them.</p>
+<p>But if you do need to print or parse types defined in <code class="backtick">stdint.h</code>, the larger <code class="backtick">inttypes.h</code> header defines macros that give the corresponding format strings for <code>printf</code> and <code>scanf</code>. The <code>inttypes.h</code> file includes <code>stdint.h</code>, so you do not need to include both. Below is an example of a program that uses the various features provided by <code>inttypes.h</code> and <code>stdint.h</code>.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="ot">#include &lt;inttypes.h&gt;</span>
+
+<span class="co">/* example of using fixed-width types */</span>
+
+<span class="co">/* largest value we can apply 3x+1 to without overflow */</span>
+<span class="ot">#define MAX_VALUE ((UINT64_MAX - 1) / 3)</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">uint64_t</span> big;
+
+ <span class="kw">if</span>(argc != <span class="dv">2</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s number</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ <span class="co">/* parse argv[1] as a uint64_t */</span>
+ <span class="co">/* SCNu64 expands to the format string for scanning uint64_t (without the %) */</span>
+ <span class="co">/* We then rely on C concatenating adjacent string constants. */</span>
+ sscanf(argv[<span class="dv">1</span>], <span class="st">"%"</span> SCNu64, &amp;big);
+
+ <span class="co">/* do some arithmetic on big */</span>
+ <span class="kw">while</span>(big != <span class="dv">1</span>) {
+ <span class="co">/* PRIu64 expands to the format string for printing uint64_t */</span>
+ printf(<span class="st">"%"</span> PRIu64 <span class="st">"</span><span class="ch">\n</span><span class="st">"</span>, big);
+
+ <span class="kw">if</span>(big % <span class="dv">2</span> == <span class="dv">0</span>) {
+ big /= <span class="dv">2</span>;
+ } <span class="kw">else</span> <span class="kw">if</span>(big &lt;= MAX_VALUE) {
+ big = <span class="dv">3</span>*big + <span class="dv">1</span>;
+ } <span class="kw">else</span> {
+ <span class="co">/* overflow! */</span>
+ puts(<span class="st">"overflow"</span>);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+ }
+
+ puts(<span class="st">"Reached 1"</span>);
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/integerTypes/fixedWidth.c" class="uri">examples/integerTypes/fixedWidth.c</a>
+</div>
+<h3 id="sizeTypes"><span class="header-section-number">4.2.2</span> <code>size_t</code> and <code>ptrdiff_t</code></h3>
+<p>The type aliases <code>size_t</code> and <code>ptrdiff_t</code> are provided in <code>stddef.h</code> to represent the return types of the <code>sizeof</code> operator and <a href="#pointerArithmetic">pointer subtraction</a>. On a 32-bit architecture, <code>size_t</code> will be equivalent to the unsigned 32-bit integer type <code>uint32_t</code> (or just <code>unsigned int</code>) and <code>ptrdiff_t</code> will be equivalent to the signed 32-bit integer type <code>int32_t</code> (<code>int</code>). On a 64-bit architecture, <code>size_t</code> will be equivalent to <code>uint64_t</code> and <code>ptrdiff_t</code> will be equivalent to <code>int64_t</code>.</p>
+<p>The place where you will most often see <code>size_t</code> is as an argument to <a href="#malloc"><code>malloc</code></a>, where it gives the number of bytes to allocate.</p>
+<p>Because <code>stdlib.h</code> includes <code>stddef.h</code>, it is often not necessary to include <code>stddef.h</code> explicitly.</p>
+<h4 id="integerConstants"><span class="header-section-number">4.2.2.1</span> Integer constants</h4>
+<p>Constant integer values in C can be written in any of four different ways:</p>
+<ul>
+<li>In the usual decimal notation, e.g. <code class="backtick">0</code>, <code class="backtick">1</code>, <code class="backtick">-127</code>, <code class="backtick">9919291</code>, <code class="backtick">97</code>.</li>
+<li>In <strong>octal</strong> or base 8, when the leading digit is <code class="backtick">0</code>, e.g. <code class="backtick">01</code> for 1, <code class="backtick">010</code> for 8, <code class="backtick">0777</code> for 511, <code class="backtick">0141</code> for 97. Octal is not used much any more, but it is still conventional for representing Unix file permissions.</li>
+<li>In <strong>hexadecimal</strong> or base 16, when prefixed with <code class="backtick">0x</code>. The letters <code class="backtick">a</code> through <code class="backtick">f</code> are used for the digits 10 through 15. For example, <code class="backtick">0x61</code> is another way to write 97.</li>
+<li>Using a <strong>character constant</strong>, which is a single <a href="http://en.wikipedia.org/wiki/ASCII" title="WikiPedia">ASCII</a> character or an <strong>escape sequence</strong> inside single quotes. The value is the ASCII value of the character: <code class="backtick">'a'</code> is 97.<a href="#fn7" class="footnoteRef" id="fnref7"><sup>7</sup></a> Unlike languages with separate character types, C characters are identical to integers; you can (but shouldn't) calculate <span class="math inline">97<sup>2</sup></span> by writing <code class="backtick">'a'*'a'</code>. You can also store a character anywhere.</li>
+</ul>
+<p>Except for character constants, you can insist that an integer constant is unsigned or long by putting a <code class="backtick">u</code> or <code class="backtick">l</code> after it. So <code class="backtick">1ul</code> is an <code class="backtick">unsigned&nbsp;long</code> version of 1. By default integer constants are (signed) <code class="backtick">int</code>s. For <code class="backtick">long&nbsp;long</code> constants, use <code class="backtick">ll</code>, e.g., the <code class="backtick">unsigned&nbsp;long&nbsp;long</code> constant <code class="backtick">0xdeadbeef01234567ull</code>. It is also permitted to write the <code class="backtick">l</code> as <code class="backtick">L</code>, which can be less confusing if the <code class="backtick">l</code> looks too much like a <code class="backtick">1</code>.</p>
+<p>Some examples:</p>
+<table style="width:79%;">
+<colgroup>
+<col width="20%">
+<col width="58%">
+</colgroup>
+<tbody>
+<tr class="odd">
+<td align="left"><p>'a'</p></td>
+<td align="left"><p><code>int</code></p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>97</p></td>
+<td align="left"><p><code>int</code></p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p>97u</p></td>
+<td align="left"><p><code>unsigned int</code></p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>0xbea00d1ful</p></td>
+<td align="left"><p><code>unsigned long</code>, written in hexadecimal</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p>(short) 0777</p></td>
+<td align="left"><p><code>short</code>, written in octal (C has no suffix for <code>short</code> constants, so a cast is needed)</p></td>
+</tr>
+</tbody>
+</table>
+<p>A curious omission is that there is no way to write a binary integer
+directly in C. So if you want to write the bit pattern 00101101, you
+will need to encode it in hexadecimal as <code>0x2d</code> (or octal as <code>055</code>). Another potential trap is that leading zeros matter: <code>012</code> is an octal value corresponding to what normal people call 10.</p>
+<h5 id="naming-constants"><span class="header-section-number">4.2.2.1.1</span> Naming constants</h5>
+<p>Having a lot of numeric constants in your program—particularly if the
+ same constant shows up in more than one place—is usually a sign of bad
+programming. There are a few constants, like 0 and 1, that make sense on
+ their own, but many constant values are either mostly arbitrary, or
+might change if the needs of the program change. It's helpful to assign
+these constants names that explain their meaning, instead of requiring
+the user to guess why there is a <code>37</code> here or an <code>0x1badd00d</code>
+ there. This is particularly important if the constants might change in
+later versions of the program, since even though you could change every <code>37</code> in your program into a <code>38</code>, this might catch other <code>37</code> values that have different intended meanings.</p>
+<p>For example, suppose that you have a function (call it <code class="backtick">getchar</code>)
+ that needs to signal that sometimes it didn't work. The usual way is to
+ return a value that the function won't normally return. Now, you could
+just tell the user what value that is:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* get a character (as an `int` ASCII code) from `stdin` */</span>
+<span class="co">/* return -1 on end of file */</span>
+<span class="dt">int</span> getchar(<span class="dt">void</span>);</code></pre></div>
+<p>and now the user can write</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">while</span>((c = getchar()) != -<span class="dv">1</span>) {
+ ...
+ }</code></pre></div>
+<p>But then somebody reading the code has to remember that <code class="backtick">-1</code> means "end of file" and not "signed version of <code class="backtick">0xff</code>" or "computer room on fire, evacuate immediately." It's much better to define a constant <code class="backtick">EOF</code> that happens to equal <code class="backtick">-1</code>, because among other things if you change the special return value from <code class="backtick">getchar</code> later then this code will still work (assuming you fixed the definition of <code class="backtick">EOF</code>):</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">while</span>((c = getchar()) != EOF) {
+ ...
+ }</code></pre></div>
+<p>So how do you declare a constant in C? The traditional approach is to
+ use the C preprocessor, the same tool that gets run before the compiler
+ to expand out <code class="backtick">#include</code> directives. To define <code class="backtick">EOF</code>, the file <code class="backtick">/usr/include/stdio.h</code> includes the text</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define EOF (-1)</span></code></pre></div>
+<p>What this means is that whenever the characters <code class="backtick">EOF</code> appear in a C program as a separate word (e.g. in <code class="backtick">1+EOF*3</code> but not in <code class="backtick">appurtenancesTherEOF</code>), then the preprocessor will replace them with the characters <code class="backtick">(-1)</code>. The parentheses around the <code class="backtick">-1</code> are customary to ensure that the <code class="backtick">-1</code> gets treated as a separate constant and not as part of some larger expression. So from the compiler's perspective, <code>EOF</code> really is <code>-1</code>, but from the programmer's perspective, it's end-of-file. This is a special case of the C preprocessor's <a href="#macros">macro</a> mechanism.</p>
+<p>In general, any time you have a non-trivial constant in a program, it should be <code class="backtick">#define</code>d.
+ Examples are things like array dimensions, special tags or return
+values from functions, maximum or minimum values for some quantity, or
+standard mathematical constants (e.g., <code class="backtick">/usr/include/math.h</code> defines <code class="backtick">M_PI</code> as pi to umpteen digits). This allows you to write</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">char</span> buffer[MAX_FILENAME_LENGTH<span class="dv">+1</span>];
+
+ area = M_PI*r*r;
+
+ <span class="kw">if</span>(status == COMPUTER_ROOM_ON_FIRE) {
+ evacuate();
+ }</code></pre></div>
+<p>instead of</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">char</span> buffer[<span class="dv">513</span>];
+
+ area = <span class="fl">3.141592319</span>*r*r;
+
+ <span class="kw">if</span>(status == <span class="dv">136</span>) {
+ evacuate();
+ }</code></pre></div>
+<p>which is just an invitation to errors (including the one in the area computation).</p>
+<p>Like <code class="backtick">typedef</code>s, <code class="backtick">#define</code>s that are intended to be globally visible are best done in header files; in large programs you will want to <code class="backtick">#include</code> them in many source files. The usual convention is to write <code class="backtick">#define</code>d names in all-caps to remind the user that they are macros and not real variables.</p>
+<h4 id="integerOperators"><span class="header-section-number">4.2.2.2</span> Integer operators</h4>
+<h5 id="Arithmetic_operators"><span class="header-section-number">4.2.2.2.1</span> Arithmetic operators</h5>
+<p>The usual <code class="backtick">+</code> (addition), <code class="backtick">-</code> (negation or subtraction), and <code class="backtick">*</code>
+ (multiplication) operators work on integers pretty much the way you'd
+expect. The only caveat is that if the result lies outside of the range
+of whatever variable you are storing it in, it will be truncated instead
+ of causing an error:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">unsigned</span> <span class="dt">char</span> c;
+
+ c = -<span class="dv">1</span>; <span class="co">/* sets c = 255 */</span>
+ c = <span class="dv">255</span> + <span class="dv">255</span>; <span class="co">/* sets c = 254 */</span>
+ c = <span class="dv">256</span> * <span class="dv">1772717</span>; <span class="co">/* sets c = 0 */</span></code></pre></div>
+<p>This can be a source of subtle bugs if you aren't careful. The usual
+giveaway is that values you thought should be large positive integers
+come back as random-looking negative integers.</p>
+<p>Division (<code class="backtick">/</code>) of two integers also truncates: <code class="backtick">2/3</code> is 0, <code class="backtick">5/3</code> is 1, etc. For positive integers it will always round down.</p>
+<p>Prior to C99, if either the numerator or denominator is negative, the
+ behavior was unpredictable and depended on what your processor chose to
+ do. In practice this meant you should never use <code class="backtick">/</code>
+ if one or both arguments might be negative. The C99 standard specified
+that integer division always removes the fractional part, effectively
+rounding toward 0; so <code class="backtick">(-3)/2</code> is <code class="backtick">-1</code>, <code class="backtick">3/-2</code> is <code class="backtick">-1</code>, and <code class="backtick">(-3)/-2</code> is <code class="backtick">1</code>.</p>
+<p>There is also a remainder operator <code class="backtick">%</code> with e.g. <code class="backtick">2%3</code> = 2, <code class="backtick">5%3</code> = 2, <code class="backtick">27&nbsp;%&nbsp;2</code> = 1, etc. The sign of the modulus is ignored, so <code class="backtick">2%-3</code> is also <code class="backtick">2</code>. The sign of the dividend carries over to the remainder: <code class="backtick">(-3)%2</code> and <code class="backtick">(-3)%(-2)</code> are both <code class="backtick">-1</code>. The reason for this rule is that it guarantees that <code class="backtick">y&nbsp;==&nbsp;x*(y/x)&nbsp;+&nbsp;y%x</code> is always true.</p>
+<h5 id="Bitwise_operators"><span class="header-section-number">4.2.2.2.2</span> Bitwise operators</h5>
+<p>In addition to the arithmetic operators, integer types support <strong>bitwise logical</strong>
+ operators that apply some Boolean operation to all the bits of their
+arguments in parallel. What this means is that the i-th bit of the
+output is equal to some operation applied to the i-th bit(s) of the
+input(s). The bitwise logical operators are <code class="backtick">~</code> (bitwise negation: used with one argument as in <code class="backtick">~0</code> for the all-1's binary value), <code class="backtick">&amp;</code>
+ (bitwise AND), <code class="backtick">|</code> (bitwise OR), and <code class="backtick">^</code> (bitwise XOR, i.e. sum mod 2).
+ These are mostly used for manipulating individual bits or small groups
+of bits inside larger words, as in the expression <code class="backtick">x&nbsp;&amp;&nbsp;0x0f</code>, which strips off the bottom four bits stored in <code class="backtick">x</code>.</p>
+<p>Examples:</p>
+<table style="width:83%;">
+<colgroup>
+<col width="20%">
+<col width="20%">
+<col width="20%">
+<col width="20%">
+</colgroup>
+<thead>
+<tr class="header">
+<th align="left"><code>x</code></th>
+<th align="left"><code>y</code></th>
+<th align="left">expression</th>
+<th align="left">value</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left"><p>0011</p></td>
+<td align="left"><p>0101</p></td>
+<td align="left"><p><code>x&amp;y</code></p></td>
+<td align="left"><p>0001</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>0011</p></td>
+<td align="left"><p>0101</p></td>
+<td align="left"><p><code>x|y</code></p></td>
+<td align="left"><p>0111</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p>0011</p></td>
+<td align="left"><p>0101</p></td>
+<td align="left"><p><code>x^y</code></p></td>
+<td align="left"><p>0110</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>0011</p></td>
+<td align="left"><p>0101</p></td>
+<td align="left"><p><code>~x</code></p></td>
+<td align="left"><p>1100</p></td>
+</tr>
+</tbody>
+</table>
+<p>The shift operators <code class="backtick">&lt;&lt;</code> and <code class="backtick">&gt;&gt;</code> shift the bit sequence left or right: <code class="backtick">x&nbsp;&lt;&lt;&nbsp;y</code> produces the value <span class="math inline"><em>x</em> ⋅ 2<sup><em>y</em></sup></span> (ignoring overflow); this is equivalent to shifting every bit in <code class="backtick">x</code> <code class="backtick">y</code> positions to the left and filling in <code class="backtick">y</code> zeros for the missing positions. In the other direction, <code class="backtick">x&nbsp;&gt;&gt;&nbsp;y</code> produces the value <span class="math inline">⌊<em>x</em> ⋅ 2<sup>−<em>y</em></sup>⌋</span> by shifting <code class="backtick">x</code> <code class="backtick">y</code> positions to the right. The behavior of the right shift operator depends on whether <code class="backtick">x</code>
+ is unsigned or signed; for unsigned values, it shifts in zeros from the
+ left end always; for signed values, it shifts in additional copies of
+the leftmost bit (the sign bit). This makes <code class="backtick">x&nbsp;&gt;&gt;&nbsp;y</code> have the same sign as <code class="backtick">x</code> if <code class="backtick">x</code> is signed.</p>
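+<p>If <code class="backtick">y</code> is negative, it reverses the direction of the shift; so <code class="backtick">x&nbsp;&lt;&lt;&nbsp;-2</code> is equivalent to <code class="backtick">x&nbsp;&gt;&gt;&nbsp;2</code>. (Be warned that the C standard leaves the result of shifting by a negative amount undefined, so portable code should not rely on this behavior.)</p>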
+<p>Examples (<code class="backtick">unsigned&nbsp;char&nbsp;x</code>):</p>
+<table style="width:78%;">
+<colgroup>
+<col width="19%">
+<col width="19%">
+<col width="19%">
+<col width="19%">
+</colgroup>
+<thead>
+<tr class="header">
+<th align="left"><code>x</code></th>
+<th align="left"><code>y</code></th>
+<th align="left"><code>x &lt;&lt; y</code></th>
+<th align="left"><code>x &gt;&gt; y</code></th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left"><p>00000001</p></td>
+<td align="left"><p>1</p></td>
+<td align="left"><p>00000010</p></td>
+<td align="left"><p>00000000</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>11111111</p></td>
+<td align="left"><p>3</p></td>
+<td align="left"><p>11111000</p></td>
+<td align="left"><p>00011111</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p>10111001</p></td>
+<td align="left"><p>-2</p></td>
+<td align="left"><p>00101110</p></td>
+<td align="left"><p>11100100</p></td>
+</tr>
+</tbody>
+</table>
+<p>Examples (<code class="backtick">signed&nbsp;char&nbsp;x</code>):</p>
+<table style="width:78%;">
+<colgroup>
+<col width="19%">
+<col width="19%">
+<col width="19%">
+<col width="19%">
+</colgroup>
+<thead>
+<tr class="header">
+<th align="left"><code>x</code></th>
+<th align="left"><code>y</code></th>
+<th align="left"><code>x &lt;&lt; y</code></th>
+<th align="left"><code>x &gt;&gt; y</code></th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left"><p>00000001</p></td>
+<td align="left"><p>1</p></td>
+<td align="left"><p>00000010</p></td>
+<td align="left"><p>00000000</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>11111111</p></td>
+<td align="left"><p>3</p></td>
+<td align="left"><p>11111000</p></td>
+<td align="left"><p>11111111</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p>10111001</p></td>
+<td align="left"><p>-2</p></td>
+<td align="left"><p>11101110</p></td>
+<td align="left"><p>11100100</p></td>
+</tr>
+</tbody>
+</table>
+<p>Shift operators are often used with bitwise logical operators to set
+or extract individual bits in an integer value. The trick is that <code class="backtick">(1&nbsp;&lt;&lt;&nbsp;i)</code> contains a 1 in the <code class="backtick">i</code>-th least significant bit and zeros everywhere else. So <code class="backtick">x&nbsp;&amp;&nbsp;(1&lt;&lt;i)</code> is nonzero if and only if <code class="backtick">x</code> has a 1 in the <code class="backtick">i</code>-th place. This can be used to print out an integer in binary format (which standard <code class="backtick">printf</code> won't do).</p>
+<p>The following program gives an example of this technique. For example, when called as <code>./testPrintBinary 123</code>, it will print <code>00000000000000000000000001111011</code> followed by a newline (assuming a 32-bit <code>unsigned int</code>).</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="co">/* print out all bits of n */</span>
+<span class="dt">void</span>
+print_binary(<span class="dt">unsigned</span> <span class="dt">int</span> n)
+{
+ <span class="dt">unsigned</span> <span class="dt">int</span> mask = <span class="dv">0</span>;
+
+ <span class="co">/* this grotesque hack creates a bit pattern 1000... */</span>
+ <span class="co">/* regardless of the size of an unsigned int */</span>
+ mask = ~mask ^ (~mask &gt;&gt; <span class="dv">1</span>);
+
+ <span class="kw">for</span>(; mask != <span class="dv">0</span>; mask &gt;&gt;= <span class="dv">1</span>) {
+ putchar((n &amp; mask) ? '<span class="dv">1</span>' : '<span class="dv">0</span>');
+ }
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="kw">if</span>(argc != <span class="dv">2</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s n</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ print_binary(atoi(argv[<span class="dv">1</span>]));
+ putchar(<span class="ch">'\n'</span>);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/integerTypes/testPrintBinary.c" class="uri">examples/integerTypes/testPrintBinary.c</a>
+</div>
+<p>In the other direction, we can set the <code class="backtick">i</code>-th bit of <code class="backtick">x</code> to 1 by doing <code class="backtick">x&nbsp;|&nbsp;(1&nbsp;&lt;&lt;&nbsp;i)</code> or to 0 by doing <code class="backtick">x&nbsp;&amp;&nbsp;~(1&nbsp;&lt;&lt;&nbsp;i)</code>. See the section on <a href="#bitManipulation">bit manipulation</a> for applications of this to build arbitrarily-large bit vectors.</p>
+<h5 id="Logical_operators"><span class="header-section-number">4.2.2.2.3</span> Logical operators</h5>
+<p>To add to the confusion, there are also three <strong>logical</strong> operators that work on the <strong>truth-values</strong> of integers, where 0 is defined to be false and anything else is defined to be true. These are <code class="backtick">&amp;&amp;</code> (logical AND), <code class="backtick">||</code> (logical OR), and <code class="backtick">!</code> (logical NOT). The result of any of these operators is always 0 or 1 (so <code class="backtick">!!x</code>, for example, is 0 if <code class="backtick">x</code> is 0 and 1 if <code class="backtick">x</code> is anything else). The <code class="backtick">&amp;&amp;</code> and <code class="backtick">||</code>
+ operators evaluate their arguments left-to-right and ignore the second
+argument if the first determines the answer (this is the only place in C
+ where argument evaluation order is specified); so</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dv">0</span> &amp;&amp; executeProgrammer();
+ <span class="dv">1</span> || executeProgrammer();</code></pre></div>
+<p>is in a very weak sense perfectly safe code to run.</p>
+<p>Watch out for confusing <code class="backtick">&amp;</code> with <code class="backtick">&amp;&amp;</code>. The expression <code class="backtick">1&nbsp;&amp;&nbsp;2</code> evaluates to 0, but <code class="backtick">1&nbsp;&amp;&amp;&nbsp;2</code> evaluates to 1. The statement <code class="backtick">0&nbsp;&amp;&nbsp;executeProgrammer();</code> is also unlikely to do what you want.</p>
+<p>Yet another logical operator is the <strong>ternary operator</strong> <code class="backtick">?:</code>, where <code class="backtick">x&nbsp;?&nbsp;y&nbsp;:&nbsp;z</code> equals the value of <code class="backtick">y</code> if <code class="backtick">x</code> is nonzero and <code class="backtick">z</code> if <code class="backtick">x</code> is zero. Like <code class="backtick">&amp;&amp;</code> and <code class="backtick">||</code>, it only evaluates the arguments it needs to:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> fileExists(badFile) ? deleteFile(badFile) : createFile(badFile);</code></pre></div>
+<p>Most uses of <code class="backtick">?:</code> are better done using an <a href="#conditionals">if-then-else statement</a>.</p>
+<h5 id="Relational_operators"><span class="header-section-number">4.2.2.2.4</span> Relational operators</h5>
+<p>Logical operators usually operate on the results of <strong>relational operators</strong> or comparisons: these are <code class="backtick">==</code> (equality), <code class="backtick">!=</code> (inequality), <code class="backtick">&lt;</code> (less than), <code class="backtick">&gt;</code> (greater than), <code class="backtick">&lt;=</code> (less than or equal to) and <code class="backtick">&gt;=</code> (greater than or equal to). So, for example,</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">if</span>(size &gt;= MIN_SIZE &amp;&amp; size &lt;= MAX_SIZE) {
+ puts(<span class="st">"just right"</span>);
+ }</code></pre></div>
+<p>tests if <code class="backtick">size</code> is in the (inclusive) range [<code class="backtick">MIN_SIZE</code>..<code class="backtick">MAX_SIZE</code>].</p>
+<p>Beware of confusing <code class="backtick">==</code> with <code class="backtick">=</code>. The code</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="co">/* DANGER! DANGER! DANGER! */</span>
+ <span class="kw">if</span>(x = <span class="dv">5</span>) {
+ ...</code></pre></div>
+<p>is perfectly legal C, and will set <code class="backtick">x</code> to 5 rather than testing if it's equal to 5. Because 5 happens to be nonzero, the body of the <code class="backtick">if</code> statement will always be executed. This error is so common and so dangerous that <code class="backtick">gcc</code> will warn you about any tests that look like this if you use the <code class="backtick">-Wall</code> option. Some programmers will go so far as to write the test as <code class="backtick">5&nbsp;==&nbsp;x</code> just so that if their finger slips, they will get a syntax error on <code class="backtick">5&nbsp;=&nbsp;x</code> even without special compiler support.</p>
+<h4 id="integerStringConversion"><span class="header-section-number">4.2.2.3</span> Converting to and from strings</h4>
+<p>To input or output integer values, you will need to convert them from or to strings. Converting from a string is easy using the <code class="backtick">atoi</code> or <code class="backtick">atol</code> functions declared in <code class="backtick">stdlib.h</code>; these take a string as an argument and return an <code class="backtick">int</code> or <code class="backtick">long</code>, respectively. C99 also provides <code>atoll</code> for converting to <code>long long</code>. These routines have no ability to signal an error other than returning 0, so if you do <code>atoi("Sweden")</code>, that's what you'll get.</p>
+<p>Output is usually done using <code class="backtick">printf</code> (or <code class="backtick">sprintf</code> if you want to write to a string without producing output). Use the <code class="backtick">%d</code> format specifier for <code class="backtick">int</code>s, <code class="backtick">short</code>s, and <code class="backtick">char</code>s that you want the numeric value of, <code class="backtick">%ld</code> for <code class="backtick">long</code>s, and <code class="backtick">%lld</code> for <code class="backtick">long&nbsp;long</code>s.</p>
+<p>A contrived program that uses all of these features is given below:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="co">/* This program can be used to show how atoi etc. handle overflow. */</span>
+<span class="co">/* For example, try "overflow 1000000000000". */</span>
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">char</span> c;
+ <span class="dt">int</span> i;
+ <span class="dt">long</span> l;
+ <span class="dt">long</span> <span class="dt">long</span> ll;
+
+ <span class="kw">if</span>(argc != <span class="dv">2</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s n</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ c = atoi(argv[<span class="dv">1</span>]);
+ i = atoi(argv[<span class="dv">1</span>]);
+ l = atol(argv[<span class="dv">1</span>]);
+ ll = atoll(argv[<span class="dv">1</span>]);
+
+ printf(<span class="st">"char: %d int: %d long: %ld long long: %lld"</span>, c, i, l, ll);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/integerTypes/overflow.c" class="uri">examples/integerTypes/overflow.c</a>
+</div>
+<h3 id="floatingPointTypes"><span class="header-section-number">4.2.3</span> Floating-point types</h3>
+<p>Real numbers are represented in C by the <strong>floating point</strong> types <code class="backtick">float</code>, <code class="backtick">double</code>, and <code class="backtick">long&nbsp;double</code>.
+ Just as the integer types can't represent all integers because they fit
+ in a bounded number of bytes, so also the floating-point types can't
+represent all real numbers. The difference is that the integer types can
+ represent values within their range exactly, while floating-point types
+ almost always give only an approximation to the correct value, albeit
+across a much larger range. The three floating point types differ in how
+ much space they use (32, 64, or 80 bits on x86 CPUs; possibly different
+ amounts on other machines), and thus how much precision they provide.
+Most math library routines expect and return <code class="backtick">double</code>s (e.g., <code class="backtick">sin</code> is declared as <code class="backtick">double&nbsp;sin(double)</code>), but there are usually <code class="backtick">float</code> versions as well (<code class="backtick">float&nbsp;sinf(float)</code>).</p>
+<h4 id="Floating_point_basics"><span class="header-section-number">4.2.3.1</span> Floating point basics</h4>
+<p>The core idea of floating-point representations (as opposed to <strong>fixed point representations</strong> as used by, say, <code class="backtick">int</code>s), is that a number <span class="math inline"><em>x</em></span> is written as <span class="math inline"><em>m</em> ⋅ <em>b</em><sup><em>e</em></sup></span> where <span class="math inline"><em>m</em></span> is a <strong>mantissa</strong> or fractional part, <span class="math inline"><em>b</em></span> is a <strong>base</strong>, and <span class="math inline"><em>e</em></span> is an <strong>exponent</strong>. On modern computers the base is almost always <span class="math inline">2</span>, and for most floating-point representations the mantissa will be scaled to be between <span class="math inline">1</span> and <span class="math inline"><em>b</em></span>. This is done by adjusting the exponent, e.g.</p>
+<table>
+<tbody>
+<tr class="odd">
+<td align="left"><span class="math inline">1 = 1 ⋅ 2<sup>0</sup></span></td>
+</tr>
+<tr class="even">
+<td align="left"><span class="math inline">2 = 1 ⋅ 2<sup>1</sup></span></td>
+</tr>
+<tr class="odd">
+<td align="left"><span class="math inline">0.375 = 1.5 ⋅ 2<sup>−2</sup></span></td>
+</tr>
+</tbody>
+</table>
+<p>etc.</p>
+<p>The mantissa is usually represented in base <span class="math inline">2</span>, as a binary fraction. So (in a very low-precision format), <span class="math inline">1</span> would be <span class="math inline">1.000 ⋅ 2<sup>0</sup></span>, <span class="math inline">2</span> would be <span class="math inline">1.000 ⋅ 2<sup>1</sup></span>, and <span class="math inline">0.375 = 3/8</span> would be <span class="math inline">1.100 ⋅ 2<sup>−2</sup></span>, where the first <span class="math inline">1</span> after the decimal point counts as <span class="math inline">1/2</span>, the second as <span class="math inline">1/4</span>, etc. Note that for a properly-scaled (or <strong>normalized</strong>) floating-point number in base <span class="math inline">2</span> the digit before the decimal point is always <span class="math inline">1</span>. For this reason it is usually dropped to save space (although this requires a special representation for <span class="math inline">0</span>).</p>
+<p>Negative values are typically handled by adding a <strong>sign bit</strong> that is <span class="math inline">0</span> for positive numbers and <span class="math inline">1</span> for negative numbers.</p>
+<h4 id="Floating-point_constants"><span class="header-section-number">4.2.3.2</span> Floating-point constants</h4>
+<p>Any number that has a decimal point in it will be interpreted by the
+compiler as a floating-point number. Note that you have to put at least
+one digit after the decimal point: <code class="backtick">2.0</code>, <code class="backtick">3.75</code>, <code class="backtick">-12.6112</code>. You can specify a floating point number in scientific notation using <code class="backtick">e</code> for the exponent: <code class="backtick">6.022e23</code>.</p>
+<h4 id="Operators"><span class="header-section-number">4.2.3.3</span> Operators</h4>
+<p>Floating-point types in C support most of the same arithmetic and relational operators as integer types; <code class="backtick">x&nbsp;&gt;&nbsp;y</code>, <code class="backtick">x&nbsp;/&nbsp;y</code>, <code class="backtick">x&nbsp;+&nbsp;y</code> all make sense when <code class="backtick">x</code> and <code class="backtick">y</code> are <code class="backtick">float</code>s.
+ If you mix two different floating-point types together, the
+less-precise one will be extended to match the precision of the
+more-precise one; this also works if you mix integer and floating point
+types as in <code class="backtick">2&nbsp;/&nbsp;3.0</code>. Unlike
+integer division, floating-point division does not discard the
+fractional part (although it may produce round-off error: <code class="backtick">2.0/3.0</code> gives <code class="backtick">0.66666666666666663</code>, which is not quite exact). Be careful about accidentally using integer division when you mean to use floating-point division: <code class="backtick">2/3</code> is <code class="backtick">0</code>. Casts can be used to force floating-point division (see below).</p>
+<p>Some operators that work on integers will <em>not</em> work on floating-point types. These are <code class="backtick">%</code> (use <code class="backtick">fmod</code> from the math library if you really need to get a floating-point remainder) and all of the bitwise operators <code class="backtick">~</code>, <code class="backtick">&lt;&lt;</code>, <code class="backtick">&gt;&gt;</code>, <code class="backtick">&amp;</code>, <code class="backtick">^</code>, and <code class="backtick">|</code>.</p>
+<h4 id="Conversion_to_and_from_integer_types"><span class="header-section-number">4.2.3.4</span> Conversion to and from integer types</h4>
+<p>Mixed uses of floating-point and integer types will convert the integers to floating-point.</p>
+<p>You can convert floating-point numbers to and from integer types explicitly using casts. A typical use might be:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* return the average of a list */</span>
+<span class="dt">double</span>
+average(<span class="dt">int</span> n, <span class="dt">int</span> a[])
+{
+ <span class="dt">int</span> sum = <span class="dv">0</span>;
+ <span class="dt">int</span> i;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ sum += a[i];
+ }
+
+ <span class="kw">return</span> (<span class="dt">double</span>) sum / n;
+}</code></pre></div>
+<p>If we didn't put in the <code class="backtick">(double)</code> to convert <code class="backtick">sum</code> to a <code class="backtick">double</code>,
+ we'd end up doing integer division, which would truncate the fractional
+ part of our average. Note that casts bind tighter than arithmetic
+operations, so the <code>(double)</code> applies to just <code>sum</code>, and not the whole expression <code>sum / n</code>.</p>
+<p>In the other direction, we can write:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> i = (<span class="dt">int</span>) f;</code></pre></div>
+<p>to convert a <code class="backtick">float&nbsp;f</code> to <code class="backtick">int&nbsp;i</code>. This conversion loses information by throwing away the fractional part of <code class="backtick">f</code>: if <code class="backtick">f</code> was <code class="backtick">3.2</code>, <code class="backtick">i</code> will end up being just <code class="backtick">3</code>.</p>
+<p>The math library contains a pile of functions for converting values of type <code>double</code> to integer values of type <code>double</code> that give more control over the rounding: see for example the descriptions of <code>floor</code>, <code>ceil</code>, <code>round</code>, <code>trunc</code>, and <code>nearbyint</code> in the <a href="http://www.gnu.org/software/libc/manual/html_node/Rounding-Functions.html">GNU libc reference manual</a>.</p>
+<h4 id="The_IEEE-754_floating-point_standard"><span class="header-section-number">4.2.3.5</span> The IEEE-754 floating-point standard</h4>
+<p>The IEEE-754 floating-point standard is a standard for representing
+and manipulating floating-point quantities that is followed by all
+modern computer systems. It defines several standard representations of
+floating-point numbers, all of which have the following basic pattern
+(the specific layout here is for 32-bit <code class="backtick">float</code>s):</p>
+<pre><code>bit 31 30 23 22 0
+ S EEEEEEEE MMMMMMMMMMMMMMMMMMMMMMM</code></pre>
+<p>The bit numbers are counting from the least-significant bit. The
+first bit is the sign (0 for positive, 1 for negative). The following 8
+bits are the exponent in <strong>excess-127</strong> binary notation;
+this means that the binary pattern 01111111 = 127 represents an exponent
+ of 0, 10000000 = 128 represents 1, 01111110 = 126 represents -1, and so
+ forth. The mantissa fits in the remaining 23 bits, with its leading 1
+stripped off as described above.</p>
+<p>Certain numbers have a special representation. Because 0 cannot be
+represented in the standard form (there is no 1 before the decimal
+point), it is given the special representation <code class="backtick">0&nbsp;00000000&nbsp;00000000000000000000000</code>. (There is also a -0 = <code class="backtick">1&nbsp;00000000&nbsp;00000000000000000000000</code>, which looks equal to +0 but prints differently.) Numbers with exponents of 11111111 = 255 = 2<sup>128</sup> represent non-numeric quantities such as "not a number" (<code class="backtick">NaN</code>), returned by operations like (<code class="backtick">0.0/0.0</code>) and positive or negative infinity. A table of some typical floating-point numbers (generated by the program <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/floatingPoint/float.c">float.c</a>) is given below:</p>
+<pre><code> 0 = 0 = 0 00000000 00000000000000000000000
+ -0 = -0 = 1 00000000 00000000000000000000000
+ 0.125 = 0.125 = 0 01111100 00000000000000000000000
+ 0.25 = 0.25 = 0 01111101 00000000000000000000000
+ 0.5 = 0.5 = 0 01111110 00000000000000000000000
+ 1 = 1 = 0 01111111 00000000000000000000000
+ 2 = 2 = 0 10000000 00000000000000000000000
+ 4 = 4 = 0 10000001 00000000000000000000000
+ 8 = 8 = 0 10000010 00000000000000000000000
+ 0.375 = 0.375 = 0 01111101 10000000000000000000000
+ 0.75 = 0.75 = 0 01111110 10000000000000000000000
+ 1.5 = 1.5 = 0 01111111 10000000000000000000000
+ 3 = 3 = 0 10000000 10000000000000000000000
+ 6 = 6 = 0 10000001 10000000000000000000000
+ 0.1 = 0.10000000149011612 = 0 01111011 10011001100110011001101
+ 0.2 = 0.20000000298023224 = 0 01111100 10011001100110011001101
+ 0.4 = 0.40000000596046448 = 0 01111101 10011001100110011001101
+ 0.8 = 0.80000001192092896 = 0 01111110 10011001100110011001101
+ 1e+12 = 999999995904 = 0 10100110 11010001101010010100101
+ 1e+24 = 1.0000000138484279e+24 = 0 11001110 10100111100001000011100
+ 1e+36 = 9.9999996169031625e+35 = 0 11110110 10000001001011111001110
+ inf = inf = 0 11111111 00000000000000000000000
+ -inf = -inf = 1 11111111 00000000000000000000000
+ nan = nan = 0 11111111 10000000000000000000000</code></pre>
+<p>What this means in practice is that a 32-bit floating-point value (e.g. a <code class="backtick">float</code>) can represent any number between <code class="backtick">1.17549435e-38</code> and <code class="backtick">3.40282347e+38</code>, where the <code class="backtick">e</code>
+ separates the (base 10) exponent. Operations that would create a
+smaller value will underflow to 0 (slowly—IEEE 754 allows "denormalized"
+ floating point numbers with reduced precision for very small values)
+and operations that would create a larger value will produce <code class="backtick">inf</code> or <code class="backtick">-inf</code> instead.</p>
+<p>For a 64-bit <code class="backtick">double</code>, the size of both the exponent and mantissa are larger; this gives a range from <code class="backtick">2.2250738585072014e-308</code> to <code class="backtick">1.7976931348623157e+308</code>, with similar behavior on underflow and overflow.</p>
+<p>Intel processors internally use an even larger 80-bit floating-point
+format for all operations. Unless you declare your variables as <code class="backtick">long&nbsp;double</code>,
+ this should not be visible to you from C except that some operations
+that might otherwise produce overflow errors will not do so, provided
+all the variables involved sit in registers (typically the case only for
+ local variables and function parameters).</p>
+<h4 id="Error"><span class="header-section-number">4.2.3.6</span> Error</h4>
+<p>In general, floating-point numbers are not exact: they are likely to contain <strong>round-off error</strong> because of the truncation of the mantissa to a fixed number of bits. This is particularly noticeable for large values (e.g. <code class="backtick">1e+12</code> in the table above), but can also be seen in fractions with values that aren't powers of 2 in the denominator (e.g. <code class="backtick">0.1</code>).
+ Round-off error is often invisible with the default float output
+formats, since they produce fewer digits than are stored internally, but
+ can accumulate over time, particularly if you subtract floating-point
+quantities with values that are close (this wipes out the mantissa
+without wiping out the error, making the error much larger relative to
+the number that remains).</p>
+<p>The easiest way to avoid accumulating error is to use high-precision floating-point numbers (this means using <code class="backtick">double</code> instead of <code class="backtick">float</code>). On modern CPUs there is little or no time penalty for doing so, although storing <code class="backtick">double</code>s instead of <code class="backtick">float</code>s will take twice as much space in memory.</p>
+<p>Note that a consequence of the internal structure of IEEE 754
+floating-point numbers is that small integers and fractions with small
+numerators and power-of-2 denominators can be represented <em>exactly</em>—indeed,
+ the IEEE 754 standard carefully defines floating-point operations so
+that arithmetic on such exact integers will give the same answers as
+integer arithmetic would (except, of course, for division that produces a
+ remainder). This fact can sometimes be exploited to get higher
+precision on integer values than is available from the standard integer
+types; for example, a <code class="backtick">double</code> can represent any integer between -2<sup>53</sup> and 2<sup>53</sup> exactly, which is a much wider range than the values from -2<sup>31</sup> to 2<sup>31</sup>-1 that fit in a 32-bit <code class="backtick">int</code> or <code class="backtick">long</code>. (A 64-bit <code class="backtick">long&nbsp;long</code> does better.) So <code class="backtick">double</code>
+ should be considered for applications where large precise integers are
+needed (such as calculating the net worth in pennies of a billionaire.)</p>
+<p>One consequence of round-off error is that it is very difficult to
+test floating-point numbers for equality, unless you are sure you have
+an exact value as described above. It is generally not the case, for
+example, that <code class="backtick">(0.1+0.1+0.1)&nbsp;==&nbsp;0.3</code> in C. This can produce odd results if you try writing something like <code class="backtick">for(f&nbsp;=&nbsp;0.0;&nbsp;f&nbsp;&lt;=&nbsp;0.3;&nbsp;f&nbsp;+=&nbsp;0.1)</code>: it will be hard to predict in advance whether the loop body will be executed with <code class="backtick">f&nbsp;=&nbsp;0.3</code> or not. (Even more hilarity ensues if you write <code class="backtick">for(f&nbsp;=&nbsp;0.0;&nbsp;f&nbsp;!=&nbsp;0.3;&nbsp;f&nbsp;+=&nbsp;0.1)</code>, which after not quite hitting <code class="backtick">0.3</code>
+ exactly keeps looping for much longer than I am willing to wait to see
+it stop, but which I suspect will eventually converge to some constant
+value of <code class="backtick">f</code> large enough that adding <code class="backtick">0.1</code>
+ to it has no effect.) Most of the time when you are tempted to test
+floats for equality, you are better off testing if one lies within a
+small distance from the other, e.g. by testing <code class="backtick">fabs(x-y)&nbsp;&lt;=&nbsp;fabs(EPSILON&nbsp;*&nbsp;y)</code>, where <code class="backtick">EPSILON</code>
+ is usually some application-dependent tolerance. This isn't quite the
+same as equality (for example, it isn't transitive), but it is usually
+closer to what you want.</p>
+<h4 id="Reading_and_writing_floating-point_numbers"><span class="header-section-number">4.2.3.7</span> Reading and writing floating-point numbers</h4>
+<p>Any numeric constant in a C program that contains a decimal point is treated as a <code class="backtick">double</code> by default. You can also use <code class="backtick">e</code> or <code class="backtick">E</code> to add a base-10 exponent (see the table for some examples of this.) If you want to insist that a constant value is a <code class="backtick">float</code> for some reason, you can append <code class="backtick">F</code> on the end, as in <code class="backtick">1.0F</code>.</p>
+<p>For I/O, floating-point values are most easily read and written using <code class="backtick">scanf</code> (and its relatives <code class="backtick">fscanf</code> and <code class="backtick">sscanf</code>) and <code class="backtick">printf</code>. For <code class="backtick">printf</code>,
+ there is an elaborate variety of floating-point format codes; the
+easiest way to find out what these do is experiment with them. For <code class="backtick">scanf</code>, pretty much the only two codes you need are <code class="backtick">"%lf"</code>, which reads a <code class="backtick">double</code> value into a <code class="backtick">double&nbsp;*</code>, and <code class="backtick">"%f"</code>, which reads a <code class="backtick">float</code> value into a <code class="backtick">float&nbsp;*</code>. Both these formats are exactly the same in <code class="backtick">printf</code>, since a <code class="backtick">float</code> is promoted to a <code class="backtick">double</code> before being passed as an argument to <code class="backtick">printf</code> (or any other function that doesn't declare the type of its arguments). But you have to be careful with the arguments to <code class="backtick">scanf</code> or you will get odd results as only 4 bytes of your 8-byte <code class="backtick">double</code> are filled in, or—even worse—8 bytes of your 4-byte <code class="backtick">float</code> are.</p>
+<h4 id="Non-finite_numbers_in_C"><span class="header-section-number">4.2.3.8</span> Non-finite numbers in C</h4>
+<p>The values <code class="backtick">nan</code>, <code class="backtick">inf</code>, and <code class="backtick">-inf</code> can't be written in this form as floating-point constants in a C program, but <code class="backtick">printf</code> will generate them and <code class="backtick">scanf</code> seems to recognize them. With some machines and compilers you may be able to use the macros <code class="backtick">INFINITY</code> and <code class="backtick">NAN</code> from <code class="backtick">&lt;math.h&gt;</code> to generate infinite and not-a-number quantities. The macros <code class="backtick">isinf</code> and <code class="backtick">isnan</code> can be used to detect such quantities if they occur.</p>
+<h4 id="The_math_library"><span class="header-section-number">4.2.3.9</span> The math library</h4>
+<p>(See also K&amp;R Appendix B4.)</p>
+<p>Many mathematical functions on floating-point values are not linked
+into C programs by default, but can be obtained by linking in the math
+library. Examples would be the trigonometric functions <code class="backtick">sin</code>, <code class="backtick">cos</code>, and <code class="backtick">tan</code> (plus more exotic ones), <code class="backtick">sqrt</code> for taking square roots, <code class="backtick">pow</code> for exponentiation, <code class="backtick">log</code> and <code class="backtick">exp</code> for base-e logs and exponents, and <code class="backtick">fmod</code> for when you really want to write <code class="backtick">x%y</code> but one or both variables is a <code class="backtick">double</code>. The standard math library functions all take <code class="backtick">double</code>s as arguments and return <code class="backtick">double</code> values; most implementations also provide some extra functions with similar names (e.g., <code class="backtick">sinf</code>) that use <code class="backtick">float</code>s instead, for applications where space or speed is more important than accuracy.</p>
+<p>There are two parts to using the math library. The first is to include the line</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;math.h&gt;</span></code></pre></div>
+<p>somewhere at the top of your source file. This tells the preprocessor
+ to paste in the declarations of the math library functions found in <code class="backtick">/usr/include/math.h</code>.</p>
+<p>The second step is to link to the math library when you compile. This is done by passing the flag <code class="backtick">-lm</code> to <code class="backtick">gcc</code> <em>after</em> your C program source file(s). A typical command might be:</p>
+<pre><code>c99 -o program program.c -lm</code></pre>
+<p>If you don't do this, you will get errors from the compiler about
+missing functions. The reason is that the math library is not linked in
+by default, since for many system programs it's not needed.</p>
+<h2 id="operatorPrecedence"><span class="header-section-number">4.3</span> Operator precedence</h2>
+<p><strong>Operator precedence</strong> in C controls the interpretation of ambiguous expressions like <code class="backtick">2+3*4</code>, which could in principle be parsed either as <code class="backtick">2+(3*4)</code> (the right way) or as <code class="backtick">(2+3)*4</code>
+ (the cheap calculator way). For the most part, C parses unparenthesized
+ expressions the right way, but if you are not sure what it will do with
+ an expression, you can always put in parentheses to force it to do the
+right thing.</p>
+<p>There is a table on page 53 of Kernighan and Ritchie that shows the precedence of all operators in C, which we reproduce below.</p>
+<p>The interpretation of this table is that higher entries bind tighter than lower ones; so the fact that <code class="backtick">*</code> has higher precedence than <code class="backtick">+</code> and both have higher precedence than <code class="backtick">&gt;</code> means that <code class="backtick">2+3*4&nbsp;&gt;&nbsp;5</code> gets parsed as <code class="backtick">(2+(3*4))&nbsp;&gt;&nbsp;5</code>.</p>
+<p>Associativity controls how an expression with multiple operators of the same precedence is interpreted. The fact that <code class="backtick">+</code> and <code class="backtick">-</code> associate left-to-right means that the expression <code class="backtick">2+3-4-5</code> is interpreted as <code class="backtick">(((2+3)-4)-5)</code>: the leftmost operation is done first. Unary operators, ternary <code class="backtick">?:</code> and assignment operators are the only ones that associate right-to-left. For assignment operators, this is so <code class="backtick">x&nbsp;=&nbsp;y&nbsp;=&nbsp;0</code> is interpreted as <code class="backtick">x&nbsp;=&nbsp;(y&nbsp;=&nbsp;0)</code> (assigning <code class="backtick">0</code> to both <code class="backtick">x</code> and <code class="backtick">y</code>) and not <code class="backtick">(x&nbsp;=&nbsp;y)&nbsp;=&nbsp;0</code> (which would give an error because <code class="backtick">(x&nbsp;=&nbsp;y)</code> isn't something you can assign to). For unary operators, this mostly affects expressions like <code class="backtick">*p++</code>, which is equivalent to <code class="backtick">*(p++)</code> (increment the pointer, but dereference its old value) rather than <code class="backtick">(*p)++</code> (increment the thing that <code class="backtick">p</code> points to).</p>
+<table>
+<colgroup>
+<col width="62%">
+<col width="37%">
+</colgroup>
+<tbody>
+<tr class="odd">
+<td align="left"><p><code>()</code> <code>[]</code> <code>-&gt;</code> <code>.</code></p></td>
+<td align="left"><p>function calls and indexing</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p><code>!</code> <code>~</code> <code>-</code> (unary) <code>*</code> (unary) <code>&amp;</code>(unary) <code>++</code> <code>--</code> <code>(</code><em>type</em><code>)</code> <code>sizeof</code></p></td>
+<td align="left"><p>unary operators (associate right-to-left)</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p><code>*</code> (binary) <code>/</code> <code>%</code></p></td>
+<td align="left"><p>multiplication and division</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p><code>+</code> (binary) <code>-</code> (binary)</p></td>
+<td align="left"><p>addition and subtraction</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p><code>&lt;&lt;</code> <code>&gt;&gt;</code></p></td>
+<td align="left"><p>shifts</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p><code>&lt;</code> <code>&lt;=</code> <code>&gt;=</code> <code>&gt;</code></p></td>
+<td align="left"><p>inequalities</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p><code>==</code> <code>!=</code></p></td>
+<td align="left"><p>equality</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p><code>&amp;</code> (binary)</p></td>
+<td align="left"><p>bitwise AND</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p><code>^</code></p></td>
+<td align="left"><p>bitwise XOR</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p><code>|</code></p></td>
+<td align="left"><p>bitwise OR</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p><code>&amp;&amp;</code></p></td>
+<td align="left"><p>logical AND</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p><code>||</code></p></td>
+<td align="left"><p>logical OR</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p><code>?:</code></p></td>
+<td align="left"><p>ternary if (associates right-to-left)</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p><code>=</code> <code>+=</code> <code>-=</code> <code>*=</code> <code>/=</code> <code>%=</code> <code>&amp;=</code> <code>^=</code> <code>|=</code> <code>&lt;&lt;=</code> <code>&gt;&gt;=</code></p></td>
+<td align="left"><p>assignment (associate right-to-left)</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p><code>,</code></p></td>
+<td align="left"><p>comma</p></td>
+</tr>
+</tbody>
+</table>
+<h2 id="programmingStyle"><span class="header-section-number">4.4</span> Programming style</h2>
+<p>The C programming language imposes very few constraints on how
+programs are formatted and organized. Both of the following are
+legitimate C programs, which compile to exactly the same machine code
+using <code>gcc</code> with a high enough optimization level:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/*</span>
+<span class="co"> * Count down from COUNTDOWN_START (defined below) to 0.</span>
+<span class="co"> * Prints all numbers in the range including both endpoints.</span>
+<span class="co"> */</span>
+
+<span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="ot">#define COUNTDOWN_START (10)</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="kw">for</span>(<span class="dt">int</span> i = COUNTDOWN_START; i &gt;= <span class="dv">0</span>; i--) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, i);
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/style/countdown.c" class="uri">examples/style/countdown.c</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="dt">int</span> main(<span class="dt">int</span> _,<span class="dt">char</span>**xb){_=<span class="bn">0xb</span>;<span class="kw">while</span>(_--)printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>,_);<span class="kw">return</span> ++_;}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/style/badCountdown.c" class="uri">examples/style/badCountdown.c</a>
+</div>
+<p>The difference between these programs is that the first is designed
+to be easy to read and understand while the second is not. Though
+the computer can't tell the difference between them, the second will be much
+ harder to debug or modify to accomplish some new task.</p>
+<p>Certain formatting and programming conventions have evolved over the
+years to make C code as comprehensible as possible, and as we introduce
+various features of C, we will talk about how best to use them to make
+your programs understood by both computers and humans.</p>
+<p>Submitted assignments may be graded for style in addition to
+correctness. Below is a checklist that has been used in past versions of
+ the course to identify some of the more egregious violations of
+reasonable coding practice. For more extreme examples of what not to do,
+ see the <a href="http://www.ioccc.org/">International Obfuscated C Code Contest</a>.</p>
+<pre><code>Style grading checklist
+Score is 20 points minus 1 for each box checked (but never less than 0)
+
+Comments
+
+[ ] Undocumented module.
+[ ] Undocumented function other than main.
+[ ] Underdocumented function: return value or args not described.
+[ ] Undocumented program input and output (when main is provided).
+[ ] Undocumented struct or union components.
+[ ] Undocumented #define.
+[ ] Failure to cite code taken from other sources.
+[ ] Insufficient comments.
+[ ] Excessive comments.
+
+Naming
+
+[ ] Meaningless function name.
+[ ] Confusing variable name.
+[ ] Inconsistent variable naming style (UgLyName, ugly_name, NAME___UGLY_1).
+[ ] Inconsistent use of capitalization to distinguish constants.
+
+Whitespace
+
+[ ] Inconsistent or misleading indentation.
+[ ] Spaces not used or used misleadingly to break up complicated expressions.
+[ ] Blank lines not used or used misleadingly to break up long function bodies.
+
+Macros
+
+[ ] Non-trivial constant with no symbolic name.
+[ ] Failure to parenthesize expression in macro definition.
+[ ] Dependent constant not written as expression of earlier constant.
+[ ] Underdocumented parameterized macro.
+
+Global variables
+
+[ ] Inappropriate use of a global variable.
+
+Functions
+
+[ ] Kitchen-sink function that performs multiple unrelated tasks.
+[ ] Non-void function that returns no useful value.
+[ ] Function with too many arguments.
+
+Code organization
+
+[ ] Lack of modularity.
+[ ] Function used in multiple source files but not declared in header file.
+[ ] Internal-use-only function not declared static.
+[ ] Full struct definition in header files when components should be hidden.
+[ ] #include "file.c"
+[ ] Substantial repetition of code.
+
+Miscellaneous
+
+[ ] Other obstacle to readability not mentioned above.</code></pre>
+<h2 id="variables"><span class="header-section-number">4.5</span> Variables</h2>
+<p>Variables in C are a direct abstraction of physical memory locations.
+ To understand how variables work, it helps to start by understanding
+how computer memory works.</p>
+<h3 id="MachineMemory"><span class="header-section-number">4.5.1</span> Memory</h3>
+<p>Basic model: memory consists of many bytes of storage, each of which
+has an address which is itself a sequence of bits. Though the actual
+memory architecture of a modern computer is complex, from the point of
+view of a C program we can think of it as simply a large <strong>address space</strong>
+ that the CPU can store things in (and load things from), provided it
+can supply an address to the memory. Because we don't want to have to
+type long strings of bits all the time, the C compiler lets us give
+names to particular regions of the address space, and will even find
+free space for us to use.</p>
+<h3 id="variablesAsNames"><span class="header-section-number">4.5.2</span> Variables as names</h3>
+<p>A <strong>variable</strong> is a name given in a program for some region of memory. Each variable has a <strong>type</strong>,
+ which tells the compiler how big the region of memory corresponding to
+it is and how to treat the bits stored in that region when performing
+various kinds of operations (e.g. integer variables are added together
+by very different circuitry than floating-point variables, even though
+both represent numbers as bits). In modern programming languages, a
+variable also has a <strong>scope</strong> (a limit on where the name is
+ meaningful, which allows the same name to be used for different
+variables in different parts of the program) and an <strong>extent</strong> (the duration of the variable's existence, controlling when the program allocates and deallocates space for it).</p>
+<h4 id="Variable_declarations"><span class="header-section-number">4.5.2.1</span> Variable declarations</h4>
+<p>Before you can use a variable in C, you must <strong>declare</strong> it. Variable declarations show up in three places:</p>
+<ul>
+<li>Outside a function. These declarations declare <strong>global variables</strong> that are visible throughout the program (i.e. they have <strong>global scope</strong>). Use of global variables is almost always a mistake.</li>
+<li>In the argument list in the header of a function. These variables are <strong>parameters</strong> to the function. They are only visible inside the function body (<strong>local scope</strong>), exist only from when the function is called to when the function returns (<strong>bounded extent</strong>—note
+ that this is different from what happens in some garbage-collected
+languages like Scheme), and get their initial values from the arguments
+to the function when it is called.</li>
+<li>At the start of any block delimited by curly braces. Such variables
+are visible only within the block (local scope again) and exist only
+when the containing function is active (bounded extent). The convention
+in C has generally been to declare all such <strong>local variables</strong> at the top of a function; this is different from the convention in <a href="#cplusplus">C++</a>
+ or Java, which encourage variables to be declared when they are first
+used. This convention may be less strong in C99 code, since C99 adopts
+the C++ rule of allowing variables to be declared anywhere (which can be
+ particularly useful for index variables in <code class="backtick">for</code> loops).</li>
+</ul>
+<p>Another feature of function parameters and local variables is that if a function is called more than once (even if the function <a href="#recursion">calls itself</a>), each copy of the function gets its own local variables.</p>
+<p>Variable declarations consist of a type name followed by one or more
+variable names separated by commas and terminated by a semicolon (except
+ in argument lists, where each declaration is terminated by a comma). I
+personally find it easiest to declare variables one per line, to
+simplify documenting them. It is also possible for global and local
+variables (but not function arguments) to assign an initial value to a
+variable by putting in something like <code class="backtick">=&nbsp;0</code>
+ after the variable name. It is good practice to put a comment after
+each variable declaration that explains what the variable does (with a
+possible exception for conventionally-named loop variables like <code class="backtick">i</code> or <code class="backtick">j</code> in short functions). Below is an example of a program with some variable declarations in it:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;ctype.h&gt;</span>
+
+<span class="co">/* This program counts the number of digits in its input. */</span>
+
+<span class="co">/*</span>
+<span class="co"> *This global variable is not used; it is here only to demonstrate</span>
+<span class="co"> * what a global variable declaration looks like.</span>
+<span class="co"> */</span>
+<span class="dt">unsigned</span> <span class="dt">long</span> SpuriousGlobalVariable = <span class="dv">127</span>;
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> c; <span class="co">/* character read */</span>
+ <span class="dt">int</span> count = <span class="dv">0</span>; <span class="co">/* number of digits found */</span>
+
+ <span class="kw">while</span>((c = getchar()) != EOF) {
+ <span class="kw">if</span>(isdigit(c)) {
+ count++;
+ }
+ }
+
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, count);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/variables/countDigits.c" class="uri">examples/variables/countDigits.c</a>
+</div>
+<h4 id="Variable_names"><span class="header-section-number">4.5.2.2</span> Variable names</h4>
+<p>The evolution of variable names in different programming languages:</p>
+<dl>
+<dt>11101001001001</dt>
+<dd>Physical addresses represented as bits.
+</dd>
+<dt><code>#FC27</code></dt>
+<dd>Typical assembly language address represented in hexadecimal to save
+ typing (and because it's easier for humans to distinguish #A7 from #B6
+than to distinguish 10100111 from 10110110.)
+</dd>
+<dt><code>A1$</code></dt>
+<dd>A string variable in BASIC, back in the old days where BASIC
+variables were one uppercase letter, optionally followed by a number,
+optionally followed by $ for a string variable and % for an integer
+variable. These type tags were used because BASIC interpreters didn't
+have a mechanism for declaring variable types.
+</dd>
+<dt><code>IFNXG7</code></dt>
+<dd>A typical FORTRAN variable name, back in the days of 6-character all-caps variable names. The <code class="backtick">I</code>
+ at the start means it's an integer variable. The rest of the letters
+probably abbreviate some much longer description of what the variable
+means. The default type based on the first letter was used because
+FORTRAN programmers were lazy, but it could be overridden by an explicit
+ declaration.
+</dd>
+<dt><code>i</code>, <code>j</code>, <code>c</code>, <code>count</code>, <code>top_of_stack</code>, <code>accumulatedTimeInFlight</code></dt>
+<dd>Typical names from modern C programs. There is no type information
+contained in the name; the type is specified in the declaration and
+remembered by the compiler elsewhere. Note that there are two different
+conventions for representing multi-word names: the first is to replace
+spaces with underscores, and the second is to capitalize the first
+letter of each word (possibly excluding the first letter), a style
+called <a href="http://c2.com/cgi/wiki?FindPage&amp;value=CamelCase">camel case</a>. You should pick <strong>one</strong> of these two conventions and stick to it.
+</dd>
+<dt><code>prgcGradeDatabase</code></dt>
+<dd><p>An example of <strong>Hungarian notation</strong>, a style of
+variable naming in which the type of the variable is encoded in the
+first few characters. The type is now back in the variable name again.
+This is <em>not</em> enforced by the compiler: even though <code class="backtick">iNumberOfStudents</code> is supposed to be an <code class="backtick">int</code>, there is nothing to prevent you from declaring <code class="backtick">float&nbsp;iNumberOfStudents</code> if you are teaching a class on improper chainsaw handling and want to allow for the possibility of fractional students. See <a href="http://msdn.microsoft.com/en-us/library/aa260976%28v=vs.60%29.aspx">this MSDN page</a> for a much more detailed explanation of the system.</p>
+<p>Not clearly an improvement on standard naming conventions, but it is popular in some programming shops.</p>
+</dd>
+</dl>
+<p>In C, variable names are called <strong>identifiers</strong>. These are also used to identify things that are not variables, like functions and user-defined types.</p>
+<p>An identifier in C must start with a lower or uppercase letter or the underscore character <code class="backtick">_</code>.
+ Typically variables starting with underscores are used internally by
+system libraries, so it's dangerous to name your own variables this way.
+ Subsequent characters in an identifier can be letters, digits, or
+underscores. So for example <code class="backtick">a</code>, <code class="backtick">____a___a_a_11727_a</code>, <code class="backtick">AlbertEinstein</code>, <code class="backtick">aAaAaAaAaAAAAAa</code>, and <code class="backtick">______</code> are all legal identifiers in C, but <code class="backtick">$foo</code> and <code class="backtick">01</code> are not.</p>
+<p>The basic principle of variable naming is that a variable name is a
+substitute for the programmer's memory. It is generally best to give
+identifiers names that are easy to read and describe what the variable
+is used for. Such variables are called <strong>self-documenting</strong>. None of the variable names in the preceding list are any good by this standard. Better names would be <code class="backtick">total_input_characters</code>, <code class="backtick">dialedWrongNumber</code>, or <code class="backtick">stepsRemaining</code>. Non-descriptive single-character names are acceptable for certain conventional uses, such as the use of <code class="backtick">i</code> and <code class="backtick">j</code> for loop iteration variables, or <code class="backtick">c</code>
+ for an input character. Such names should only be used when the scope
+of the variable is small, so that it's easy to see all the places where
+it is used at the same time.</p>
+<p>C identifiers are case-sensitive, so <code class="backtick">aardvark</code>, <code class="backtick">AArDvARK</code>, and <code class="backtick">AARDVARK</code>
+ are all different variables. Because it is hard to remember how you
+capitalized something before, it is important to pick a standard
+convention and stick to it. The traditional convention in C goes like
+this:</p>
+<ul>
+<li>Ordinary variables and functions are lowercased or camel-cased, e.g. <code class="backtick">count</code>, <code class="backtick">countOfInputBits</code>.</li>
+<li>User-defined types (and in some conventions global variables) are capitalized, e.g. <code class="backtick">Stack</code>, <code class="backtick">TotalBytesAllocated</code>.</li>
+<li>Constants created with <code class="backtick">#define</code> or <code class="backtick">enum</code> are put in all-caps: <code class="backtick">MAXIMUM_STACK_SIZE</code>, <code class="backtick">BUFFER_LIMIT</code>.</li>
+</ul>
+<h3 id="usingVariables"><span class="header-section-number">4.5.3</span> Using variables</h3>
+<p>Ignoring <a href="#pointers">pointers</a> for the moment, there are essentially two things you can do to a variable. You can assign a value to it using the <code class="backtick">=</code> operator, as in:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> x = <span class="dv">2</span>; <span class="co">/* assign 2 to x */</span>
+ y = <span class="dv">3</span>; <span class="co">/* assign 3 to y */</span></code></pre></div>
+<p>or you can use its value in an expression:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> x = y<span class="dv">+1</span>; <span class="co">/* assign y+1 to x */</span></code></pre></div>
+<p>The assignment operator is an ordinary operator, and assignment expressions can be used in larger expressions:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> x = (y=<span class="dv">2</span>)*<span class="dv">3</span>; <span class="co">/* sets y to 2 and x to 6 */</span></code></pre></div>
+<p>This feature is usually only used in certain standard idioms, since it's confusing otherwise.</p>
+<p>There are also shorthand operators for expressions of the form <em>variable</em> <code class="backtick">=</code> <em>variable</em> <em>operator</em> <em>expression</em>. For example, writing <code class="backtick">x&nbsp;+=&nbsp;y</code> is equivalent to writing <code class="backtick">x&nbsp;=&nbsp;x&nbsp;+&nbsp;y</code>, <code class="backtick">x&nbsp;/=&nbsp;y</code> is the same as <code class="backtick">x&nbsp;=&nbsp;x&nbsp;/&nbsp;y</code>, etc.</p>
+<p>For the special case of adding or subtracting 1, you can abbreviate still further with the <code>++</code> and <code>--</code>
+ operators. These come in two versions, depending on whether you want
+the result of the expression (if used in a larger expression) to be the
+value of the variable before or after the variable is incremented:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> x = <span class="dv">0</span>;
+ y = x++; <span class="co">/* sets x to 1 and y to 0 (the old value) */</span>
+ y = ++x; <span class="co">/* sets x to 2 and y to 2 (the new value) */</span>
+ y = x--; <span class="co">/* sets x to 1 and y to 2 (the old value) */</span>
+ y = --x; <span class="co">/* sets x to 0 and y to 0 (the new value) */</span></code></pre></div>
+<p>The intuition is that if the <code class="backtick">++</code> comes before the variable, the increment happens before the value of the variable is read (a <strong>preincrement</strong>); if it comes after, it happens after the value is read (a <strong>postincrement</strong>).
+ This is confusing enough that it is best not to use the value of
+preincrement or postincrement operations except in certain standard
+idioms. But using <code>x++</code> or <code class="backtick">++x</code> by itself as a substitute for <code class="backtick">x&nbsp;=&nbsp;x+1</code> is perfectly acceptable style.<a href="#fn8" class="footnoteRef" id="fnref8"><sup>8</sup></a></p>
+<h3 id="initializers"><span class="header-section-number">4.5.4</span> Initialization</h3>
+<p>It is a serious error to use the value of a variable that has never
+been assigned to, because you will get whatever junk is sitting in
+memory at the address allocated to the variable, and this might be some
+arbitrary leftover value from a previous function call that doesn't even
+ represent the same type.<a href="#fn9" class="footnoteRef" id="fnref9"><sup>9</sup></a></p>
+<p>Fortunately, C provides a way to guarantee that a variable is
+initialized as soon as it is declared. Many of the examples in the notes
+ do not use this mechanism, because of bad habits learned by the
+instructor using early versions of C that imposed tighter constraints on
+ initialization. But initializing variables is a good habit to get
+into.</p>
+<p>For variables with simple types (that is, not <a href="#arrays">arrays</a>, <a href="#structs">structs</a>, or <a href="#unions">unions</a>), an initializer looks like an assignment:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">int</span> sum = <span class="dv">0</span>;
+ <span class="dt">int</span> n = <span class="dv">100</span>;
+ <span class="dt">int</span> nSquared = n*n;
+ <span class="dt">double</span> gradeSchoolPi = <span class="fl">3.14</span>;
+ <span class="dt">const</span> <span class="dt">char</span> * <span class="dt">const</span> greeting = <span class="st">"Hi!"</span>;
+ <span class="dt">const</span> <span class="dt">int</span> greetingLength = strlen(greeting);</code></pre></div>
+<p>For ordinary local variables, the initializer value can be any
+expression, including expressions that call other functions. There is an
+ exception for variables allocated when the program starts (which
+includes global variables outside functions and <code>static</code> variables inside functions), which can only be initialized to constant expressions.</p>
+<p>The last two examples show how initializers can set the values of variables that are declared to be <a href="#const"><code>const</code></a> (the variable <code>greeting</code> is both constant itself, because of <code>const greeting</code>, and points to data that is also constant, because it is of type <code>const char</code>).
+ This is the only way to set the values of such variables without
+cheating, because the compiler will complain if you try to do an
+ordinary assignment to a variable declared to be constant.</p>
+<p>For fixed-size <a href="#arrays">arrays</a> and <a href="#structs">structs</a>,
+ it is possible to supply an initializer for each component, by
+enclosing the initializer values in braces, separated by commas. For
+example:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">int</span> threeNumbers[<span class="dv">3</span>] = { <span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">3</span> };
+
+ <span class="kw">struct</span> numericTitle {
+ <span class="dt">int</span> number;
+ <span class="dt">const</span> <span class="dt">char</span> *name;
+ };
+
+ <span class="kw">struct</span> numericTitle s = { <span class="dv">7</span>, <span class="st">"Samurai"</span> };
+ <span class="kw">struct</span> numericTitle n = { <span class="dv">3</span>, <span class="st">"Ninjas"</span> };</code></pre></div>
+<h3 id="qualifiers"><span class="header-section-number">4.5.5</span> Storage class qualifiers</h3>
+<p>It is possible to specify additional information about how a variable can be used using <strong>storage class qualifiers</strong>, which usually go before the type of a variable in a declaration.</p>
+<h4 id="scopeAndExtent"><span class="header-section-number">4.5.5.1</span> Scope and extent</h4>
+<p>Most variables that you will use in C are either parameters to <a href="#functions">functions</a> or local variables inside functions. These have <strong>local scope</strong>, meaning the variable names can only be used in the function in which they are declared, and <strong>automatic extent</strong>,
+ meaning the space for the variable is allocated, typically on the
+stack, when the function is called, and reclaimed when the function
+exits. (If the function calls itself, you get another copy of all the
+local variables; see <a href="#recursion">recursion</a>.)</p>
+<p>On <em>very rare</em> occasions you might want to have a variable that survives the entire execution of a program (has <strong>static extent</strong>) or that is visible throughout the program (has <strong>global scope</strong>). C provides a mechanism for doing this <em>that you should never use under normal circumstances</em>.
+ Pretty much the only time you are going to want to have a variable with
+ static extent is if you are keeping track of some piece of information
+that (a) you only need one instance of, (b) you need to survive between
+function calls, and (c) it would be annoying to pass around as an extra
+argument to any function that uses it. An example would be the internal
+data structures used by <a href="#malloc"><code>malloc</code></a>, or the count variable in the function below:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* returns the number of times this function has previously been called */</span>
+<span class="co">/* this can be used to generate unique numerical identifiers */</span>
+<span class="dt">unsigned</span> <span class="dt">long</span> <span class="dt">long</span>
+ticketMachine(<span class="dt">void</span>)
+{
+ <span class="dt">static</span> <span class="dt">unsigned</span> <span class="dt">long</span> <span class="dt">long</span> count = <span class="dv">0</span>;
+
+ <span class="kw">return</span> count++;
+}</code></pre></div>
+<p>To declare a local variable with static extent, use the <code>static</code>
+ qualifier as in the above example. To declare a global variable with
+static extent, declare it outside a function. In both cases you should
+provide an <a href="#initializers">initializer</a> for the variable.</p>
+<h5 id="additional-qualifiers-for-global-variables"><span class="header-section-number">4.5.5.1.1</span> Additional qualifiers for global variables</h5>
+<p>It is possible to put some additional constraints on the visibility
+of global variables. By default, a global variable will be visible
+everywhere, but functions in files other than the one in which it is
+defined won't necessarily know what type it has. This latter problem can
+ be fixed using an <code>extern</code> declaration, which says that
+there is a variable somewhere else of a particular type that we are
+declaring (but not defining, so no space is allocated). In contrast, the
+ <code>static</code> keyword (on a global variable) specifies that it
+will only be visible in the current file, even if some other file
+includes a declaration of a global variable of the same name.</p>
+<p>Here are three variable declarations that illustrate how this works:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">unsigned</span> <span class="dt">short</span> Global = <span class="dv">5</span>; <span class="co">/* global variable, can be used anywhere */</span>
+
+ <span class="kw">extern</span> <span class="dt">float</span> GlobalFloat; <span class="co">/* this global variable, defined somewhere else, has type float */</span>
+
+ <span class="dt">static</span> <span class="dt">char</span> Character = 'c'; <span class="co">/* global variable, can only be used by functions in this file */</span></code></pre></div>
+<p>(Note the convention of putting capital letters on global variables to distinguish them from local variables.)</p>
+<p>Typically, an <code>extern</code> declaration would appear in a header
+ file so that it can be included in any file that uses the variable,
+ while an ordinary global variable definition would appear in a C file
+so it only occurs once.</p>
+<h3 id="const"><span class="header-section-number">4.5.6</span> Marking variables as constant</h3>
+<p>The <strong>const</strong> qualifier declares a variable to be constant:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">const</span> <span class="dt">int</span> three = <span class="dv">3</span>; <span class="co">/* this will always be 3 */</span></code></pre></div>
+<p>It is an error to apply any sort of assignment (<code>=</code>, <code>+=</code>, <code>++</code>, etc.) to a variable qualified as <code>const</code>.</p>
+<h4 id="pointers-to-const"><span class="header-section-number">4.5.6.1</span> Pointers to <code>const</code></h4>
+<p>A <a href="#pointers">pointer</a> to a region that should not be modified should be declared with <code>const</code> type:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">const</span> <span class="dt">char</span> *string = <span class="st">"You cannot modify this string."</span>;</code></pre></div>
+<p>The <code>const</code> in the declaration above applies to the characters that <code>string</code> points to: <code>string</code> is not <code>const</code> itself, but is instead a <em>pointer to <code>const</code></em>. It is still possible to make <code>string</code> point somewhere else, say by doing an assignment:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> string = <span class="st">"You cannot modify this string either."</span>;</code></pre></div>
+<p>If you want to make it so that you can't assign to <code>string</code>, put <code>const</code> right before the variable name:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="co">/* prevent assigning to string as well */</span>
+ <span class="dt">const</span> <span class="dt">char</span> * <span class="dt">const</span> string = <span class="st">"You cannot modify this string."</span>;</code></pre></div>
+<p>Now <code>string</code> is a <code>const</code> pointer to <code>const</code>: you can neither modify <code>string</code> nor the values it points to.</p>
+<p>Note that <code>const</code> only restricts what you can do using
+this particular variable name. If you can get at the memory that
+something points to by some other means, say through another pointer,
+you may be able to change the values in these memory locations anyway:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">int</span> x = <span class="dv">5</span>;
+ <span class="dt">const</span> <span class="dt">int</span> *p = &amp;x;
+ <span class="dt">int</span> *q;
+
+ *p = <span class="dv">1</span>; <span class="co">/* will cause an error at compile time */</span>
+ x = <span class="dv">3</span>; <span class="co">/* also changes *p, but will not cause an error */</span></code></pre></div>
+<h2 id="IO"><span class="header-section-number">4.6</span> Input and output</h2>
+<p>Input and output from C programs is typically done through the <strong>standard I/O library</strong>, whose functions etc. are declared in <code class="backtick">stdio.h</code>.
+ A detailed description of the functions in this library is given in
+Appendix B of Kernighan and Ritchie. We'll talk about some of the more
+useful functions and about how input-output (I/O) works on Unix-like
+operating systems in general.</p>
+<h3 id="Character_streams"><span class="header-section-number">4.6.1</span> Character streams</h3>
+<p>The standard I/O library works on <strong>character streams</strong>,
+ objects that act like long sequences of incoming or outgoing
+characters. What a stream is connected to is often not apparent to a
+program that uses it; an output stream might go to a terminal, to a
+file, or even to another program (appearing there as an input stream).</p>
+<p>Three standard streams are available to all programs: these are <code class="backtick">stdin</code> (standard input), <code class="backtick">stdout</code> (standard output), and <code class="backtick">stderr</code> (standard error). Standard I/O functions that do not take a stream as an argument will generally either read from <code class="backtick">stdin</code> or write to <code class="backtick">stdout</code>. The <code class="backtick">stderr</code> stream is used for error messages. It is kept separate from <code class="backtick">stdout</code> so that you can see these messages even if you redirect output to a file:</p>
+<pre><code>$ ls no-such-file &gt; /tmp/dummy-output
+ls: no-such-file: No such file or directory</code></pre>
+<h3 id="characterIO"><span class="header-section-number">4.6.2</span> Reading and writing single characters</h3>
+<p>To read a single character from <code class="backtick">stdin</code>, use <code class="backtick">getchar</code>:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">int</span> c;
+
+ c = getchar();</code></pre></div>
+<p>The <code class="backtick">getchar</code> routine will return the special value <code class="backtick">EOF</code> (usually -1; short for <em>end of file</em>)
+ if there are no more characters to read, which can happen when you hit
+the end of a file or when the user types the end-of-file key control-D
+to the terminal. Note that the return value of <code class="backtick">getchar</code> is declared to be an <code class="backtick">int</code> since <code class="backtick">EOF</code> lies outside the normal character range.</p>
+<p>To write a single character to <code class="backtick">stdout</code>, use <code class="backtick">putchar</code>:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> putchar('!');</code></pre></div>
+<p>Even though <code class="backtick">putchar</code> can only write single bytes, it takes an <code class="backtick">int</code> as an argument. Any value outside the range 0..255 will be truncated to its last byte, as in the usual conversion from <code class="backtick">int</code> to <code class="backtick">unsigned&nbsp;char</code>.</p>
+<p>Both <code class="backtick">getchar</code> and <code class="backtick">putchar</code> are wrappers for more general routines <code class="backtick">getc</code> and <code class="backtick">putc</code> that allow you to specify which stream you are using. To illustrate <code class="backtick">getc</code> and <code class="backtick">putc</code>, here's how we might define <code class="backtick">getchar</code> and <code class="backtick">putchar</code> if they didn't exist already:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span>
+getchar2(<span class="dt">void</span>)
+{
+ <span class="kw">return</span> getc(stdin);
+}
+
+<span class="dt">int</span>
+putchar2(<span class="dt">int</span> c)
+{
+ <span class="kw">return</span> putc(c, stdout);
+}</code></pre></div>
+<p>Note that <code class="backtick">putc</code>, <code class="backtick">putchar2</code> as defined above, and the original <code class="backtick">putchar</code> all return an <code class="backtick">int</code> rather than <code class="backtick">void</code>; this is so that they can signal whether the write succeeded. If the write succeeded, <code class="backtick">putchar</code> or <code class="backtick">putc</code> will return the value written. If the write failed (say because the disk was full), then <code class="backtick">putc</code> or <code class="backtick">putchar</code> will return <code class="backtick">EOF</code>.</p>
+<p>Here's another example of using <code class="backtick">putc</code> to make a new function <code class="backtick">putcerr</code> that writes a character to <code class="backtick">stderr</code>:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span>
+putcerr(<span class="dt">int</span> c)
+{
+ <span class="kw">return</span> putc(c, stderr);
+}</code></pre></div>
+<p>A rather odd feature of the C standard I/O library is that if you
+don't like the character you just got, you can put it back using the <code class="backtick">ungetc</code> function. The limitations on <code class="backtick">ungetc</code> are that (a) you can only push one character back, and (b) that character can't be <code class="backtick">EOF</code>. The <code class="backtick">ungetc</code>
+ function is provided because it makes certain high-level input tasks
+easier; for example, if you want to parse a number written as a sequence
+ of digits, you need to be able to read characters until you hit the
+first non-digit. But if the non-digit is going to be used elsewhere in
+your program, you don't want to eat it. The solution is to put it back
+using <code class="backtick">ungetc</code>.</p>
+<p>Here's a function that uses <code class="backtick">ungetc</code> to peek at the next character on <code class="backtick">stdin</code> without consuming it:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* return the next character from stdin without consuming it */</span>
+<span class="dt">int</span>
+peekchar(<span class="dt">void</span>)
+{
+ <span class="dt">int</span> c;
+
+ c = getchar();
+ <span class="kw">if</span>(c != EOF) ungetc(c, stdin); <span class="co">/* puts it back */</span>
+
+ <span class="kw">return</span> c;
+}</code></pre></div>
+<h3 id="Formatted_I.2FO"><span class="header-section-number">4.6.3</span> Formatted I/O</h3>
+<p>Reading and writing data one character at a time can be painful. The C
+ standard I/O library provides several convenient routines for reading
+and writing formatted data. The most commonly used one is <code class="backtick">printf</code>,
+ which takes as arguments a format string followed by zero or more
+values that are filled in to the format string according to patterns
+appearing in it.</p>
+<p>Here are some typical <code class="backtick">printf</code> statements:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> printf(<span class="st">"Hello</span><span class="ch">\n</span><span class="st">"</span>); <span class="co">/* print "Hello" followed by a newline */</span>
+ printf(<span class="st">"%c"</span>, c); <span class="co">/* equivalent to putchar(c) */</span>
+ printf(<span class="st">"%d"</span>, n); <span class="co">/* print n (an int) formatted in decimal */</span>
+ printf(<span class="st">"%u"</span>, n); <span class="co">/* print n (an unsigned int) formatted in decimal */</span>
+ printf(<span class="st">"%o"</span>, n); <span class="co">/* print n (an unsigned int) formatted in octal */</span>
+ printf(<span class="st">"%x"</span>, n); <span class="co">/* print n (an unsigned int) formatted in hexadecimal */</span>
+ printf(<span class="st">"%f"</span>, x); <span class="co">/* print x (a float or double) */</span>
+
+ <span class="co">/* print total (an int) and average (a double) on two lines with labels */</span>
+ printf(<span class="st">"Total: %d</span><span class="ch">\n</span><span class="st">Average: %f</span><span class="ch">\n</span><span class="st">"</span>, total, average);</code></pre></div>
+<p>For a full list of formatting codes see Table B-1 in Kernighan and Ritchie, or run <code class="backtick">man&nbsp;3&nbsp;printf</code>.</p>
+<p>The inverse of <code class="backtick">printf</code> is <code class="backtick">scanf</code>. The <code class="backtick">scanf</code> function reads formatted data from <code class="backtick">stdin</code> according to the format string passed as its first argument and stuffs the results into variables whose <em>addresses</em> are given by the later arguments. This requires prefixing each such argument with the <code class="backtick">&amp;</code> operator, which takes the address of a variable.</p>
+<p>Format strings for <code class="backtick">scanf</code> are close enough to format strings for <code class="backtick">printf</code> that you can usually copy them over directly. However, because <code class="backtick">scanf</code> arguments don't go through argument promotion (where all small integer types are converted to <code class="backtick">int</code> and <code class="backtick">float</code>s are converted to <code class="backtick">double</code>), you have to be much more careful about specifying the type of the argument correctly. For example, while <code>printf("%f", x)</code> will work whether <code>x</code> is a <code>float</code> or a <code>double</code>, <code>scanf("%f", &amp;x)</code> will work only if <code>x</code> is a <code>float</code>, which means that <code>scanf("%lf", &amp;x)</code> is needed if <code>x</code> is in fact a <code>double</code>.</p>
+<p>Some examples:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> scanf(<span class="st">"%c"</span>, &amp;c); <span class="co">/* like c = getchar(); c must be a char; will NOT put EOF in c */</span>
+ scanf(<span class="st">"%d"</span>, &amp;n); <span class="co">/* read an int formatted in decimal */</span>
+ scanf(<span class="st">"%u"</span>, &amp;n); <span class="co">/* read an unsigned int formatted in decimal */</span>
+ scanf(<span class="st">"%o"</span>, &amp;n); <span class="co">/* read an unsigned int formatted in octal */</span>
+ scanf(<span class="st">"%x"</span>, &amp;n); <span class="co">/* read an unsigned int formatted in hexadecimal */</span>
+ scanf(<span class="st">"%f"</span>, &amp;x); <span class="co">/* read a float */</span>
+ scanf(<span class="st">"%lf"</span>, &amp;x); <span class="co">/* read a double */</span>
+
+ <span class="co">/* read total (an int) and average (a float) on two lines with labels */</span>
+ <span class="co">/* (will also work if input is missing newlines or uses other whitespace, see below) */</span>
+ scanf(<span class="st">"Total: %d</span><span class="ch">\n</span><span class="st">Average: %f</span><span class="ch">\n</span><span class="st">"</span>, &amp;total, &amp;average);</code></pre></div>
+<p>For a full list of formatting codes, run <code>man 3 scanf</code>.</p>
+<p>The <code class="backtick">scanf</code> routine usually eats
+whitespace (spaces, tabs, newlines, etc.) in its input whenever it sees a
+ conversion specification or a whitespace character in its format
+string. The one exception is that a <code>%c</code> conversion specifier
+ will not eat whitespace and will instead return the next character
+whether it is whitespace or not. Non-whitespace characters that are not
+part of conversion specifications must match exactly. To detect if <code class="backtick">scanf</code> parsed everything successfully, look at its return value; it returns the number of values it filled in, or <code class="backtick">EOF</code> if it hits end-of-file before filling in any values.</p>
+<p>The <code class="backtick">printf</code> and <code class="backtick">scanf</code> routines are wrappers for <code class="backtick">fprintf</code> and <code class="backtick">fscanf</code>, which take a stream as their first argument, e.g.:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> fprintf(stderr, <span class="st">"BUILDING ON FIRE, %d%% BURNT!!!</span><span class="ch">\n</span><span class="st">"</span>, percentage);</code></pre></div>
+<p>This sends the output to the standard error output handle <code>stderr</code>. Note the use of "%%" to print a single percent in the output.</p>
+<h3 id="Rolling_your_own_I.2FO_routines"><span class="header-section-number">4.6.4</span> Rolling your own I/O routines</h3>
+<p>Since we can write our own functions in C, if we don't like what the
+standard routines do, we can build our own on top of them. For example,
+here's a function that reads in integer values without leading minus
+signs and returns the result. It uses the <code class="backtick">peekchar</code> routine we defined above, as well as the <code class="backtick">isdigit</code> routine declared in <code class="backtick">ctype.h</code>.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* read an integer written in decimal notation from stdin until the first</span>
+<span class="co"> * non-digit and return it. Returns 0 if there are no digits. */</span>
+<span class="dt">int</span>
+readNumber(<span class="dt">void</span>)
+{
+ <span class="dt">int</span> accumulator; <span class="co">/* the number so far */</span>
+ <span class="dt">int</span> c; <span class="co">/* next character */</span>
+
+ accumulator = <span class="dv">0</span>;
+
+ <span class="kw">while</span>((c = peekchar()) != EOF &amp;&amp; isdigit(c)) {
+ c = getchar(); <span class="co">/* consume it */</span>
+ accumulator *= <span class="dv">10</span>; <span class="co">/* shift previous digits over */</span>
+ accumulator += (c - '<span class="dv">0</span>'); <span class="co">/* add decimal value of new digit */</span>
+ }
+
+ <span class="kw">return</span> accumulator;
+}</code></pre></div>
+<p>Here's another implementation that does <em>almost</em> the same thing:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span>
+readNumber2(<span class="dt">void</span>)
+{
+ <span class="dt">int</span> n;
+
+ <span class="kw">if</span>(scanf(<span class="st">"%u"</span>, &amp;n) == <span class="dv">1</span>) {
+ <span class="kw">return</span> n;
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> <span class="dv">0</span>;
+ }
+}</code></pre></div>
+<p>The difference is that <code class="backtick">readNumber2</code> will consume any whitespace before the first digit, which may or may not be what we want.</p>
+<p>More complex routines can be used to parse more complex input. For example, here's a routine that uses <code class="backtick">readNumber</code> to parse simple arithmetic expressions, where each expression is either a number or of the form <code class="backtick">(</code><em>expression</em><code class="backtick">+</code><em>expression</em><code class="backtick">)</code> or <code class="backtick">(</code><em>expression</em><code class="backtick">*</code><em>expression</em><code class="backtick">)</code>.
+ The return value is the value of the expression after adding together
+or multiplying all of its subexpressions. (A complete program including
+this routine and the others defined earlier that it uses can be found in <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/IO/calc.c" class="uri">examples/IO/calc.c</a>.)</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define EXPRESSION_ERROR (-1)</span>
+
+<span class="co">/* read an expression from stdin and return its value */</span>
+<span class="co">/* returns EXPRESSION_ERROR on error */</span>
+<span class="dt">int</span>
+readExpression(<span class="dt">void</span>)
+{
+ <span class="dt">int</span> e1; <span class="co">/* value of first sub-expression */</span>
+ <span class="dt">int</span> e2; <span class="co">/* value of second sub-expression */</span>
+ <span class="dt">int</span> c;
+ <span class="dt">int</span> op; <span class="co">/* operation: '+' or '*' */</span>
+
+ c = peekchar();
+
+ <span class="kw">if</span>(c == '(') {
+ c = getchar();
+
+ e1 = readExpression();
+ op = getchar();
+ e2 = readExpression();
+
+ c = getchar(); <span class="co">/* this had better be ')' */</span>
+ <span class="kw">if</span>(c != ')') <span class="kw">return</span> EXPRESSION_ERROR;
+
+ <span class="co">/* else */</span>
+ <span class="kw">switch</span>(op) {
+ <span class="kw">case</span> '*':
+ <span class="kw">return</span> e1*e2;
+ <span class="kw">break</span>;
+ <span class="kw">case</span> '+':
+ <span class="kw">return</span> e1+e2;
+ <span class="kw">break</span>;
+ <span class="kw">default</span>:
+ <span class="kw">return</span> EXPRESSION_ERROR;
+ <span class="kw">break</span>;
+ }
+ } <span class="kw">else</span> <span class="kw">if</span>(isdigit(c)) {
+ <span class="kw">return</span> readNumber();
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> EXPRESSION_ERROR;
+ }
+}</code></pre></div>
+<p>Because this routine calls itself recursively as it works its way down through the input, it is an example of a <a href="http://en.wikipedia.org/wiki/Recursive_descent_parser" title="WikiPedia">recursive descent parser</a>.
+ Parsers for more complicated languages like C are usually not written
+by hand like this, but are instead constructed mechanically using a <a href="http://en.wikipedia.org/wiki/Parser_generator" title="WikiPedia">Parser generator</a>.</p>
+<h3 id="File_I.2FO"><span class="header-section-number">4.6.5</span> File I/O</h3>
+<p>Reading and writing files is done by creating new streams attached to the files. The function that does this is <code class="backtick">fopen</code>.
+ It takes two arguments: a filename, and a flag that controls whether
+the file is opened for reading or writing. The return value of <code class="backtick">fopen</code> has type <code class="backtick">FILE&nbsp;*</code> and can be used in <code class="backtick">putc</code>, <code class="backtick">getc</code>, <code class="backtick">fprintf</code>, etc. just like <code class="backtick">stdin</code>, <code class="backtick">stdout</code>, or <code class="backtick">stderr</code>. When you are done using a stream, you should close it using <code class="backtick">fclose</code>.</p>
+<p>Here's a program that reads a list of numbers from a file whose name is given as <code class="backtick">argv[1]</code> and prints their sum:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ FILE *f;
+ <span class="dt">int</span> x;
+ <span class="dt">int</span> sum;
+
+ <span class="kw">if</span>(argc &lt; <span class="dv">2</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s filename</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ exit(<span class="dv">1</span>);
+ }
+
+ f = fopen(argv[<span class="dv">1</span>], <span class="st">"r"</span>);
+ <span class="kw">if</span>(f == <span class="dv">0</span>) {
+ <span class="co">/* perror is a standard C library routine */</span>
+ <span class="co">/* that prints a message about the last failed library routine */</span>
+ <span class="co">/* prepended by its argument */</span>
+ perror(argv[<span class="dv">1</span>]);
+ exit(<span class="dv">2</span>);
+ }
+
+ <span class="co">/* else everything is ok */</span>
+ sum = <span class="dv">0</span>;
+ <span class="kw">while</span>(fscanf(f, <span class="st">"%d"</span>, &amp;x) == <span class="dv">1</span>) {
+ sum += x;
+ }
+
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, sum);
+
+ <span class="co">/* not strictly necessary but it's polite */</span>
+ fclose(f);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/IO/sum.c" class="uri">examples/IO/sum.c</a>
+</div>
+<p>To write to a file, open it with <code class="backtick">fopen(filename,&nbsp;"w")</code>. Note that as soon as you call <code class="backtick">fopen</code> with the <code class="backtick">"w"</code> flag, any previous contents of the file are erased. If you want to append to the end of an existing file, use <code class="backtick">"a"</code> instead. You can also add <code class="backtick">+</code> onto the flag if you want to read and write the same file (this will probably involve using <code class="backtick">fseek</code>).</p>
+<p>Some operating systems (Windows) make a distinction between text and
+binary files. For text files, use the same arguments as above. For
+binary files, add a <code class="backtick">b</code>, e.g. <code class="backtick">fopen(filename,&nbsp;"wb")</code> to write a binary file.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* leave a greeting in the current directory */</span>
+
+<span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="ot">#define FILENAME "hello.txt"</span>
+<span class="ot">#define MESSAGE "hello world"</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ FILE *f;
+
+ f = fopen(FILENAME, <span class="st">"w"</span>);
+ <span class="kw">if</span>(f == <span class="dv">0</span>) {
+ perror(FILENAME);
+ exit(<span class="dv">1</span>);
+ }
+
+ <span class="co">/* unlike puts, fputs doesn't add a newline */</span>
+ fputs(MESSAGE, f);
+ putc(<span class="ch">'\n'</span>, f);
+
+ fclose(f);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/IO/helloFile.c" class="uri">examples/IO/helloFile.c</a>
+</div>
+<h2 id="statements"><span class="header-section-number">4.7</span> Statements and control structures</h2>
+<p>The bodies of C functions (including the <code class="backtick">main</code> function) are made up of <strong>statements</strong>. These can either be <strong>simple statements</strong> that do not contain other statements, or <strong>compound statements</strong> that have other statements inside them. <strong>Control structures</strong>
+ are compound statements like if/then/else, while, for, and do..while
+that control how or whether their component statements are executed.</p>
+<h3 id="Simple_statements"><span class="header-section-number">4.7.1</span> Simple statements</h3>
+<p>The simplest kind of statement in C is an expression (followed by a
+semicolon, the terminator for all simple statements). Its value is
+computed and discarded. Examples:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> x = <span class="dv">2</span>; <span class="co">/* an assignment statement */</span>
+ x = <span class="dv">2+3</span>; <span class="co">/* another assignment statement */</span>
+ <span class="dv">2+3</span>; <span class="co">/* has no effect---will be discarded by smart compilers */</span>
+ puts(<span class="st">"hi"</span>); <span class="co">/* a statement containing a function call */</span>
+ root2 = sqrt(<span class="dv">2</span>); <span class="co">/* an assignment statement with a function call */</span></code></pre></div>
+<p>Most statements in a typical C program are simple statements of this form.</p>
+<p>Other examples of simple statements are the jump statements <code class="backtick">return</code>, <code class="backtick">break</code>, <code class="backtick">continue</code>, and <code class="backtick">goto</code>. A <code class="backtick">return</code>
+ statement specifies the return value for a function (if there is one),
+and when executed it causes the function to exit immediately. The <code class="backtick">break</code> and <code class="backtick">continue</code> statements jump immediately to the end of a loop (or <code class="backtick">switch</code>; see below) or the next iteration of a loop; we'll talk about these more when we talk about loops. The <code class="backtick">goto</code>
+ statement jumps to another location in the same function, and exists
+for the rare occasions when it is needed. Using it in most circumstances
+ is a sin.</p>
+<h3 id="Compound_statements"><span class="header-section-number">4.7.2</span> Compound statements</h3>
+<p>Compound statements come in two varieties: conditionals and loops.</p>
+<h4 id="conditionals"><span class="header-section-number">4.7.2.1</span> Conditionals</h4>
+<p>These are compound statements that test some condition and execute
+one or another block depending on the outcome of the condition. The
+simplest is the <code class="backtick">if</code> statement:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">if</span>(houseIsOnFire) {
+ <span class="co">/* ouch! */</span>
+ scream();
+ runAway();
+ }</code></pre></div>
+<p>The <strong>body</strong> of the <code class="backtick">if</code>
+statement is executed only if the expression in parentheses at the top
+evaluates to true (which in C means any value that is not 0).</p>
+<p>The braces are not strictly required, and are used only to group one
+or more statements into a single statement. If there is only one
+statement in the body, the braces can be omitted:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">if</span>(programmerIsLazy) omitBraces();</code></pre></div>
+<p>This style is recommended only for very simple bodies. Omitting the
+braces makes it harder to add more statements later without errors.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">if</span>(underAttack)
+ launchCounterAttack(); <span class="co">/* executed only when attacked */</span>
+ hideInBunker(); <span class="co">/* ### DO NOT INDENT LIKE THIS ### executed always */</span></code></pre></div>
+<p>In the example above, the lack of braces means that the <code class="backtick">hideInBunker()</code> statement is <em>not</em> part of the <code class="backtick">if</code> statement, despite the misleading indentation. This sort of thing is why I almost always put braces around the body of an <code class="backtick">if</code>.</p>
+<p>An <code class="backtick">if</code> statement may have an <code class="backtick">else</code> clause, whose body is executed if the test is false (i.e. equal to 0).</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">if</span>(happy) {
+ smile();
+ } <span class="kw">else</span> {
+ frown();
+ }</code></pre></div>
+<p>A common idiom is to have a chain of <code class="backtick">if</code> and <code class="backtick">else&nbsp;if</code> branches that test several conditions:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">if</span>(temperature &lt; <span class="dv">0</span>) {
+ puts(<span class="st">"brrr"</span>);
+ } <span class="kw">else</span> <span class="kw">if</span>(temperature &lt; <span class="dv">100</span>) {
+ puts(<span class="st">"hooray"</span>);
+ } <span class="kw">else</span> {
+ puts(<span class="st">"ouch!"</span>);
+ }</code></pre></div>
+<p>This can be inefficient if there are a lot of cases, since the tests are applied sequentially. For tests of the form <em>&lt;expression&gt;</em> <code class="backtick">==</code> <em>&lt;small constant&gt;</em>, the <code class="backtick">switch</code> statement may provide a faster alternative. Here's a typical <code class="backtick">switch</code> statement:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="co">/* print plural of cow, maybe using the obsolete dual number construction */</span>
+ <span class="kw">switch</span>(numberOfCows) {
+ <span class="kw">case</span> <span class="dv">1</span>:
+ puts(<span class="st">"cow"</span>);
+ <span class="kw">break</span>;
+ <span class="kw">case</span> <span class="dv">2</span>:
+ puts(<span class="st">"cowen"</span>);
+ <span class="kw">break</span>;
+ <span class="kw">default</span>:
+ puts(<span class="st">"cows"</span>);
+ <span class="kw">break</span>;
+ }</code></pre></div>
+<p>This prints the string "cow" if there is one cow, "cowen" if there
+are two cowen, and "cows" if there are any other number of cows. The <code class="backtick">switch</code> statement evaluates its argument and jumps to the matching <code class="backtick">case</code> label, or to the <code class="backtick">default</code> label if none of the cases match. Cases must be constant integer values.</p>
+<p>The <code class="backtick">break</code> statements inside the block jump to the end of the block. Without them, executing the <code class="backtick">switch</code> with <code class="backtick">numberOfCows</code>
+ equal to 1 would print all three lines. This can be useful in some
+circumstances where the same code should be used for more than one case:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">switch</span>(c) {
+ <span class="kw">case</span> 'a':
+ <span class="kw">case</span> 'e':
+ <span class="kw">case</span> 'i':
+ <span class="kw">case</span> 'o':
+ <span class="kw">case</span> 'u':
+ type = VOWEL;
+ <span class="kw">break</span>;
+ <span class="kw">default</span>:
+ type = CONSONANT;
+ <span class="kw">break</span>;
+ }</code></pre></div>
+<p>or when a case "falls through" to the next:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">switch</span>(countdownStart) {
+ <span class="kw">case</span> <span class="dv">3</span>:
+ puts(<span class="st">"3"</span>);
+ <span class="kw">case</span> <span class="dv">2</span>:
+ puts(<span class="st">"2"</span>);
+ <span class="kw">case</span> <span class="dv">1</span>:
+ puts(<span class="st">"1"</span>);
+ <span class="kw">case</span> <span class="dv">0</span>:
+ puts(<span class="st">"KABLOOIE!"</span>);
+ <span class="kw">break</span>;
+ <span class="kw">default</span>:
+ puts(<span class="st">"I can't count that high!"</span>);
+ <span class="kw">break</span>;
+ }</code></pre></div>
+<p>Note that it is customary to include a <code class="backtick">break</code>
+ on the last case even though it has no effect; this avoids problems
+later if a new case is added. It is also customary to include a <code class="backtick">default</code> case even if the other cases supposedly exhaust all the possible values, as a check against bad or unanticipated inputs.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">switch</span>(oliveSize) {
+ <span class="kw">case</span> JUMBO:
+ eatOlives(SLOWLY);
+ <span class="kw">break</span>;
+ <span class="kw">case</span> COLLOSSAL:
+ eatOlives(QUICKLY);
+ <span class="kw">break</span>;
+ <span class="kw">case</span> SUPER_COLLOSSAL:
+ eatOlives(ABSURDLY);
+ <span class="kw">break</span>;
+ <span class="kw">default</span>:
+ <span class="co">/* unknown size! */</span>
+ abort();
+ <span class="kw">break</span>;
+ }</code></pre></div>
+<p>Though <code class="backtick">switch</code> statements are better
+than deeply nested if/else-if constructions, it is often even better to
+organize the different cases as data rather than code. We'll see
+examples of this when we talk about <a href="#functionPointers">function pointers</a>.</p>
+<p>Nothing in the C standards prevents the <code class="backtick">case</code> labels from being buried inside other compound statements. One rather hideous application of this fact is <a href="http://en.wikipedia.org/wiki/Duff%27s_device" title="WikiPedia">Duff's device</a>.</p>
+<h4 id="Loops"><span class="header-section-number">4.7.2.2</span> Loops</h4>
+<p>There are three kinds of loops in C.</p>
+<h5 id="The_while_loop"><span class="header-section-number">4.7.2.2.1</span> The while loop</h5>
+<p>A <code class="backtick">while</code> loop tests if a condition is
+true, and if so, executes its body. It then tests whether the condition is true
+again, and keeps executing the body as long as it is. Here's a program
+that deletes every occurrence of the letter <code class="backtick">e</code> from its input.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> c;
+
+ <span class="kw">while</span>((c = getchar()) != EOF) {
+ <span class="kw">switch</span>(c) {
+ <span class="kw">case</span> 'e':
+ <span class="kw">case</span> 'E':
+ <span class="kw">break</span>;
+ <span class="kw">default</span>:
+ putchar(c);
+ <span class="kw">break</span>;
+ }
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<p>Note that the expression inside the <code class="backtick">while</code> argument both assigns the return value of <code class="backtick">getchar</code> to <code class="backtick">c</code> and tests to see if it is equal to <code class="backtick">EOF</code>
+ (which is returned when no more input characters are available). This
+is a very common idiom in C programs. Note also that even though <code class="backtick">c</code> holds a single character, it is declared as an <code class="backtick">int</code>. The reason is that <code class="backtick">EOF</code> (a constant defined in <code class="backtick">stdio.h</code>) is outside the normal character range, and if you assign it to a variable of type <code class="backtick">char</code>
+ it will be quietly truncated into something else. Because C doesn't
+provide any sort of exception mechanism for signalling unusual outcomes
+of function calls, designers of library functions often have to resort
+to extending the output of a function to include an extra value or two
+to signal failure; we'll see this a lot when the null pointer shows up
+in the chapter on <a href="#pointers">pointers</a>.</p>
+<h5 id="The_do..while_loop"><span class="header-section-number">4.7.2.2.2</span> The do..while loop</h5>
+<p>The <code class="backtick">do</code>..<code class="backtick">while</code> statement is like the <code class="backtick">while</code>
+ statement except the test is done at the end of the loop instead of the
+ beginning. This means that the body of the loop is always executed at
+least once.</p>
+<p>Here's a loop that does a random walk until it gets back to 0 (if ever). If we changed the <code class="backtick">do</code>..<code class="backtick">while</code> loop to a <code class="backtick">while</code> loop, it would never take the first step, because <code class="backtick">pos</code> starts at 0.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;time.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> pos = <span class="dv">0</span>; <span class="co">/* position of random walk */</span>
+
+ srandom(time(<span class="dv">0</span>)); <span class="co">/* initialize random number generator */</span>
+
+ <span class="kw">do</span> {
+ pos += random() &amp; <span class="bn">0x1</span> ? +<span class="dv">1</span> : -<span class="dv">1</span>;
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, pos);
+ } <span class="kw">while</span>(pos != <span class="dv">0</span>);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/statements/randomWalk.c" class="uri">examples/statements/randomWalk.c</a>
+</div>
+<p>The <code class="backtick">do</code>..<code class="backtick">while</code> loop is used much less often in practice than the <code class="backtick">while</code> loop.</p>
+<p>It is theoretically possible to convert a <code class="backtick">do</code>..<code class="backtick">while</code> loop to a <code class="backtick">while</code>
+ loop by making an extra copy of the body in front of the loop, but this
+ is not recommended since it's almost always a bad idea to duplicate
+code.</p>
+<h5 id="forLoop"><span class="header-section-number">4.7.2.2.3</span> The for loop</h5>
+<p>The <code class="backtick">for</code> loop is a form of <a href="http://en.wikipedia.org/wiki/Syntactic_sugar">syntactic sugar</a>
+ that is used when a loop iterates over a sequence of values stored in
+some variable (or variables). Its argument consists of three
+expressions: the first initializes the variable and is called once when
+the statement is first reached. The second is the test to see if the
+body of the loop should be executed; it has the same function as the
+test in a <code class="backtick">while</code> loop. The third sets the variable to its next value. Some examples:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="co">/* count from 0 to 9 */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; <span class="dv">10</span>; i++) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, i);
+ }
+
+ <span class="co">/* and back from 10 to 0 */</span>
+ <span class="kw">for</span>(i = <span class="dv">10</span>; i &gt;= <span class="dv">0</span>; i--) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, i);
+ }
+
+ <span class="co">/* this loop uses some functions to move around */</span>
+ <span class="kw">for</span>(c = firstCustomer(); c != END_OF_CUSTOMERS; c = customerAfter(c)) {
+ helpCustomer(c);
+ }
+
+ <span class="co">/* this loop prints powers of 2 that are less than n*/</span>
+ <span class="kw">for</span>(i = <span class="dv">1</span>; i &lt; n; i *= <span class="dv">2</span>) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, i);
+ }
+
+ <span class="co">/* this loop does the same thing with two variables by using the comma operator */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>, power = <span class="dv">1</span>; power &lt; n; i++, power *= <span class="dv">2</span>) {
+ printf(<span class="st">"2^%d = %d</span><span class="ch">\n</span><span class="st">"</span>, i, power);
+ }
+
+ <span class="co">/* Here are some nested loops that print a times table */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ <span class="kw">for</span>(j = <span class="dv">0</span>; j &lt; n; j++) {
+ printf(<span class="st">"%d*%d=%d "</span>, i, j, i*j);
+ }
+ putchar(<span class="ch">'\n'</span>);
+ }</code></pre></div>
+<p>A <code class="backtick">for</code> loop can always be rewritten as a <code class="backtick">while</code> loop.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; <span class="dv">10</span>; i++) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, i);
+ }
+
+ <span class="co">/* is exactly the same as */</span>
+
+ i = <span class="dv">0</span>;
+ <span class="kw">while</span>(i &lt; <span class="dv">10</span>) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, i);
+ i++;
+ }</code></pre></div>
+<h5 id="Loops_with_break.2C_continue.2C_and_goto"><span class="header-section-number">4.7.2.2.4</span> Loops with break, continue, and goto</h5>
+<p>The <code class="backtick">break</code> statement immediately exits the innermost enclosing loop or <code class="backtick">switch</code> statement.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ openDoorNumber(i);
+ <span class="kw">if</span>(boobyTrapped()) {
+ <span class="kw">break</span>;
+ }
+ }</code></pre></div>
+<p>The <code class="backtick">continue</code> statement skips to the
+next iteration. Here is a program with a loop that iterates through all
+the integers from -10 through 10, skipping 0:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="co">/* print a table of inverses */</span>
+<span class="ot">#define MAXN (10)</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> n;
+
+ <span class="kw">for</span>(n = -MAXN; n &lt;= MAXN; n++) {
+ <span class="kw">if</span>(n == <span class="dv">0</span>) <span class="kw">continue</span>;
+ printf(<span class="st">"1.0/%3d = %+f</span><span class="ch">\n</span><span class="st">"</span>, n, <span class="fl">1.0</span>/n);
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/statements/inverses.c" class="uri">examples/statements/inverses.c</a>
+</div>
+<p>Occasionally, one would like to break out of more than one nested loop. The way to do this is with a <code class="backtick">goto</code> statement.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ <span class="kw">for</span>(j = <span class="dv">0</span>; j &lt; n; j++) {
+ doSomethingTimeConsumingWith(i, j);
+ <span class="kw">if</span>(checkWatch() == OUT_OF_TIME) {
+ <span class="kw">goto</span> giveUp;
+ }
+ }
+ }
+giveUp:
+ puts(<span class="st">"done"</span>);</code></pre></div>
+<p>The target for the <code class="backtick">goto</code> is a <strong>label</strong>, which is just an identifier followed by a colon and a statement (the empty statement <code class="backtick">;</code> is ok).</p>
+<p>The <code class="backtick">goto</code> statement can be used to jump
+anywhere within the same function body, but breaking out of nested loops
+ is widely considered to be its only genuinely acceptable use in normal
+code.</p>
+<h4 id="Choosing_where_to_put_a_loop_exit"><span class="header-section-number">4.7.2.3</span> Choosing where to put a loop exit</h4>
+<p>Choosing where to put a loop exit is usually pretty obvious: you want
+ it after any code that you want to execute at least once, and before
+any code that you want to execute only if the termination test fails.</p>
+<p>If you know in advance what values you are going to be iterating over, you will most likely be using a <code class="backtick">for</code> loop:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ a[i] = <span class="dv">0</span>;
+}</code></pre></div>
+<p>Most of the rest of the time, you will want a <code class="backtick">while</code> loop:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">while</span>(!done()) {
+ doSomething();
+}</code></pre></div>
+<p>The <code class="backtick">do</code>..<code class="backtick">while</code> loop comes up mostly when you want to try something, then try again if it failed:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">do</span> {
+ result = fetchWebPage(url);
+} <span class="kw">while</span>(result == <span class="dv">0</span>);</code></pre></div>
+<p>Finally, leaving a loop in the middle using <code class="backtick">break</code> can be handy if you have something extra to do before trying again:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">for</span>(;;) {
+ result = fetchWebPage(url);
+ <span class="kw">if</span>(result != <span class="dv">0</span>) {
+ <span class="kw">break</span>;
+ }
+ <span class="co">/* else */</span>
+ fprintf(stderr, <span class="st">"fetchWebPage failed with error code %03d</span><span class="ch">\n</span><span class="st">"</span>, result);
+ sleep(retryDelay); <span class="co">/* wait before trying again */</span>
+}</code></pre></div>
+<p>(Note the empty <code class="backtick">for</code> loop header means to loop forever; <code class="backtick">while(1)</code> also works.)</p>
+<h2 id="functions"><span class="header-section-number">4.8</span> Functions</h2>
+<p>A <strong>function</strong>, <strong>procedure</strong>, or <strong>subroutine</strong> encapsulates some complex computation as a single operation. Typically, when we <strong>call</strong> a function, we pass as <strong>arguments</strong> all the information this function needs, and any effect it has will be reflected in either its <strong>return value</strong>
+ or (in some cases) in changes to values pointed to by the arguments.
+Inside the function, the arguments are copied into local variables,
+which can be used just like any other local variable—they can even be
+assigned to without affecting the original argument.</p>
+<h3 id="functionDefinitions"><span class="header-section-number">4.8.1</span> Function definitions</h3>
+<p>A typical function definition looks like this:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* Returns the square of the distance between two points separated by </span>
+<span class="co"> dx in the x direction and dy in the y direction. */</span>
+<span class="dt">int</span>
+distSquared(<span class="dt">int</span> dx, <span class="dt">int</span> dy)
+{
+ <span class="kw">return</span> dx*dx + dy*dy;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/functions/distSquaredNoHeader.c" class="uri">examples/functions/distSquaredNoHeader.c</a>
+</div>
+<p>The part outside the braces is called the <strong>function declaration</strong>; the braces and their contents are the <strong>function body</strong>.</p>
+<p>Like most complex declarations in C, once you delete the type names
+the declaration looks like how the function is used: the name of the
+function comes before the parentheses and the arguments inside. The <code class="backtick">int</code>s
+ scattered about specify the type of the return value of the function
+(before the function name) and of the parameters (inside the parentheses
+ after the function name); these are used by the compiler to determine
+how to pass values in and out of the function and (usually for more
+complex types, since numerical types will often convert automatically)
+to detect type mismatches.</p>
+<p>If you want to define a function that doesn't return anything, declare its return type as <code class="backtick">void</code>. You should also declare a parameter list of <code class="backtick">void</code> if the function takes no arguments.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* Prints "hi" to stdout */</span>
+<span class="dt">void</span>
+helloWorld(<span class="dt">void</span>)
+{
+ puts(<span class="st">"hi"</span>);
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/functions/helloWorld.c" class="uri">examples/functions/helloWorld.c</a>
+</div>
+<p>It is not strictly speaking an error to omit the second <code class="backtick">void</code> here. Putting <code class="backtick">void</code> in for the parameters tells the compiler to enforce that no arguments are passed in. If we had instead declared <code class="backtick">helloWorld</code> as</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* Prints "hi" to stdout */</span>
+<span class="dt">void</span>
+helloWorld() <span class="co">/* DANGER! */</span>
+{
+ puts(<span class="st">"hi"</span>);
+}</code></pre></div>
+<p>it would be possible to call it as</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> helloWorld(<span class="st">"this is a bogus argument"</span>);</code></pre></div>
+<p>without causing an error. The reason is that a function declaration
+with no arguments means that the function can take an unspecified number
+ of arguments, and it's up to the user to make sure they pass in the
+right ones. There are good historical reasons for what may seem like
+an obvious lack of sense in the design of the language here, and fixing
+this bug would break most C code written before 1989. But you shouldn't
+ever write a function declaration with an empty argument list, since you
+ want the compiler to know when something goes wrong.</p>
+<h3 id="functionIdeology"><span class="header-section-number">4.8.2</span> When to write a function</h3>
+<p>As with any kind of abstraction, there are two goals to making a function:</p>
+<ul>
+<li><strong>Encapsulation:</strong> If you have some task to carry out
+that is simple to describe from the outside but messy to understand from
+ the inside, wrapping it in a function lets somebody carry out this task
+ without having to know the details. This is also useful if you want to
+change the implementation later.</li>
+<li><strong>Code re-use:</strong> If you find yourself writing the same
+lines of code in several places (or worse, are tempted to copy a block
+of code to several places), you should probably put this code in a
+function (or perhaps more than one function, if there is no succinct way
+ to describe what this block of code is doing).</li>
+</ul>
+<p>Both of these goals may be trumped by the goal of making your code
+understandable. If you can't describe what a function is doing in a
+single, simple sentence, this is a sign that maybe you need to
+restructure your code. Having a function that does more than one thing
+(or does different things depending on its arguments) is likely to lead
+to confusion. So, for example, this is not a good function definition:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/*** ### UGLY CODE AHEAD ### ***/</span>
+
+<span class="co">/*</span>
+<span class="co"> * If getMaximum is true, return maximum of x and y,</span>
+<span class="co"> * else return minimum.</span>
+<span class="co"> */</span>
+<span class="dt">int</span>
+computeMaximumOrMinimum(<span class="dt">int</span> x, <span class="dt">int</span> y, <span class="dt">int</span> getMaximum)
+{
+ <span class="kw">if</span>(x &gt; y) {
+ <span class="kw">if</span>(getMaximum) {
+ <span class="kw">return</span> x;
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> y;
+ }
+ } <span class="kw">else</span> {
+ <span class="kw">if</span>(getMaximum) {
+ <span class="kw">return</span> y;
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> x;
+ }
+ }
+}</code></pre></div>
+<p>Better would be to write two functions:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* return the maximum of x and y */</span>
+<span class="dt">int</span>
+maximum(<span class="dt">int</span> x, <span class="dt">int</span> y)
+{
+ <span class="kw">if</span>(x &gt; y) {
+ <span class="kw">return</span> x;
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> y;
+ }
+}
+
+<span class="co">/* return the minimum of x and y */</span>
+<span class="dt">int</span>
+minimum(<span class="dt">int</span> x, <span class="dt">int</span> y)
+{
+ <span class="kw">if</span>(x &lt; y) {
+ <span class="kw">return</span> x;
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> y;
+ }
+}</code></pre></div>
+<p>At the same time, it's possible for a function to be too simple. Suppose I write the function</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* print x to stdout followed by a newline */</span>
+<span class="dt">void</span>
+printIntWithNewline(<span class="dt">int</span> x)
+{
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, x);
+}</code></pre></div>
+<p>It's pretty clear from the name what this function does. But since anybody who has been using C for a while has seen <code>printf("%d\n", ...)</code> over and over again, it's usually more clear to expand out the definition:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> printIntWithNewline(<span class="dv">2+5</span>); <span class="co">/* this could do anything */</span>
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, <span class="dv">2+7</span>); <span class="co">/* this does exactly what it says */</span></code></pre></div>
+<p>As with all caveats, this caveat comes with its own caveat: what
+might justify a function like this is if you want to be able to do some
+kind of specialized formatting that should be consistent for all values
+of a particular form. So you might write a <code>printDistance</code> function like the above as a stub for a fancier function that might use different units at different scales or something.</p>
+<p>A similar issue will come up with <a href="#nonSyntacticMacros">non-syntactic macros</a>,
+ which also tend to fail the "does this make my code more or less
+understandable" test. Usually it is a bad idea to try to replace common C
+ idioms.</p>
+<h3 id="Calling_a_function"><span class="header-section-number">4.8.3</span> Calling a function</h3>
+<p>A function call consists of the function followed by its arguments
+(if any) inside parentheses, separated by commas. For a function with
+no arguments, call it with nothing between the parentheses. A function
+call that returns a value can be used in an expression just like a
+variable. A call to a <code class="backtick">void</code> function can only be used as an expression by itself:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> totalDistance += distSquared(x1 - x2, y1 - y2);
+ helloWorld();
+ greetings += helloWorld(); <span class="co">/* ERROR */</span></code></pre></div>
+<h3 id="The_return_statement"><span class="header-section-number">4.8.4</span> The return statement</h3>
+<p>To return a value from a function, write a <code class="backtick">return</code> statement, e.g.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">return</span> <span class="dv">172</span>;</code></pre></div>
+<p>The argument to <code class="backtick">return</code> can be any expression. Unlike the expression in, say, an <code class="backtick">if</code> statement, you do not need to wrap it in parentheses. If a function is declared <code class="backtick">void</code>, you can do a <code class="backtick">return</code> with no expression, or just let control reach the end of the function.</p>
+<p>Executing a <code class="backtick">return</code> statement immediately terminates the function. This can be used like <code class="backtick">break</code> to get out of loops early.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* returns 1 if n is prime, 0 otherwise */</span>
+<span class="dt">int</span>
+isPrime(<span class="dt">int</span> n)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">if</span> (n &lt; <span class="dv">2</span>) <span class="kw">return</span> <span class="dv">0</span>; <span class="co">/* special case for 0, 1, negative n */</span>
+
+ <span class="kw">for</span>(i = <span class="dv">2</span>; i &lt; n; i++) {
+ <span class="kw">if</span> (n % i == <span class="dv">0</span>) {
+ <span class="co">/* found a factor */</span>
+ <span class="kw">return</span> <span class="dv">0</span>;
+ }
+ }
+
+ <span class="co">/* no factors */</span>
+ <span class="kw">return</span> <span class="dv">1</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/functions/isPrime.c" class="uri">examples/functions/isPrime.c</a>
+</div>
+<h3 id="Function_declarations_and_modules"><span class="header-section-number">4.8.5</span> Function declarations and modules</h3>
+<p>By default, functions have <strong>global scope</strong>: they can be used anywhere in your program, even in other files. If a file doesn't contain a declaration for a function <code class="backtick">someFunc</code> before it is used, the compiler will assume that it is declared like <code class="backtick">int&nbsp;someFunc()</code> (i.e., return type <code class="backtick">int</code>
+ and unknown arguments). This can produce infuriating complaints later
+when the compiler hits the real declaration and insists that your
+function <code class="backtick">someFunc</code> should be returning an <code class="backtick">int</code> and you are a bonehead for declaring it otherwise.</p>
+<p>The solution to such insulting compiler behavior is to either
+(a) move the function declaration before any functions that use it; or
+(b) put in a declaration without a body before any functions that use
+it, in addition to the declaration that appears in the function
+definition. (Note that this violates the <strong>no separate but equal</strong>
+ rule, but the compiler should tell you when you make a mistake.) Option
+ (b) is generally preferred, and is the only option when the function is
+ used in a different file.</p>
+<p>To make sure that all declarations of a function are consistent, the
+usual practice is to put them in an include file. For example, if <code class="backtick">distSquared</code> is used in a lot of places, we might put it in its own file <code class="backtick">distSquared.c</code>:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include "distSquared.h"</span>
+
+<span class="dt">int</span>
+distSquared(<span class="dt">int</span> dx, <span class="dt">int</span> dy)
+{
+ <span class="kw">return</span> dx*dx + dy*dy;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/functions/distSquared.c" class="uri">examples/functions/distSquared.c</a>
+</div>
+<p>The file <code>distSquared.c</code> above uses <code class="backtick">#include</code> to include a copy of the following header file <code>distSquared.h</code>:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* Returns the square of the distance between two points separated by </span>
+<span class="co"> dx in the x direction and dy in the y direction. */</span>
+<span class="dt">int</span> distSquared(<span class="dt">int</span> dx, <span class="dt">int</span> dy);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/functions/distSquared.h" class="uri">examples/functions/distSquared.h</a>
+</div>
+<p>Note that the declaration in <code class="backtick">distSquared.h</code>
+ doesn't have a body. Instead, it's terminated by a semicolon, like a
+variable declaration. It's also worth noting that we moved the
+documenting comment to <code class="backtick">distSquared.h</code>: the idea is that <code class="backtick">distSquared.h</code> is the public face of this (very small one-function) module, and so the explanation of how to use the function should be there.</p>
+<p>The reason <code class="backtick">distSquared.c</code> includes <code class="backtick">distSquared.h</code> is to get the compiler to verify that the declarations in the two files match. But to use the <code class="backtick">distSquared</code> function, we also put <code class="backtick">#include&nbsp;"distSquared.h"</code> at the top of the file that uses it:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include "distSquared.h"</span>
+
+<span class="ot">#define THRESHOLD (100)</span>
+
+<span class="dt">int</span>
+tooClose(<span class="dt">int</span> x1, <span class="dt">int</span> y1, <span class="dt">int</span> x2, <span class="dt">int</span> y2)
+{
+ <span class="kw">return</span> distSquared(x1 - x2, y1 - y2) &lt; THRESHOLD;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/functions/tooClose.c" class="uri">examples/functions/tooClose.c</a>
+</div>
+<p>The <code class="backtick">#include</code> on line 1 uses double quotes instead of angle brackets; this tells the compiler to look for <code class="backtick">distSquared.h</code> in the current directory instead of the system include directory (typically <code class="backtick">/usr/include</code>).</p>
+<h3 id="Static_functions"><span class="header-section-number">4.8.6</span> Static functions</h3>
+<p>By default, all functions are global; they can be used in any file of
+ your program whether or not a declaration appears in a header file. To
+restrict access to the current file, declare a function <code class="backtick">static</code>, like this:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">static</span> <span class="dt">void</span>
+helloHelper(<span class="dt">void</span>)
+{
+ puts(<span class="st">"hi!"</span>);
+}
+
+<span class="dt">void</span>
+hello(<span class="dt">int</span> repetitions)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; repetitions; i++) {
+ helloHelper();
+ }
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/functions/staticHello.c" class="uri">examples/functions/staticHello.c</a>
+</div>
+<p>The function <code class="backtick">hello</code> will be visible everywhere. The function <code class="backtick">helloHelper</code> will only be visible in the current file.</p>
+<p>It's generally good practice to declare a function static unless you intend to make it available, since not doing so can cause <strong>namespace conflicts</strong>,
+ where the presence of two functions with the same name either prevents
+the program from linking or—even worse—causes the wrong function to be
+called. The latter can happen with library functions, since C allows the
+ programmer to override library functions by defining a new function
+with the same name. Early on in my career as a C programmer, I once had a
+ program fail in a spectacularly incomprehensible way because I'd
+written a <code class="backtick">select</code> function without realizing that <code class="backtick">select</code> is a core library function in Unix.</p>
+<h3 id="Local_variables"><span class="header-section-number">4.8.7</span> Local variables</h3>
+<p>A function may contain definitions of <strong>local variables</strong>,
+ which are visible only inside the function and which survive only until
+ the function returns. These may be declared at the start of any block
+(group of statements enclosed by braces), but it is conventional to
+declare all of them at the outermost block of the function.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* Given n, compute n! = 1*2*...*n */</span>
+<span class="co">/* Warning: will overflow on 32-bit machines if n &gt; 12 */</span>
+<span class="dt">int</span>
+factorial(<span class="dt">int</span> n)
+{
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> product;
+
+ <span class="kw">if</span>(n &lt; <span class="dv">2</span>) <span class="kw">return</span> n;
+ <span class="co">/* else */</span>
+
+ product = <span class="dv">1</span>;
+
+ <span class="kw">for</span>(i = <span class="dv">2</span>; i &lt;= n; i++) {
+ product *= i;
+ }
+
+ <span class="kw">return</span> product;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/functions/factorial.c" class="uri">examples/functions/factorial.c</a>
+</div>
+<h3 id="Mechanics_of_function_calls"><span class="header-section-number">4.8.8</span> Mechanics of function calls</h3>
+<p>Several things happen under the hood when a function is called. Since
+ a function can be called from several different places, the CPU needs
+to store its previous state to know where to go back. It also needs to
+allocate space for function arguments and local variables.</p>
+<p>Some of this information will be stored in <strong>registers</strong>, memory locations built into the CPU itself, but most will go on the <strong>stack</strong>,
+ a region of memory that on typical machines grows downward, even though
+ the most recent additions to the stack are called the "top" of the
+stack. The location of the top of the stack is stored in the CPU in a
+special register called the <strong>stack pointer</strong>.</p>
+<p>So a typical function call looks like this internally:</p>
+<ol style="list-style-type: decimal">
+<li>The current <strong>instruction pointer</strong> or <strong>program counter</strong> value, which gives the address of the next line of machine code to be executed, is pushed onto the stack.</li>
+<li>Any arguments to the function are copied either into specially
+designated registers or onto new locations on the stack. The exact rules
+ for how to do this vary from one CPU architecture to the next, but a
+typical convention might be that the first few arguments are copied into
+ registers and the rest (if any) go on the stack.</li>
+<li>The instruction pointer is set to the first instruction in the code for the function.</li>
+<li>The code for the function allocates additional space on the stack to
+ hold its local variables (if any) and to save copies of the values of
+any registers it wants to use (so that it can restore their contents
+before returning to its caller).</li>
+<li>The function body is executed until it hits a <code class="backtick">return</code> statement.</li>
+<li>Returning from the function is the reverse of invoking it: any saved
+ registers are restored from the stack, the return value is copied to a
+standard register, and the values of the instruction pointer and stack
+pointer are restored to what they were before the function call.</li>
+</ol>
+<p>From the programmer's perspective, the important point is that both
+the arguments and the local variables inside a function are stored in
+freshly-allocated locations that are thrown away after the function
+exits. So after a function call the state of the CPU is restored to its
+previous state, except for the return value. Any arguments that are
+passed to a function are passed as copies, so changing the values of the
+ function arguments inside the function has no effect on the caller. Any
+ information stored in local variables is lost.</p>
+<p>Under very rare circumstances, it may be useful to have a variable
+local to a function that persists from one function call to the next.
+You can do so by declaring the variable <code class="backtick">static</code>. For example, here is a function that counts how many times it has been called:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* return the number of times the function has been called */</span>
+<span class="dt">int</span>
+counter(<span class="dt">void</span>)
+{
+ <span class="dt">static</span> <span class="dt">int</span> count = <span class="dv">0</span>;
+
+ <span class="kw">return</span> ++count;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/functions/staticCounter.c" class="uri">examples/functions/staticCounter.c</a>
+</div>
+<p>Static local variables are stored outside the stack with global
+variables, and have unbounded extent. But they are only visible inside
+the function that declares them. This makes them slightly less dangerous
+ than global variables—there is no fear that some foolish bit of code
+elsewhere will quietly change their value—but it is still the case that
+they usually aren't what you want. It is also likely that operations on
+static variables will be slightly slower than operations on ordinary
+("automatic") variables, since making them persistent means that they
+have to be stored in (slow) main memory instead of (fast) registers.</p>
+<h2 id="pointers"><span class="header-section-number">4.9</span> Pointers</h2>
+<h3 id="addressSpace"><span class="header-section-number">4.9.1</span> Memory and addresses</h3>
+<p>Memory in a typical modern computer is divided into two classes: a small number of <strong>registers</strong>,
+ which live on the CPU chip and perform specialized functions like
+keeping track of the location of the next machine code instruction to
+execute or the current stack frame, and <strong>main memory</strong>,
+which (mostly) lives outside the CPU chip and which stores the code and
+data of a running program. When the CPU wants to fetch a value from a
+particular location in main memory, it must supply an address: a 32-bit
+or 64-bit unsigned integer on typical current architectures, referring
+to one of up to 2<sup>32</sup> or 2<sup>64</sup> distinct 8-bit locations in the memory. These integers can be manipulated like any other integer; in C, they appear as <strong>pointers</strong>, a family of types that can be passed as arguments, stored in variables, returned from functions, etc.</p>
+<h3 id="Pointer_variables"><span class="header-section-number">4.9.2</span> Pointer variables</h3>
+<p>A <strong>pointer variable</strong> is a variable that holds a pointer, just like an <code>int</code> variable is a variable that holds an <code>int</code>.</p>
+<h4 id="Declaring_a_pointer_variable"><span class="header-section-number">4.9.2.1</span> Declaring a pointer variable</h4>
+<p>The convention in C is that the declaration of a complex type looks
+like its use. To declare a pointer-valued variable, write a declaration
+for the thing that it points to, but include a <code class="backtick">*</code> before the variable name:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">int</span> *pointerToInt;
+ <span class="dt">double</span> *pointerToDouble;
+ <span class="dt">char</span> *pointerToChar;
+ <span class="dt">char</span> **pointerToPointerToChar;</code></pre></div>
+<p>These declarations create four pointer variables, named <code>pointerToInt</code>, <code>pointerToDouble</code>, <code>pointerToChar</code>, and <code>pointerToPointerToChar</code>. On a typical 64-bit machine, each will be allocated 8 bytes, enough to represent an address in memory.</p>
+<p>The contents of these variables are initially arbitrary: to use them,
+ you will need to compute the address of something and assign it to the
+variable.</p>
+<h4 id="Assigning_to_pointer_variables"><span class="header-section-number">4.9.2.2</span> Assigning to pointer variables</h4>
+<p>Declaring a pointer-valued variable allocates space to hold the pointer but <em>not</em>
+ to hold anything it points to. Like any other variable in C, a
+pointer-valued variable will initially contain garbage—in this case, the
+ address of a location that might or might not contain something
+important. To initialize a pointer variable, you have to assign to it
+the address of something that already exists. Typically this is done
+using the <code class="backtick">&amp;</code> (<strong>address-of</strong>) operator:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">int</span> n; <span class="co">/* an int variable */</span>
+ <span class="dt">int</span> *p; <span class="co">/* a pointer to an int */</span>
+
+ p = &amp;n; <span class="co">/* p now points to n */</span></code></pre></div>
+<h4 id="Using_a_pointer"><span class="header-section-number">4.9.2.3</span> Using a pointer</h4>
+<p>Pointer variables can be used in two ways. The simplest way is to get
+ their value as with any other variable. This value will be an address,
+which can be stored in another pointer variable of the same type.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">int</span> n; <span class="co">/* an int variable */</span>
+ <span class="dt">int</span> *p; <span class="co">/* a pointer to an int */</span>
+ <span class="dt">int</span> *q; <span class="co">/* another pointer to an int */</span>
+
+ p = &amp;n; <span class="co">/* p now points to n */</span>
+ q = p; <span class="co">/* q now points to n as well */</span></code></pre></div>
+<p>But more often you will want to work on the value stored at the location pointed to. You can do this by using the <code class="backtick">*</code> (<strong>dereference</strong>) operator, which acts as an inverse of the address-of operator:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">int</span> n; <span class="co">/* an int variable */</span>
+ <span class="dt">int</span> *p; <span class="co">/* a pointer to an int */</span>
+
+ p = &amp;n; <span class="co">/* p now points to n */</span>
+
+ *p = <span class="dv">2</span>; <span class="co">/* sets n to 2 */</span>
+ *p = *p + *p; <span class="co">/* sets n to 4 */</span></code></pre></div>
+<p>The <code class="backtick">*</code> operator binds very tightly, so you can usually use <code class="backtick">*p</code> anywhere you could use the variable it points to without worrying about parentheses. However, a few operators, such as the <code class="backtick">--</code> and <code class="backtick">++</code> operators and the <code class="backtick">.</code> operator used to unpack <a href="#structs">structs</a>, bind tighter. These require parentheses if you want the <code class="backtick">*</code> to take precedence.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> (*p)++; <span class="co">/* increment the value pointed to by p */</span>
+ *p++; <span class="co">/* WARNING: increments p itself */</span></code></pre></div>
+<h4 id="Printing_pointers"><span class="header-section-number">4.9.2.4</span> Printing pointers</h4>
+<p>You can print a pointer value using <code class="backtick">printf</code> with the <code class="backtick">%p</code> format specifier. To do so, you should convert the pointer to type <code class="backtick">void&nbsp;*</code> first using a cast (see below for <code class="backtick">void&nbsp;*</code>
+ pointers), although on machines that don't have different
+representations for different pointer types, this may not be necessary.</p>
+<p>Here is a short program that prints out some pointer values:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="dt">int</span> G = <span class="dv">0</span>; <span class="co">/* a global variable, stored in BSS segment */</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">static</span> <span class="dt">int</span> s; <span class="co">/* static local variable, stored in BSS segment */</span>
+ <span class="dt">int</span> a; <span class="co">/* automatic variable, stored on stack */</span>
+ <span class="dt">int</span> *p; <span class="co">/* pointer variable for malloc below */</span>
+
+ <span class="co">/* obtain a block big enough for one int from the heap */</span>
+ p = malloc(<span class="kw">sizeof</span>(<span class="dt">int</span>));
+
+ printf(<span class="st">"&amp;G = %p</span><span class="ch">\n</span><span class="st">"</span>, (<span class="dt">void</span> *) &amp;G);
+ printf(<span class="st">"&amp;s = %p</span><span class="ch">\n</span><span class="st">"</span>, (<span class="dt">void</span> *) &amp;s);
+ printf(<span class="st">"&amp;a = %p</span><span class="ch">\n</span><span class="st">"</span>, (<span class="dt">void</span> *) &amp;a);
+ printf(<span class="st">"&amp;p = %p</span><span class="ch">\n</span><span class="st">"</span>, (<span class="dt">void</span> *) &amp;p);
+ printf(<span class="st">"p = %p</span><span class="ch">\n</span><span class="st">"</span>, (<span class="dt">void</span> *) p);
+ printf(<span class="st">"main = %p</span><span class="ch">\n</span><span class="st">"</span>, (<span class="dt">void</span> *) main);
+
+ free(p);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/pointers/lookingAtPointers.c" class="uri">examples/pointers/lookingAtPointers.c</a>
+</div>
+<p>When I run this on a Mac OS X 10.6 machine after compiling with <code class="backtick">gcc</code>, the output is:</p>
+<pre><code>&amp;G = 0x100001078
+&amp;s = 0x10000107c
+&amp;a = 0x7fff5fbff2bc
+&amp;p = 0x7fff5fbff2b0
+p = 0x100100080
+main = 0x100000e18</code></pre>
+<p>The interesting thing here is that we can see how the compiler
+chooses to allocate space for variables based on their storage classes.
+The global variable <code class="backtick">G</code> and the static local variable <code class="backtick">s</code> both persist between function calls, so they get placed in the BSS segment (see <a href="http://en.wikipedia.org/wiki/.bss" title="WikiPedia">.bss</a>) that starts somewhere around <code class="backtick">0x100000000</code>, typically after the code segment containing the actual code of the program. Local variables <code class="backtick">a</code> and <code class="backtick">p</code> are allocated on the stack, which grows down from somewhere near the top of the address space. The block returned from <code class="backtick">malloc</code> that <code class="backtick">p</code>
+ points to is allocated off the heap, a region of memory that may also
+grow over time and starts after the BSS segment. Finally, <code class="backtick">main</code> appears at 0x100000e18; this is in the code segment, which is a bit lower in memory than all the global variables.</p>
+<h3 id="The_null_pointer"><span class="header-section-number">4.9.3</span> The null pointer</h3>
+<p>The special value <code class="backtick">0</code>, known as the <strong>null pointer</strong>, may be assigned to a pointer of any type. It may or may not be represented by the actual address <code class="backtick">0</code>, but it will act like <code class="backtick">0</code> in all contexts (e.g., it has the value false in an <code class="backtick">if</code> or <code class="backtick">while</code>
+ statement). Null pointers are often used to indicate missing data or
+failed functions. Attempting to dereference a null pointer can have
+catastrophic effects, so it's important to be aware of when you might be
+ supplied with one.</p>
+<h3 id="Pointers_and_functions"><span class="header-section-number">4.9.4</span> Pointers and functions</h3>
+<p>A simple application of pointers is to get around C's limit on having
+ only one return value from a function. Because C arguments are copied,
+assigning a value to an argument inside a function has no effect on the
+outside. So the <code class="backtick">doubler</code> function below doesn't do much:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="co">/* doesn't work */</span>
+<span class="dt">void</span>
+doubler(<span class="dt">int</span> x)
+{
+ x *= <span class="dv">2</span>;
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> y;
+
+ y = <span class="dv">1</span>;
+
+ doubler(y); <span class="co">/* no effect on y */</span>
+
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, y); <span class="co">/* prints 1 */</span>
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/pointers/badDoubler.c" class="uri">examples/pointers/badDoubler.c</a>
+</div>
+<p>However, if instead of passing the value of <code class="backtick">y</code> into <code class="backtick">doubler</code> we pass a pointer to <code class="backtick">y</code>, then the <code class="backtick">doubler</code> function can reach out of its own stack frame to manipulate <code class="backtick">y</code> itself:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="dt">void</span>
+doubler(<span class="dt">int</span> *x)
+{
+ *x *= <span class="dv">2</span>;
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> y;
+
+ y = <span class="dv">1</span>;
+
+ doubler(&amp;y); <span class="co">/* sets y to 2 */</span>
+
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, y); <span class="co">/* prints 2 */</span>
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/pointers/goodDoubler.c" class="uri">examples/pointers/goodDoubler.c</a>
+</div>
+<p>Generally, if you pass the value of a variable into a function (with no <code class="backtick">&amp;</code>),
+ you can be assured that the function can't modify your original
+variable. When you pass a pointer, you should assume that the function
+can and will change the variable's value. If you want to write a
+function that takes a pointer argument but promises not to modify the
+target of the pointer, use <code class="backtick">const</code>, like this:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+printPointerTarget(<span class="dt">const</span> <span class="dt">int</span> *p)
+{
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, *p);
+}</code></pre></div>
+<p>The <code class="backtick">const</code> qualifier tells the compiler
+that the target of the pointer shouldn't be modified. This will cause it
+ to report an error if you try to assign to it anyway:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+printPointerTarget(<span class="dt">const</span> <span class="dt">int</span> *p)
+{
+ *p = <span class="dv">5</span>; <span class="co">/* produces compile-time error */</span>
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, *p);
+}</code></pre></div>
+<p>Passing <code class="backtick">const</code> pointers is mostly used
+when passing large structures to functions, where copying a 32-bit or 64-bit
+pointer is cheaper than copying the thing it points to.</p>
+<p>If you really want to modify the target anyway, C lets you "cast away <code class="backtick">const</code>":</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+printPointerTarget(<span class="dt">const</span> <span class="dt">int</span> *p)
+{
+ *((<span class="dt">int</span> *) p) = <span class="dv">5</span>; <span class="co">/* no compile-time error */</span>
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, *p);
+}</code></pre></div>
+<p>There is usually no good reason to do this. The one exception might be if the target of the pointer represents an <a href="#abstractDataTypes">abstract data type</a>,
+ and you want to modify its representation during some operation to
+optimize things somehow in a way that will not be visible outside the
+abstraction barrier, making it appear to leave the target constant.</p>
+<p>Note that while it is safe to pass pointers down into functions, it
+is very dangerous to pass pointers up. The reason is that the space used
+ to hold any local variable of the function will be reclaimed when the
+function exits, but the pointer will still point to the same location, <em>even though something else may now be stored there</em>. So this function is very dangerous:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span> *
+dangerous(<span class="dt">void</span>)
+{
+ <span class="dt">int</span> n;
+
+ <span class="kw">return</span> &amp;n; <span class="co">/* NO! */</span>
+}
+
+...
+
+ *dangerous() = <span class="dv">12</span>; <span class="co">/* writes 12 to some unknown location */</span></code></pre></div>
+<p>An exception is when you can guarantee that the location pointed to
+will survive even after the function exits, e.g. when the location is
+dynamically allocated using <code class="backtick">malloc</code> (see below) or when the local variable is declared <code class="backtick">static</code>:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span> *
+returnStatic(<span class="dt">void</span>)
+{
+ <span class="dt">static</span> <span class="dt">int</span> n;
+
+ <span class="kw">return</span> &amp;n;
+}
+
+...
+
+ *returnStatic() = <span class="dv">12</span>; <span class="co">/* writes 12 to the hidden static variable */</span></code></pre></div>
+<p>Usually returning a pointer to a <code class="backtick">static</code>
+ local variable is not good practice, since the point of making a
+variable local is to keep outsiders from getting at it. If you find
+yourself tempted to do this, a better approach is to allocate a new
+block using <code class="backtick">malloc</code> (see below) and return a pointer to that. The downside of the <code class="backtick">malloc</code> method is that the caller has to promise to call <code class="backtick">free</code> on the block later, or you will get a storage leak.</p>
+<h3 id="pointerArithmetic"><span class="header-section-number">4.9.5</span> Pointer arithmetic and arrays</h3>
+<p>Because pointers are just numerical values, one can do arithmetic on them. Specifically, it is permitted to</p>
+<ul>
+<li>Add an integer to a pointer or subtract an integer from a pointer. The effect of <code class="backtick">p+n</code> where <code class="backtick">p</code> is a pointer and <code class="backtick">n</code> is an integer is to compute the address equal to <code class="backtick">p</code> plus <code class="backtick">n</code> times the size of whatever <code class="backtick">p</code> points to (this is why <code class="backtick">int&nbsp;*</code> pointers and <code class="backtick">char&nbsp;*</code> pointers aren't the same).</li>
+<li>Subtract one pointer from another. The two pointers must have the same type (e.g. both <code class="backtick">int&nbsp;*</code> or both <code class="backtick">char&nbsp;*</code>). The result is a signed integer value of type <a href="#sizeTypes"><code>ptrdiff_t</code></a>, equal to the numerical difference between the addresses divided by the size of the objects pointed to.</li>
+<li>Compare two pointers using <code class="backtick">==</code>, <code class="backtick">!=</code>, <code class="backtick">&lt;</code>, <code class="backtick">&gt;</code>, <code class="backtick">&lt;=</code>, or <code class="backtick">&gt;=</code>.</li>
+<li>Increment or decrement a pointer using <code class="backtick">++</code> or <code class="backtick">--</code>.</li>
+</ul>
+<h4 id="arrays"><span class="header-section-number">4.9.5.1</span> Arrays</h4>
+<p>The main application of pointer arithmetic in C is in <strong>arrays</strong>.
+ An array is a block of memory that holds one or more objects of a given
+ type. It is declared by giving the type of object the array holds
+followed by the array name and the size in square brackets:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">int</span> a[<span class="dv">50</span>]; <span class="co">/* array of 50 ints */</span>
+ <span class="dt">char</span> *cp[<span class="dv">100</span>]; <span class="co">/* array of 100 pointers to char */</span></code></pre></div>
+<p>Declaring an array allocates enough space to hold the specified number of objects (e.g. 200 bytes for <code class="backtick">a</code> above and 400 for <code class="backtick">cp</code>—note that a <code class="backtick">char&nbsp;*</code> is an address, so it is much bigger than a <code class="backtick">char</code>). The number inside the square brackets must be a constant whose value can be determined at compile time.</p>
+<p>The array name acts like a constant pointer to the zeroth element of
+the array. It is thus possible to set or read the zeroth element using <code class="backtick">*a</code>. But because the array name is constant, you can't assign to it:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dv">1</span> *a = <span class="dv">12</span>; <span class="co">/* sets zeroth element to 12 */</span>
+ <span class="dv">2</span>
+ <span class="dv">3</span> a = &amp;n; <span class="co">/* #### DOESN'T WORK #### */</span></code></pre></div>
+<p>More common is to use square brackets to refer to a particular element of the array. The expression <code class="backtick">a[n]</code> is defined to be equivalent to <code class="backtick">*(a+n)</code>; the <strong>index</strong> <code class="backtick">n</code> (an integer) is added to the base of the array (a pointer), to get to the location of the <code class="backtick">n</code>-th element of <code class="backtick">a</code>. The implicit <code class="backtick">*</code>
+ then dereferences this location so that you can read its value (in a
+normal expression) or assign to it (on the left-hand side of an
+assignment operator). The effect is to allow you to use <code class="backtick">a[n]</code> just as you would any other variable of type <code class="backtick">int</code> (or whatever type <code class="backtick">a</code> was declared as).</p>
+<p>Note that C doesn't do any sort of bounds checking. Given the declaration <code class="backtick">int&nbsp;a[50];</code>, only indices from <code class="backtick">a[0]</code> to <code class="backtick">a[49]</code> can be used safely. However, the compiler will not blink at <code class="backtick">a[-12]</code> or <code class="backtick">a[10000]</code>.
+ If you read from such a location you will get garbage data; if you
+write to it, you will overwrite god-knows-what, possibly trashing some
+other variable somewhere else in your program or some critical part of
+the stack (like the location to jump to when you return from a
+function). It is up to you as a programmer to avoid such <strong>buffer overruns</strong>, which can lead to very mysterious (and in the case of code that gets input from a network, security-damaging) bugs. The <a href="#valgrind">valgrind</a> program can help detect such overruns in some cases.</p>
+<p>Another curious feature of the definition of <code class="backtick">a[n]</code> as identical to <code class="backtick">*(a+n)</code> is that it doesn't actually matter which of the array name or the index goes inside the square brackets. So all of <code class="backtick">a[0]</code>, <code class="backtick">*a</code>, and <code class="backtick">0[a]</code> refer to the zeroth entry in <code class="backtick">a</code>. Unless you are deliberately trying to obfuscate your code, it's best to write what you mean.</p>
+<h4 id="arraysAndFunctions"><span class="header-section-number">4.9.5.2</span> Arrays and functions</h4>
+<p>Because array names act like pointers, they can be passed into
+functions that expect pointers as their arguments. For example, here is a
+ function that computes the sum of all the values in an array <code class="backtick">a</code> of size <code class="backtick">n</code>:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* compute the sum of the first n elements of array a */</span>
+<span class="dt">int</span>
+sumArray(<span class="dt">int</span> n, <span class="dt">const</span> <span class="dt">int</span> *a)
+{
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> sum;
+
+ sum = <span class="dv">0</span>;
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ sum += a[i];
+ }
+
+ <span class="kw">return</span> sum;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/pointers/sumArray.c" class="uri">examples/pointers/sumArray.c</a>
+</div>
+<p>Note the use of <code class="backtick">const</code> to promise that <code class="backtick">sumArray</code> won't modify the contents of <code class="backtick">a</code>.</p>
+<p>Another way to write the function header is to declare <code class="backtick">a</code> as an array of unknown size:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* return the sum of the values in a, an array of size n */</span>
+<span class="dt">int</span>
+sumArray(<span class="dt">int</span> n, <span class="dt">const</span> <span class="dt">int</span> a[])
+{
+ ...
+}</code></pre></div>
+<p>This has <em>exactly</em> the same meaning to the compiler as the previous definition. Even though normally the declarations <code class="backtick">int&nbsp;a[10]</code> and <code class="backtick">int&nbsp;*a</code> mean very different things (the first one allocates space to hold 10 <code class="backtick">int</code>s, and prevents assigning a new value to <code class="backtick">a</code>), in a function argument <code class="backtick">int&nbsp;a[]</code> is just <a href="http://en.wikipedia.org/wiki/Syntactic_sugar">syntactic sugar</a> for <code class="backtick">int&nbsp;*a</code>. You can even modify what <code class="backtick">a</code> points to inside <code class="backtick">sumArray</code> by assigning to it. This will allow you to do things that you usually don't want to do, like write this hideous routine:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* return the sum of the first n values in a */</span>
+<span class="dt">int</span>
+sumArray(<span class="dt">int</span> n, <span class="dt">const</span> <span class="dt">int</span> a[])
+{
+ <span class="dt">const</span> <span class="dt">int</span> *an; <span class="co">/* pointer to first element not in a */</span>
+ <span class="dt">int</span> sum;
+
+ sum = <span class="dv">0</span>;
+ an = a+n;
+
+ <span class="kw">while</span>(a &lt; an) {
+ sum += *a++;
+ }
+
+ <span class="kw">return</span> sum;
+}</code></pre></div>
+<h4 id="multidimensionalArrays"><span class="header-section-number">4.9.5.3</span> Multidimensional arrays</h4>
+<p>Arrays can themselves be members of arrays. The result is a multidimensional array, where a value in row <code class="backtick">i</code> and column <code class="backtick">j</code> is accessed by <code class="backtick">a[i][j]</code>.</p>
+<p>Declaration is similar to one-dimensional arrays:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">int</span> a[<span class="dv">6</span>][<span class="dv">4</span>]; <span class="co">/* declares an array of 6 rows of 4 ints each */</span></code></pre></div>
+<p>This declaration produces an array of 24 <code class="backtick">int</code> values, packed contiguously in memory. The interpretation is that <code class="backtick">a</code> is an array of 6 objects, each of which is an array of 4 <code class="backtick">int</code>s.</p>
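+<p>If we imagine a smaller array, declared as <code class="backtick">int&nbsp;a[3][6]</code> (3 rows of 6 <code class="backtick">int</code>s each), containing increasing values like this:</p>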
+<pre><code> 0 1 2 3 4 5
+ 6 7 8 9 10 11
+12 13 14 15 16 17</code></pre>
+<p>the actual positions in memory will look like this:</p>
+<pre><code> 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
+ ^ ^ ^
+a[0] a[1] a[2]</code></pre>
+<p>To look up a value, we do the usual array-indexing magic. Suppose we want to find <code class="backtick">a[1][4]</code>. The name <code class="backtick">a</code> acts as a pointer to the base of the array. The name <code class="backtick">a[1]</code> says to skip ahead 1 times the size of the things pointed to by <code class="backtick">a</code>, which are arrays of 6 <code class="backtick">int</code>s each, for a total size of 24 bytes assuming 4-byte <code class="backtick">int</code>s. For <code class="backtick">a[1][4]</code>, we start at <code class="backtick">a[1]</code> and move forward 4 times the size of the thing pointed to by <code class="backtick">a[1]</code>, which is an <code class="backtick">int</code>; this puts us 24+16 bytes from <code class="backtick">a</code>, the position of 10 in the picture above.</p>
+<p>Like other array declarations, the size must be specified at compile
+time in pre-C99 C. If this is not desirable, a similar effect can be
+obtained by allocating each row separately using <code class="backtick">malloc</code> and building a master list of pointers to rows, of type <code class="backtick">int&nbsp;**</code>.
+ The downside of this approach is that the array is no longer contiguous
+ (which may affect cache performance) and it requires reading a pointer
+to find the location of a particular value, instead of just doing
+address arithmetic starting from the base address of the array. But
+elements can still be accessed using the <code class="backtick">a[i][j]</code> syntax. An example of this approach is given below:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* Demo program for malloc'd two-dimensional arrays */</span>
+
+<span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="co">/* frees a 2d array created by malloc2d */</span>
+<span class="dt">void</span>
+free2d(<span class="dt">void</span> **a)
+{
+ <span class="dt">void</span> **row;
+
+ <span class="co">/* first free rows */</span>
+ <span class="kw">for</span>(row = a; *row != <span class="dv">0</span>; row++) {
+ free(*row);
+ }
+
+ <span class="co">/* then free array of rows */</span>
+ free(a);
+}
+
+<span class="co">/* returns a two-dimensional array with numRows rows and </span>
+<span class="co"> * rowSize bytes per row, or 0 on allocation failure.</span>
+<span class="co"> * The caller is responsible for freeing the result with free2d. */</span>
+<span class="dt">void</span> **
+malloc2d(size_t numRows, size_t rowSize)
+{
+ <span class="dt">void</span> **a;
+ size_t i;
+
+ <span class="co">/* a is an array of void * pointers that point to the rows */</span>
+ <span class="co">/* The last element is 0, so free2d can detect the last row */</span>
+ a = malloc(<span class="kw">sizeof</span>(<span class="dt">void</span> *) * (numRows + <span class="dv">1</span>)); <span class="co">/* one extra for sentinel */</span>
+ <span class="kw">if</span>(a == <span class="dv">0</span>) {
+ <span class="co">/* malloc failed */</span>
+ <span class="kw">return</span> <span class="dv">0</span>;
+ }
+
+ <span class="co">/* now allocate the actual rows */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; numRows; i++) {
+ a[i] = malloc(rowSize);
+ <span class="kw">if</span>(a[i] == <span class="dv">0</span>) {
+ <span class="co">/* note that 0 in a[i] will stop free2d after it frees previous rows */</span>
+ free2d(a);
+ <span class="kw">return</span> <span class="dv">0</span>;
+ }
+ }
+
+ <span class="co">/* initialize the sentinel value */</span>
+ a[numRows] = <span class="dv">0</span>;
+
+ <span class="kw">return</span> a;
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> rows;
+ <span class="dt">int</span> cols;
+ <span class="dt">int</span> **a;
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> j;
+
+ <span class="kw">if</span>(argc != <span class="dv">3</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s rows cols</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+ <span class="co">/* else */</span>
+
+ rows = atoi(argv[<span class="dv">1</span>]);
+ cols = atoi(argv[<span class="dv">2</span>]);
+
+ <span class="co">/* note that void ** is not converted automatically,</span>
+<span class="co"> * so we need an explicit cast */</span>
+ a = (<span class="dt">int</span> **) malloc2d(rows, cols * <span class="kw">sizeof</span>(<span class="dt">int</span>));
+ <span class="kw">if</span>(a == <span class="dv">0</span>) {
+ fprintf(stderr, <span class="st">"malloc2d failed, exiting</span><span class="ch">\n</span><span class="st">"</span>);
+ <span class="kw">return</span> <span class="dv">2</span>;
+ }
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; rows; i++) {
+ <span class="kw">for</span>(j = <span class="dv">0</span>; j &lt; cols; j++) {
+ a[i][j] = i - j;
+ }
+ }
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; rows; i++) {
+ <span class="kw">for</span>(j = <span class="dv">0</span>; j &lt; cols; j++) {
+ printf(<span class="st">"%4d"</span>, a[i][j]);
+ }
+ putchar(<span class="ch">'\n'</span>);
+ }
+
+ free2d((<span class="dt">void</span> **) a); <span class="co">/* always clean up */</span>
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/pointers/malloc2d.c" class="uri">examples/pointers/malloc2d.c</a>
+</div>
+<h4 id="variableLengthArrays"><span class="header-section-number">4.9.5.4</span> Variable-length arrays</h4>
+<p>C99 adds the feature of <strong>variable-length arrays</strong>, where the size of the array is determined at run-time. These can only appear as local variables in procedures (<em>automatic variables</em>)
+ or in argument lists. In the case of variable-length arrays in argument
+ lists, it is also necessary that the length of the array be computable
+from previous arguments.</p>
+<p>For example, we could make the length of the array explicit in our <code class="backtick">sumArray</code> function:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* return the sum of the values in a, an array of size n */</span>
+<span class="dt">int</span>
+sumArray(<span class="dt">int</span> n, <span class="dt">const</span> <span class="dt">int</span> a[n])
+{
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> sum;
+
+ sum = <span class="dv">0</span>;
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ sum += a[i];
+ }
+
+ <span class="kw">return</span> sum;
+}</code></pre></div>
+<p>This doesn't accomplish much, because the length of the array is not
+used. However, it does become useful if we have a two-dimensional array,
+ as otherwise there is no way to compute the length of each row:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span>
+sumMatrix(<span class="dt">int</span> rows, <span class="dt">int</span> cols, <span class="dt">const</span> <span class="dt">int</span> m[rows][cols])
+{
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> j;
+ <span class="dt">int</span> sum;
+
+ sum = <span class="dv">0</span>;
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; rows; i++) {
+ <span class="kw">for</span>(j = <span class="dv">0</span>; j &lt; cols; j++) {
+ sum += m[i][j];
+ }
+ }
+
+ <span class="kw">return</span> sum;
+}</code></pre></div>
+<p>Here the fact that each row of <code class="backtick">m</code> is known to be an array of <code class="backtick">cols</code> many <code class="backtick">int</code>s makes the implicit pointer computation in <code class="backtick">m[i][j]</code> actually work. It is considerably more difficult to do this in ANSI C; the simplest approach is to pack <code class="backtick">m</code> into a one-dimensional array and do the address computation explicitly:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span>
+sumMatrix(<span class="dt">int</span> rows, <span class="dt">int</span> cols, <span class="dt">const</span> <span class="dt">int</span> a[])
+{
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> j;
+ <span class="dt">int</span> sum;
+
+ sum = <span class="dv">0</span>;
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; rows; i++) {
+ <span class="kw">for</span>(j = <span class="dv">0</span>; j &lt; cols; j++) {
+ sum += a[i*cols + j];
+ }
+ }
+
+ <span class="kw">return</span> sum;
+}</code></pre></div>
+<p>Variable-length arrays can sometimes be used for run-time storage allocation, as an alternative to <code class="backtick">malloc</code> and <code class="backtick">free</code>
+ (see below). A variable-length array allocated as a local variable will
+ be deallocated when the containing scope (usually a function body, but
+maybe just a compound statement marked off by braces) exits. One
+consequence of this is that you can't return a variable-length array
+from a function.</p>
+<p>Here is an example of code using this feature:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* reverse an array in place */</span>
+<span class="dt">void</span>
+reverseArray(<span class="dt">int</span> n, <span class="dt">int</span> a[n])
+{
+ <span class="co">/* algorithm: copy to a new array in reverse order */</span>
+ <span class="co">/* then copy back */</span>
+
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> copy[n];
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ <span class="co">/* the -1 is needed so that a[0] goes to a[n-1] etc. */</span>
+ copy[n-i<span class="dv">-1</span>] = a[i];
+ }
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ a[i] = copy[i];
+ }
+}</code></pre></div>
+<p>While using variable-length arrays for this purpose can simplify code in some cases, as a general programming practice it is <strong>extremely dangerous</strong>. The reason is that, unlike allocations through <code class="backtick">malloc</code>,
+ variable-length array allocations are typically allocated on the stack
+(which is often more constrained than the heap) and have no way of
+reporting failure. So if there isn't enough room for your
+variable-length array, odds are you won't find out until a segmentation
+fault occurs somewhere later in your code when you try to use it.</p>
+<p>(As an additional annoyance, <code class="backtick">gdb</code> is confused by two-dimensional variable-length arrays.)</p>
+<p>Here's a safer version of the above routine, using <code class="backtick">malloc</code> and <code class="backtick">free</code>.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* reverse an array in place */</span>
+<span class="dt">void</span>
+reverseArray(<span class="dt">int</span> n, <span class="dt">int</span> a[n])
+{
+ <span class="co">/* algorithm: copy to a new array in reverse order */</span>
+ <span class="co">/* then copy back */</span>
+
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> *copy;
+
+ copy = (<span class="dt">int</span> *) malloc(n * <span class="kw">sizeof</span>(<span class="dt">int</span>));
+ assert(copy); <span class="co">/* or some other error check */</span>
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ <span class="co">/* the -1 is needed so that a[0] goes to a[n-1] etc. */</span>
+ copy[n-i<span class="dv">-1</span>] = a[i];
+ }
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ a[i] = copy[i];
+ }
+
+ free(copy);
+}</code></pre></div>
+<h3 id="Void_pointers"><span class="header-section-number">4.9.6</span> Void pointers</h3>
+<p>A special pointer type is <code class="backtick">void&nbsp;*</code>, a "pointer to <code class="backtick">void</code>". Such pointers are declared in the usual way:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">void</span> *nothing; <span class="co">/* pointer to nothing */</span></code></pre></div>
+<p>Unlike ordinary pointers, you can't dereference a <code class="backtick">void&nbsp;*</code> pointer or do arithmetic on it, because the compiler doesn't know what type it points to. However, you are allowed to use a <code class="backtick">void&nbsp;*</code> as a kind of "raw address" pointer value that you can store arbitrary pointers in. It is permitted to assign to a <code class="backtick">void&nbsp;*</code> variable from an expression of any pointer type; conversely, a <code class="backtick">void&nbsp;*</code> pointer value can be assigned to a pointer variable of any type. An example is the return value of <code>malloc</code> or the argument to <code>free</code>, both of which are declared as <code>void *</code>. (Note that K&amp;R suggests using an explicit cast for the return value of <code>malloc</code>.
+ This is now acknowledged by the authors to be an error, which arose
+from the need for a cast prior to the standardization of void * in ANSI
+C. See <a href="http://cm.bell-labs.com/cm/cs/cbook/2ediffs.html" class="uri">http://cm.bell-labs.com/cm/cs/cbook/2ediffs.html</a>.)</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">int</span> *block;
+
+ block = malloc(sizeof(<span class="dt">int</span>) * <span class="dv">12</span>); <span class="co">/* void * converted to int * before assignment */</span>
+ free(block); <span class="co">/* int * converted to void * before passing to free */</span></code></pre></div>
+<p>If you need to use a <code class="backtick">void&nbsp;*</code> pointer as a pointer of a particular type in an expression, you can <strong>cast</strong> it to the appropriate type by prefixing it with a type name in parentheses, like this:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">int</span> a[<span class="dv">50</span>]; <span class="co">/* typical array of ints */</span>
+ <span class="dt">void</span> *p; <span class="co">/* dangerous void pointer */</span>
+
+ a[<span class="dv">12</span>] = <span class="dv">17</span>; <span class="co">/* save that valuable 17 */</span>
+ p = a; <span class="co">/* p now holds base address of a */</span>
+
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, ((<span class="dt">int</span> *) p)[<span class="dv">12</span>]); <span class="co">/* get 17 back */</span></code></pre></div>
+<p>Usually if you have to start writing casts, it's a sign that you are doing something wrong, and you run the danger of <strong>violating the type system</strong>—say, by tricking the compiler into treating a block of bits that are supposed to be an <code class="backtick">int</code> as four <code class="backtick">char</code>s.
+ But violating the type system like this will be necessary for some
+applications, because even the weak type system in C turns out to be too
+ restrictive for writing certain kinds of "generic" code that work on
+values of arbitrary types.</p>
+<h4 id="alignment"><span class="header-section-number">4.9.6.1</span> Alignment</h4>
+<p>One issue with casting pointers to and from <code>void *</code> is that you may violate the <strong>alignment restrictions</strong> for a particular kind of pointer on some architectures.</p>
+<p>Back in the 8-bit era of the 1970s, a single load or store operation
+would access a single byte of memory, and because some data (<code>char</code>s)
+ are still only one byte wide, C pointers retain the ability to address
+individual bytes. But present-day memory architectures typically have a
+wider data path, and the CPU may load or store as much as 8 bytes (64
+bits) in a single operation. This makes it natural to organize memory
+into 4-byte or 8-byte words even though addresses still refer to
+individual bytes. The effect of the memory architecture is that the
+address of memory words must be <strong>aligned</strong> to a multiple of the word size: so with 4-byte words, the address <code>0x1037ef44</code> (a multiple of 4) could refer to a full word, but <code>0x1037ef45</code> (one more than a multiple of 4) could only be used to refer to a byte within a word.</p>
+<p>What this means for a C program depends on your particular CPU and compiler. If you try to use something like <code>0x1037ef45</code> as an <code>int *</code>, one of three things might happen:</p>
+<ol style="list-style-type: decimal">
+<li>The CPU might load the 4 bytes starting at this address, using two accesses to memory to piece together the full <code>int</code> out of fragments of words. This is done on Intel architectures, but costs performance.</li>
+<li>The CPU might quietly zero out the last two bits of the address, loading from <code>0x1037ef44</code> even though you asked for <code>0x1037ef45</code>. This happens on some other architectures, notably ARM.</li>
+<li>The CPU might issue a run-time exception.</li>
+</ol>
+<p>All of these outcomes are bad, and the C standard does not specify
+what happens if you try to dereference a pointer value that does not
+satisfy the alignment restrictions of its target type. Fortunately,
+unless you are doing very nasty things with casts, this is unlikely to
+come up, because any pointer value you will see in a typical program is
+likely to arise in one of three ways:</p>
+<ol style="list-style-type: decimal">
+<li>By taking the address of some variable. This pointer will be
+appropriately aligned, because the compiler allocates space for each
+variable (including fields within <code>struct</code>s) with appropriate alignment.</li>
+<li>By computing an offset address using pointer arithmetic either explicitly (<code>p + n</code>) or implicitly (<code>p[n]</code>). In either case, as long as the base pointer is correctly aligned, the computed pointer will also be correctly aligned.</li>
+<li>By obtaining a pointer to an allocated block of memory using <code>malloc</code> or a similar function. Here <code>malloc</code>
+ is designed to always return blocks with the maximum possible required
+alignment, just to avoid problems when you use the results elsewhere.</li>
+</ol>
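+<p>On many compilers, you can use <code>__alignof(</code><em>type</em><code>)</code> to get the alignment restriction for a particular type. This was formalized in C11 with a different name: <code>_Alignof(</code><em>type</em><code>)</code>, also available as <code>alignof</code> after including <code>&lt;stdalign.h&gt;</code>.</p>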
+<p>The other place where alignment can create issues is that if you make a <a href="#structs"><code>struct</code></a>
+ with components with different alignment restrictions, you may end up
+with some empty space. For example, on a machine that enforces 4-byte
+alignment for <code>int</code>s, building a <code>struct</code> that contains a <code>char</code> and an <code>int</code> will give you something bigger than you might expect:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="kw">struct</span> ci {
+ <span class="dt">char</span> c; <span class="co">/* offset 0 */</span>
+ <span class="co">/* 3 unused bytes go here */</span>
+ <span class="dt">int</span> i; <span class="co">/* offset 4 */</span>
+};
+
+<span class="kw">struct</span> ic {
+ <span class="dt">int</span> i; <span class="co">/* offset 0 */</span>
+ <span class="dt">char</span> c; <span class="co">/* offset 4 */</span>
+ <span class="co">/* 3 unused bytes go here */</span>
+};
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ printf(<span class="st">"sizeof(struct ci) == %lu</span><span class="ch">\n</span><span class="st">"</span>, <span class="kw">sizeof</span>(<span class="kw">struct</span> ci));
+ printf(<span class="st">"sizeof(struct ic) == %lu</span><span class="ch">\n</span><span class="st">"</span>, <span class="kw">sizeof</span>(<span class="kw">struct</span> ic));
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/alignment/structPacking.c" class="uri">examples/alignment/structPacking.c</a>
+</div>
+<pre><code>$ c99 -Wall -o structPacking structPacking.c
+$ ./structPacking
+sizeof(struct ci) == 8
+sizeof(struct ic) == 8</code></pre>
+<p>In both cases, the compiler packs in an extra 3 bytes to make the
+size of the struct a multiple of the worst alignment of any of its
+components. If it didn't do this, you would have trouble as soon as you
+tried to make an array of these things.</p>
+<h3 id="malloc"><span class="header-section-number">4.9.7</span> Run-time storage allocation using <code>malloc</code></h3>
+<p>C does not generally permit arrays to be declared with variable
+sizes. C also doesn't let local variables outlive the function they are
+declared in. Both features can be awkward if you want to build data
+structures at run time that have unpredictable (perhaps even changing)
+sizes and that are intended to persist longer than the functions that
+create them. To build such structures, the standard C library provides
+the <code class="backtick">malloc</code> routine, which asks the
+operating system for a block of space of a given size (in bytes). With a
+ bit of pushing and shoving, this can be used to obtain a block of space
+ that for all practical purposes acts just like an array.</p>
+<p>To use <code class="backtick">malloc</code>, you must include <code class="backtick">stdlib.h</code> at the top of your program. The declaration for <code class="backtick">malloc</code> is</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span> *malloc(size_t);</code></pre></div>
+<p>where <code class="backtick">size_t</code> is an integer type (often <code class="backtick">unsigned&nbsp;long</code>). Calling <code class="backtick">malloc</code> with an argument of <span class="math inline"><em>n</em></span> allocates and returns a pointer to the start of a block of <span class="math inline"><em>n</em></span> bytes if possible. If the system can't give you the space you asked for (maybe you asked for more space than it has), <code class="backtick">malloc</code> returns a null pointer. It is good practice to test the return value of <code class="backtick">malloc</code> whenever you call it.</p>
+<p>Because the return type of <code class="backtick">malloc</code> is <code class="backtick">void&nbsp;*</code>,
+ its return value can be assigned to any variable with a pointer type.
+Computing the size of the block you need is your responsibility---and
+you will be punished for any mistakes with difficult-to-diagnose buffer
+overrun errors---but this task is made slightly easier by the built-in <code class="backtick">sizeof</code> operator that allows you to compute the size in bytes of any particular data type. A typical call to <code class="backtick">malloc</code> might thus look something like this:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="co">/* allocate and return a new integer array with n elements */</span>
+<span class="co">/* calls abort() if there isn't enough space */</span>
+<span class="dt">int</span> *
+makeIntArray(<span class="dt">int</span> n)
+{
+ <span class="dt">int</span> *a;
+
+ a = malloc(<span class="kw">sizeof</span>(<span class="dt">int</span>) * n);
+
+ <span class="kw">if</span>(a == <span class="dv">0</span>) abort(); <span class="co">/* die on failure */</span>
+
+ <span class="kw">return</span> a;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/pointers/makeIntArray.c" class="uri">examples/pointers/makeIntArray.c</a>
+</div>
+<p>When you are done with a <code class="backtick">malloc</code>'d region, you should return the space to the system using the <code class="backtick">free</code> routine, also defined in <code class="backtick">stdlib.h</code>. If you don't do this, your program will quickly run out of space. The <code class="backtick">free</code> routine takes a <code class="backtick">void&nbsp;*</code> as its argument and returns nothing. It is good practice to write a matching <strong>destructor</strong> that de-allocates an object for each <strong>constructor</strong> (like <code class="backtick">makeIntArray</code>) that makes one.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+destroyIntArray(<span class="dt">int</span> *a)
+{
+ free(a);
+}</code></pre></div>
+<p>It is a serious error to do anything at all with a block after it has been <code class="backtick">free</code>d. This is not necessarily because <code>free</code>
+ modifies the contents of the block (although it might), but because
+when you free a block you are granting the storage allocator permission
+to hand the same block out in response to a future call to <code>malloc</code>, and you don't want to step on whatever other part of your program is now trying to use that space.</p>
+<p>It is also possible to grow or shrink a previously allocated block. This is done using the <code class="backtick">realloc</code> function, which is declared as</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span> *realloc(<span class="dt">void</span> *oldBlock, size_t newSize);</code></pre></div>
+<p>The <code class="backtick">realloc</code> function returns a pointer
+to the resized block. It may or may not allocate a new block. If there
+is room, it may leave the old block in place and return its argument.
+But it may allocate a new block and copy the contents of the old block,
+so you should assume that the old pointer has been <code class="backtick">free</code>d.</p>
+<p>Here's a typical use of <code class="backtick">realloc</code> to build an array that grows as large as it needs to be:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* read numbers from stdin until there aren't any more */</span>
+<span class="co">/* returns an array of all numbers read, or null on error */</span>
+<span class="co">/* returns the count of numbers read in *count */</span>
+<span class="dt">int</span> *
+readNumbers(<span class="dt">int</span> *count <span class="co">/* RETVAL */</span>)
+{
+ <span class="dt">int</span> mycount; <span class="co">/* number of numbers read */</span>
+ <span class="dt">int</span> size; <span class="co">/* size of block allocated so far */</span>
+ <span class="dt">int</span> *a; <span class="co">/* block */</span>
+ <span class="dt">int</span> n; <span class="co">/* number read */</span>
+
+ mycount = <span class="dv">0</span>;
+ size = <span class="dv">1</span>;
+
+ a = malloc(<span class="kw">sizeof</span>(<span class="dt">int</span>) * size); <span class="co">/* allocating zero bytes is tricky */</span>
+ <span class="kw">if</span>(a == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+
+ <span class="kw">while</span>(scanf(<span class="st">"%d"</span>, &amp;n) == <span class="dv">1</span>) {
+ <span class="co">/* is there room? */</span>
+ <span class="kw">while</span>(mycount &gt;= size) {
+ <span class="co">/* double the size to avoid calling realloc for every number read */</span>
+ size *= <span class="dv">2</span>;
+ a = realloc(a, <span class="kw">sizeof</span>(<span class="dt">int</span>) * size);
+ <span class="kw">if</span>(a == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+ }
+
+ <span class="co">/* put the new number in */</span>
+ a[mycount++] = n;
+ }
+
+ <span class="co">/* now trim off any excess space */</span>
+ a = realloc(a, <span class="kw">sizeof</span>(<span class="dt">int</span>) * mycount);
+ <span class="co">/* note: if a == 0 at this point we'll just return it anyway */</span>
+
+ <span class="co">/* save out mycount */</span>
+ *count = mycount;
+
+ <span class="kw">return</span> a;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/pointers/readNumbers.c" class="uri">examples/pointers/readNumbers.c</a>
+</div>
+<p>Because errors involving <code class="backtick">malloc</code> and its friends can be very difficult to spot, it is recommended to test any program that uses <code class="backtick">malloc</code> using <a href="#valgrind">valgrind</a>.</p>
+<h3 id="functionPointers"><span class="header-section-number">4.9.8</span> Function pointers</h3>
+<p>A <strong>function pointer</strong>, internally, is just the
+numerical address for the code for a function. When a function name is
+used by itself without parentheses, the value is a pointer to the
+function, just as the name of an array by itself is a pointer to its
+zeroth element. Function pointers can be stored in variables, <code class="backtick">struct</code>s, <code class="backtick">union</code>s,
+ and arrays and passed to and from functions just like any other pointer
+ type. They can also be called: a variable of type function pointer can
+be used in place of a function name.</p>
+<p>Function pointers are not used as much in C as in functional languages, but there are many common uses even in C code.</p>
+<h4 id="Function_pointer_declarations"><span class="header-section-number">4.9.8.1</span> Function pointer declarations</h4>
+<p>A function pointer declaration looks like a function declaration,
+except that the function name is wrapped in parentheses and preceded by
+an asterisk. For example:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* a function taking two int arguments and returning an int */</span>
+<span class="dt">int</span> function(<span class="dt">int</span> x, <span class="dt">int</span> y);
+
+<span class="co">/* a pointer to such a function */</span>
+<span class="dt">int</span> (*pointer)(<span class="dt">int</span> x, <span class="dt">int</span> y);</code></pre></div>
+<p>As with function declarations, the names of the arguments can be omitted.</p>
+<p>Here's a short program that uses function pointers:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* Functional "hello world" program */</span>
+
+<span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="co">/* function for emitting text */</span>
+ <span class="dt">int</span> (*say)(<span class="dt">const</span> <span class="dt">char</span> *);
+
+ say = puts;
+
+ say(<span class="st">"hello world"</span>);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<h4 id="Callbacks"><span class="header-section-number">4.9.8.2</span> Callbacks</h4>
+<p>A <strong>callback</strong> is when we pass a function pointer into a
+ function so that that function can call our function when some event
+happens or it needs to compute something.</p>
+<p>A classic example is the comparison argument to <code class="backtick">qsort</code>, from the standard library:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* defined in stdlib.h */</span>
+<span class="dt">void</span>
+qsort(
+ <span class="dt">void</span> *base,
+ size_t n,
+ size_t size,
+ <span class="dt">int</span> (*cmp)(<span class="dt">const</span> <span class="dt">void</span> *key1, <span class="dt">const</span> <span class="dt">void</span> *key2)
+);</code></pre></div>
+<p>This is a generic sorting routine that will sort any array in place.
+It needs to know (a) the base address of the array; (b) how many
+elements there are; (c) how big each element is; and (d) how to compare
+two elements. The only tricky part is supplying the comparison, which
+could involve arbitrarily-complex code. So we supply this code as a
+function with an interface similar to <code class="backtick">strcmp</code>.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">static</span> <span class="dt">int</span>
+compare_ints(<span class="dt">void</span> *key1, <span class="dt">void</span> *key2)
+{
+ <span class="kw">return</span> *((<span class="dt">int</span> *) key1) - *((<span class="dt">int</span> *) key2);
+}
+
+<span class="dt">int</span>
+sort_int_array(<span class="dt">int</span> *a, <span class="dt">int</span> n)
+{
+ qsort(a, n, <span class="kw">sizeof</span>(*a), compare_ints);
+}</code></pre></div>
+<p>Other examples might include things like registering an error handler for a library, instead of just having it call <code class="backtick">abort()</code> or something equally catastrophic, or providing a cleanup function for freeing data passed into a data structure.</p>
+<h4 id="Dispatch_tables"><span class="header-section-number">4.9.8.3</span> Dispatch tables</h4>
+<p>A dispatch table is an alternative to gigantic <code class="backtick">if/else&nbsp;if</code> or <code class="backtick">switch</code> statements. The idea is to build an array of function pointers (or, more generally, some sort of <a href="#dictionaries">dictionary data structure</a>), and use the value we might otherwise be feeding to <code>switch</code>
+ as an index into this array. Here is a simple example, which echoes
+most of the characters in its input intact, except for echoing every
+lowercase vowel twice:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;limits.h&gt;</span>
+
+<span class="co">/*</span>
+<span class="co"> * Demonstrate use of dispatch tables.</span>
+<span class="co"> */</span>
+
+<span class="co">/* print a character twice */</span>
+<span class="co">/* like putchar, returns character if successful or EOF on error */</span>
+<span class="dt">int</span>
+putcharTwice(<span class="dt">int</span> c)
+{
+ <span class="kw">if</span>(putchar(c) == EOF || putchar(c) == EOF) {
+ <span class="kw">return</span> EOF;
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> c;
+ }
+}
+
+<span class="ot">#define NUM_CHARS (UCHAR_MAX + 1) </span><span class="co">/* UCHAR_MAX is in limits.h */</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="co">/* this declares table as an array of function pointers */</span>
+ <span class="dt">int</span> (*table[UCHAR_MAX<span class="dv">+1</span>])(<span class="dt">int</span>);
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> c;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; NUM_CHARS; i++) {
+ <span class="co">/* default is to call putchar */</span>
+ table[i] = putchar;
+ }
+
+ <span class="co">/* but lower-case vowels show up twice */</span>
+ table['a'] = putcharTwice;
+ table['e'] = putcharTwice;
+ table['i'] = putcharTwice;
+ table['o'] = putcharTwice;
+ table['u'] = putcharTwice;
+
+ <span class="kw">while</span>((c = getchar()) != EOF) {
+ table[c](c);
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/pointers/dispatchTable.c" class="uri">examples/pointers/dispatchTable.c</a>
+</div>
+<p>And here is the program translating Shakespeare into mock-Swedish:</p>
+<pre><code>$ c99 -Wall -pedantic -g3 -o dispatchTable dispatchTable.c
+$ echo Now is the winter of our discontent made glorious summer by this sun of York. | ./dispatchTable
+Noow iis thee wiinteer oof oouur diiscoonteent maadee glooriioouus suummeer by thiis suun oof Yoork.</code></pre>
+<p>In this particular case, we did a lot of work to avoid just writing a <code>switch</code>
+ statement. But being able to build a dispatch table dynamically can be
+very useful sometimes. An example might be a graphical user interface
+where each button has an associated function. If buttons can be added by
+ different parts of the program, using a table mapping buttons to
+functions allows a single dispatch routine to figure out where to route
+button presses.</p>
+<p>(For some applications, we might want to pass additional information
+in to the function to change its behavior. This can be done by replacing
+ the function pointers with <a href="#closures">closures</a>.)</p>
+<h3 id="The_restrict_keyword"><span class="header-section-number">4.9.9</span> The restrict keyword</h3>
+<p>In C99, it is possible to declare that a pointer variable is the only
+ way to reach its target as long as it is in scope. This is not enforced
+ by the compiler; instead, it is a promise from the programmer <em>to</em>
+ the compiler that any data reached through this pointer will not be
+changed by other parts of the code, which allows the compiler to
+optimize code in ways that are not possible if pointers might point to
+the same place (a phenomenon called <strong>pointer aliasing</strong>). For example, consider the following short function:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">// write 1 + *src to *dst and return *src</span>
+<span class="dt">int</span>
+copyPlusOne(<span class="dt">int</span> * <span class="dt">restrict</span> dst, <span class="dt">int</span> * <span class="dt">restrict</span> src)
+{
+ *dst = *src + <span class="dv">1</span>;
+ <span class="kw">return</span> *src;
+}</code></pre></div>
+<p>For this function, the output of <code class="backtick">c99&nbsp;-O3&nbsp;-S</code> includes one more instruction if the <code class="backtick">restrict</code> qualifiers are removed. The reason is that if <code class="backtick">dst</code> and <code class="backtick">src</code> may point to the same location, <code class="backtick">src</code> needs to be re-read for the <code class="backtick">return</code>
+ statement, in case it changed. But if they are guaranteed to point to
+different locations, the compiler can re-use the previous value it
+already has in one of the CPU registers.</p>
+<p>For most code, this feature is useless, and potentially dangerous if
+someone calls your routine with aliased pointers. However, it may
+sometimes be possible to increase performance of time-critical code by
+adding a <code class="backtick">restrict</code> keyword. The cost is that the code might no longer work if called with aliased pointers.</p>
+<p>Curiously, C assumes that two pointers are never aliases if you have
+two arguments with different pointer types, neither of which is <code>char *</code> or <code>void *</code>.<a href="#fn10" class="footnoteRef" id="fnref10"><sup>10</sup></a> This is known as the <strong>strict aliasing rule</strong> and cannot be overridden from within the program source code: there is no <code>unrestrict</code>
+ keyword. You probably only need to worry about this if you are casting
+pointers to different types and then passing the cast pointers around in
+ the same context as the original pointers.</p>
+<h2 id="strings"><span class="header-section-number">4.10</span> Strings</h2>
+<p>Processing strings of characters is one of the oldest applications of
+mechanical computers, arguably predating numerical computation by at
+least fifty years. Assuming you've already solved the problem of how to
+represent characters in memory (e.g. as the C <code class="backtick">char</code> type encoded in <a href="http://en.wikipedia.org/wiki/ASCII" title="WikiPedia">ASCII</a>), there are two standard ways to represent strings:</p>
+<ul>
+<li>As a <strong>delimited string</strong>, where the end of a string is
+ marked by a special character. The advantages of this method are that
+only one extra byte is needed to indicate the length of an arbitrarily
+long string, that strings can be manipulated by simple pointer
+operations, and in some cases that common string operations that involve
+ processing the entire string can be performed very quickly. The
+disadvantage is that the delimiter can't appear inside any string, which
+ limits what kind of data you can store in a string.</li>
+<li>As a <strong>counted string</strong>, where the string data is
+prefixed or supplemented with an explicit count of the number of
+characters in the string. The advantage of this representation is that a
+ string can hold arbitrary data (including delimiter characters) and
+that one can quickly jump to the end of the string without having to
+scan its entire length. The disadvantage is that maintaining a separate
+count typically requires more space than adding a one-byte delimiter
+(unless you limit your string length to 255 characters) and that more
+care needs to be taken to make sure that the count is correct.</li>
+</ul>
+<h3 id="C_strings"><span class="header-section-number">4.10.1</span> C strings</h3>
+<p>Because delimited strings are simpler and take less space, C went for
+ delimited strings. A string is a sequence of characters terminated by a
+ null character <code class="backtick">'\0'</code>. Looking back from almost half a century later, this choice <a href="http://queue.acm.org/detail.cfm?id=2010365">may have been a mistake in the long run</a>, but we are pretty much stuck with it.</p>
+<p>Note that the null character is <em>not</em> the same as a null pointer, although both appear to have the value <code class="backtick">0</code> when used in integer contexts. A string is represented by a variable of type <code class="backtick">char&nbsp;*</code>,
+ which points to the zeroth character of the string. The programmer is
+responsible for allocating and managing space to store strings, except
+for explicit <strong>string constants</strong>, which are stored in a special non-writable string space by the compiler.</p>
+<p>If you want to use counted strings instead, you can build your own using a <a href="#structs"><code>struct</code></a>. Most scripting languages written in C (e.g. <a href="http://en.wikipedia.org/wiki/Perl" title="WikiPedia">Perl</a>, <a href="http://en.wikipedia.org/wiki/Python_programming_language" title="WikiPedia">Python</a>, <a href="http://en.wikipedia.org/wiki/PHP" title="WikiPedia">PHP</a>, etc.) use this approach internally. (<a href="http://en.wikipedia.org/wiki/Tcl" title="WikiPedia">Tcl</a> is an exception, which is one of many good reasons not to use Tcl).</p>
+<h3 id="String_constants"><span class="header-section-number">4.10.2</span> String constants</h3>
+<p>A string constant in C is represented by a sequence of characters
+within double quotes. Standard C character escape sequences like <code class="backtick">\n</code> (newline), <code class="backtick">\r</code> (carriage return), <code class="backtick">\a</code> (bell), <code class="backtick">\x17</code> (character with hexadecimal code <code class="backtick">0x17</code>), <code class="backtick">\\</code> (backslash), and <code class="backtick">\"</code> (double quote) can all be used inside string constants. The value of a string constant has type <code class="backtick">const&nbsp;char&nbsp;*</code>, and can be assigned to variables and passed as function arguments or return values of this type.</p>
+<p>Two string constants separated only by whitespace will be concatenated by the compiler as a single constant: <code class="backtick">"foo"&nbsp;"bar"</code> is the same as <code class="backtick">"foobar"</code>. This feature is not much used in normal code, but shows up sometimes in <a href="#macros">macros</a>.</p>
+<h3 id="String_buffers"><span class="header-section-number">4.10.3</span> String buffers</h3>
+<p>The problem with string constants is that you can't modify them. If
+you want to build strings on the fly, you will need to allocate space
+for them. The traditional approach is to use a <strong>buffer</strong>, an array of <code class="backtick">char</code>s. Here is a particularly painful hello-world program that builds a string by hand:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">char</span> hi[<span class="dv">3</span>];
+
+ hi[<span class="dv">0</span>] = 'h';
+ hi[<span class="dv">1</span>] = 'i';
+ hi[<span class="dv">2</span>] = '\<span class="dv">0</span>';
+
+ puts(hi);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/strings/hi.c" class="uri">examples/strings/hi.c</a>
+</div>
+<p>Note that the buffer needs to have size at least 3 in order to hold
+all three characters. A common error in programming with C strings is to
+ forget to leave space for the null at the end (or to forget to add the
+null, which can have comical results depending on what you are using
+your surprisingly long string for).</p>
+<h4 id="string-buffers-and-the-perils-of-gets"><span class="header-section-number">4.10.3.1</span> String buffers and the perils of <code>gets</code></h4>
+<p>Fixed-size buffers are a common source of errors in older C programs, particularly ones written with the library routine <code>gets</code>. The problem is that if you do something like</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> strcpy(smallBuffer, bigString);</code></pre></div>
+<p>the <code>strcpy</code> function will happily keep copying characters across memory long after it has passed the end of <code>smallBuffer</code>. While you can avoid this to a certain extent when you control where <code>bigString</code>
+ is coming from, the situation becomes particularly fraught if the
+string you are trying to store comes from the input, where it might be
+supplied by anybody, including somebody who is trying to execute a <strong>buffer overrun attack</strong> to seize control of your program.</p>
+<p>If you do need to read a string from the input, you should allocate the receiving buffer using <code>malloc</code> and expand it using <code>realloc</code> as needed. Below is a program that shows how to do this, with some bad alternatives commented out:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="ot">#define NAME_LENGTH (2)</span>
+
+<span class="ot">#define INITIAL_LINE_LENGTH (2)</span>
+
+<span class="co">/* return a freshly-malloc'd line with next line of input from stdin */</span>
+<span class="dt">char</span> *
+getLine(<span class="dt">void</span>)
+{
+ <span class="dt">char</span> *line;
+ <span class="dt">int</span> size; <span class="co">/* how much space do I have in line? */</span>
+ <span class="dt">int</span> length; <span class="co">/* how many characters have I used */</span>
+ <span class="dt">int</span> c;
+
+ size = INITIAL_LINE_LENGTH;
+ line = malloc(size);
+ assert(line);
+
+ length = <span class="dv">0</span>;
+
+ <span class="kw">while</span>((c = getchar()) != EOF &amp;&amp; c != <span class="ch">'\n'</span>) {
+ <span class="kw">if</span>(length &gt;= size<span class="dv">-1</span>) {
+ <span class="co">/* need more space! */</span>
+ size *= <span class="dv">2</span>;
+
+ <span class="co">/* make length equal to new size */</span>
+ <span class="co">/* copy contents if necessary */</span>
+ line = realloc(line, size);
+ }
+
+ line[length++] = c;
+ }
+
+ line[length] = '\<span class="dv">0</span>';
+
+ <span class="kw">return</span> line;
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> x = <span class="dv">12</span>;
+ <span class="co">/* char name[NAME_LENGTH]; */</span>
+ <span class="dt">char</span> *line;
+ <span class="dt">int</span> y = <span class="dv">17</span>;
+
+ puts(<span class="st">"What is your name?"</span>);
+
+ <span class="co">/* gets(name); */</span> <span class="co">/* may overrun buffer */</span>
+ <span class="co">/* scanf("%s\n", name); */</span> <span class="co">/* may overrun buffer */</span>
+ <span class="co">/* fgets(name, NAME_LENGTH, stdin); */</span> <span class="co">/* may truncate input */</span>
+ line = getLine(); <span class="co">/* has none of these problems */</span>
+
+ printf(<span class="st">"Hi %s! Did you know that x == %d and y == %d?</span><span class="ch">\n</span><span class="st">"</span>, line, x, y);
+
+ free(line); <span class="co">/* but we do have to free line when we are done with it */</span>
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/strings/getLine.c" class="uri">examples/strings/getLine.c</a>
+</div>
+<h3 id="Operations_on_strings"><span class="header-section-number">4.10.4</span> Operations on strings</h3>
+<p>Unlike many programming languages, C provides only a rudimentary
+string-processing library. The reason is that many common
+string-processing tasks in C can be done very quickly by hand.</p>
+<p>For example, suppose we want to copy a string from one buffer to another. The library function <code class="backtick">strcpy</code> declared in <code class="backtick">string.h</code>
+ will do this for us (and is usually the right thing to use), but if it
+didn't exist we could write something very close to it using a famous C
+idiom.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+strcpy2(<span class="dt">char</span> *dest, <span class="dt">const</span> <span class="dt">char</span> *src)
+{
+ <span class="co">/* This line copies characters one at a time from *src to *dest. */</span>
+ <span class="co">/* The postincrements increment the pointers (++ binds tighter than *) */</span>
+ <span class="co">/* to get to the next locations on the next iteration through the loop. */</span>
+ <span class="co">/* The loop terminates when *src == '\0' == 0. */</span>
+ <span class="co">/* There is no loop body because there is nothing to do there. */</span>
+ <span class="kw">while</span>(*dest++ = *src++);
+}</code></pre></div>
+<p>The externally visible difference between <code class="backtick">strcpy2</code> and the original <code class="backtick">strcpy</code> is that <code class="backtick">strcpy</code> returns a <code class="backtick">char&nbsp;*</code> equal to its first argument. It is also likely that any implementation of <code class="backtick">strcpy</code> found in a recent C library takes advantage of the width of the memory data path to copy more than one character at a time.</p>
+<p>Most C programmers will recognize the <code class="backtick">while(*dest++&nbsp;=&nbsp;*src++);</code>
+ from having seen it before, although experienced C programmers will
+generally be able to figure out what such highly abbreviated
+constructions mean. Exposure to such constructions is arguably a form of
+ hazing.</p>
+<p>Because C pointers act exactly like array names, you can also write <code class="backtick">strcpy2</code> using explicit array indices. The result is longer but may be more readable if you aren't a C fanatic.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">char</span> *
+strcpy2a(<span class="dt">char</span> *dest, <span class="dt">const</span> <span class="dt">char</span> *src)
+{
+ <span class="dt">int</span> i;
+
+ i = <span class="dv">0</span>;
+ <span class="kw">for</span>(i = <span class="dv">0</span>; src[i] != '\<span class="dv">0</span>'; i++) {
+ dest[i] = src[i];
+ }
+
+ <span class="co">/* note that the final null in src is not copied by the loop */</span>
+ dest[i] = '\<span class="dv">0</span>';
+
+ <span class="kw">return</span> dest;
+}</code></pre></div>
+<p>An advantage of using a separate index in <code class="backtick">strcpy2a</code> is that we don't trash <code class="backtick">dest</code>, so we can return it just like <code class="backtick">strcpy</code> does. (In fairness, <code class="backtick">strcpy2</code> could have saved a copy of the original location of <code class="backtick">dest</code> and done the same thing.)</p>
+<p>Note that nothing in <code class="backtick">strcpy2</code>, <code class="backtick">strcpy2a</code>, or the original <code class="backtick">strcpy</code> will save you if <code class="backtick">dest</code> points to a region of memory that isn't big enough to hold the string at <code class="backtick">src</code>, or if somebody forgets to tack a null on the end of <code class="backtick">src</code> (in which case <code class="backtick">strcpy</code>
+ will just keep going until it finds a null character somewhere). As
+elsewhere, it's your job as a programmer to make sure there is enough
+room. Since the compiler has no idea what <code class="backtick">dest</code> points to, this means that you have to remember how much room is available there yourself.</p>
+<p>If you are worried about overrunning <code class="backtick">dest</code>, you could use <code class="backtick">strncpy</code> instead. The <code class="backtick">strncpy</code> function takes a third argument that gives the maximum number of characters to copy; however, if <code class="backtick">src</code> doesn't contain a null character in this range, the resulting string in <code class="backtick">dest</code> won't either. Usually the only practical application of <code class="backtick">strncpy</code> is to extract the first <code class="backtick">k</code> characters of a string, as in</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* copy the substring of src consisting of characters at positions</span>
+<span class="co"> start..end-1 (inclusive) into dest */</span>
+<span class="co">/* If end-1 is past the end of src, copies only as many characters as </span>
+<span class="co"> available. */</span>
+<span class="co">/* If start is past the end of src, the results are unpredictable. */</span>
+<span class="co">/* Returns a pointer to dest */</span>
+<span class="dt">char</span> *
+copySubstring(<span class="dt">char</span> *dest, <span class="dt">const</span> <span class="dt">char</span> *src, <span class="dt">int</span> start, <span class="dt">int</span> end)
+{
+ <span class="co">/* copy the substring */</span>
+ strncpy(dest, src + start, end - start);
+
+ <span class="co">/* add null since strncpy probably didn't */</span>
+ dest[end - start] = '\<span class="dv">0</span>';
+
+ <span class="kw">return</span> dest;
+}</code></pre></div>
+<p>Another quick and dirty way to extract a substring of a string you
+don't care about (and can write to) is to just drop a null character in
+the middle of the sacrificial string. This is generally a bad idea
+unless you are certain you aren't going to need the original string
+again, but it's a surprisingly common practice among C programmers of a
+certain age.</p>
+<p>A similar operation to <code class="backtick">strcpy</code> is <code class="backtick">strcat</code>. The difference is that <code class="backtick">strcat</code> concatenates <code class="backtick">src</code> on to the end of <code class="backtick">dest</code>; so that if <code class="backtick">dest</code> previously pointed to <code class="backtick">"abc"</code> and <code class="backtick">src</code> to <code class="backtick">"def"</code>, <code class="backtick">dest</code> will now point to <code class="backtick">"abcdef"</code>. Like <code class="backtick">strcpy</code>, <code class="backtick">strcat</code> returns its first argument. A no-return-value version of <code class="backtick">strcat</code> is given below.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+strcat2(<span class="dt">char</span> *dest, <span class="dt">const</span> <span class="dt">char</span> *src)
+{
+ <span class="kw">while</span>(*dest) dest++;
+ <span class="kw">while</span>(*dest++ = *src++);
+}</code></pre></div>
+<p>Decoding this abomination is left as an exercise for the reader. There is also a function <code class="backtick">strncat</code> which has the same relationship to <code class="backtick">strcat</code> that <code class="backtick">strncpy</code> has to <code class="backtick">strcpy</code>.</p>
+<p>As with <code class="backtick">strcpy</code>, the actual implementation of <code class="backtick">strcat</code> may be much more subtle, and is likely to be faster than rolling your own.</p>
+<h3 id="Finding_the_length_of_a_string"><span class="header-section-number">4.10.5</span> Finding the length of a string</h3>
+<p>Because the length of a string is of fundamental importance in C
+(e.g., when deciding if you can safely copy it somewhere else), the
+standard C library provides a function <code class="backtick">strlen</code>
+ that counts the number of non-null characters in a string. Note that if
+ you are allocating space for a copy of a string, you will need to add
+one to the value returned by <code>strlen</code> to account for the null.</p>
+<p>Here's a possible implementation:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span>
+strlen(<span class="dt">const</span> <span class="dt">char</span> *s)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; *s; i++, s++);
+
+ <span class="kw">return</span> i;
+}</code></pre></div>
+<p>Note the use of the comma operator in the increment step. The comma
+operator applied to two expressions evaluates both of them and discards
+the value of the first; it is usually used only in <code class="backtick">for</code> loops where you want to initialize or advance more than one variable at once.</p>
+<p>Like the other string routines, using <code class="backtick">strlen</code> requires including <code class="backtick">string.h</code>.</p>
+<h4 id="The_strlen_tarpit"><span class="header-section-number">4.10.5.1</span> The strlen tarpit</h4>
+<p>A common mistake is to put a call to <code class="backtick">strlen</code> in the header of a loop; for example:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* like strcpy, but only copies characters at indices 0, 2, 4, ...</span>
+<span class="co"> from src to dest */</span>
+<span class="dt">char</span> *
+copyEvenCharactersBadVersion(<span class="dt">char</span> *dest, <span class="dt">const</span> <span class="dt">char</span> *src)
+{
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> j;
+
+ <span class="co">/* BAD: Calls strlen on every pass through the loop */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>, j = <span class="dv">0</span>; i &lt; strlen(src); i += <span class="dv">2</span>, j++) {
+ dest[j] = src[i];
+ }
+
+ dest[j] = '\<span class="dv">0</span>';
+
+ <span class="kw">return</span> dest;
+}</code></pre></div>
+<p>The problem is that <code class="backtick">strlen</code> has to scan all of <code class="backtick">src</code> every time the test is done, which adds time proportional to the length of <code class="backtick">src</code> to each iteration of the loop. So <code class="backtick">copyEvenCharactersBadVersion</code> takes time proportional to the <em>square</em> of the length of <code class="backtick">src</code>.</p>
+<p>Here's a faster version:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* like strcpy, but only copies characters at indices 0, 2, 4, ...</span>
+<span class="co"> from src to dest */</span>
+<span class="dt">char</span> *
+copyEvenCharacters(<span class="dt">char</span> *dest, <span class="dt">const</span> <span class="dt">char</span> *src)
+{
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> j;
+ <span class="dt">int</span> len; <span class="co">/* length of src */</span>
+
+ len = strlen(src);
+
+ <span class="co">/* GOOD: uses cached value of strlen(src) */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>, j = <span class="dv">0</span>; i &lt; len; i += <span class="dv">2</span>, j++) {
+ dest[j] = src[i];
+ }
+
+ dest[j] = '\<span class="dv">0</span>';
+
+ <span class="kw">return</span> dest;
+}</code></pre></div>
+<p>Because it doesn't call <code class="backtick">strlen</code> all the time, this version of <code class="backtick">copyEvenCharacters</code> will run much faster than the original even on small strings, and several million times faster if <code class="backtick">src</code> is megabytes long.</p>
+<h3 id="Comparing_strings"><span class="header-section-number">4.10.6</span> Comparing strings</h3>
+<p>If you want to test if strings <code class="backtick">s1</code> and <code class="backtick">s2</code> contain the same characters, writing <code class="backtick">s1&nbsp;==&nbsp;s2</code> won't work, since this tests instead whether <code class="backtick">s1</code> and <code class="backtick">s2</code> point to the same address. Instead, you should use <code class="backtick">strcmp</code>, declared in <code class="backtick">string.h</code>. The <code class="backtick">strcmp</code>
+ function walks along both of its arguments until it either hits a null
+on both and returns 0, or hits two different characters, and returns a
+positive integer if the first string's character is bigger and a
+negative integer if the second string's character is bigger (a typical
+implementation will just subtract the two characters). A straightforward
+ implementation might look like this:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span>
+strcmp(<span class="dt">const</span> <span class="dt">char</span> *s1, <span class="dt">const</span> <span class="dt">char</span> *s2)
+{
+ <span class="kw">while</span>(*s1 &amp;&amp; *s2 &amp;&amp; *s1 == *s2) {
+ s1++;
+ s2++;
+ }
+
+ <span class="kw">return</span> *s1 - *s2;
+}</code></pre></div>
+<p>To use <code class="backtick">strcmp</code> to test equality, test if the return value is <code class="backtick">0</code>:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">if</span>(strcmp(s1, s2) == <span class="dv">0</span>) {
+ <span class="co">/* strings are equal */</span>
+ ...
+ }</code></pre></div>
+<p>You may sometimes see this idiom instead:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">if</span>(!strcmp(s1, s2)) {
+ <span class="co">/* strings are equal */</span>
+ ...
+ }</code></pre></div>
+<p>My own feeling is that the first version is more clear, since <code class="backtick">!strcmp</code> always suggested to me that you were testing for the negation of some property (e.g. not equal). But if you think of <code class="backtick">strcmp</code> as telling you when two strings are different rather than when they are equal, this may not be so confusing.</p>
+<h3 id="Formatted_output_to_strings"><span class="header-section-number">4.10.7</span> Formatted output to strings</h3>
+<p>You can write formatted output to a string buffer with <code class="backtick">sprintf</code> just like you can write it to <code class="backtick">stdout</code> with <code class="backtick">printf</code> or to a file with <code class="backtick">fprintf</code>. Make sure when you do so that there is enough room in the buffer you are writing to, or the usual bad things will happen.</p>
+<h3 id="Dynamic_allocation_of_strings"><span class="header-section-number">4.10.8</span> Dynamic allocation of strings</h3>
+<p>When allocating space for a copy of a string <code class="backtick">s</code> using <code class="backtick">malloc</code>, the required space is <code class="backtick">strlen(s)+1</code>. Don't forget the <code class="backtick">+1</code>, or bad things may happen.<a href="#fn11" class="footnoteRef" id="fnref11"><sup>11</sup></a></p>
+<p>Because allocating space for a copy of a string is such a common operation, many C libraries provide a <code class="backtick">strdup</code> function that does exactly this. If you don't have one (it's not required by the C standard), you can write your own like this:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* return a freshly-malloc'd copy of s */</span>
+<span class="co">/* or 0 if malloc fails */</span>
+<span class="co">/* It is the caller's responsibility to free the returned string when done. */</span>
+<span class="dt">char</span> *
+strdup(<span class="dt">const</span> <span class="dt">char</span> *s)
+{
+ <span class="dt">char</span> *s2;
+
+ s2 = malloc(strlen(s)+<span class="dv">1</span>);
+
+ <span class="kw">if</span>(s2 != <span class="dv">0</span>) {
+ strcpy(s2, s);
+ }
+
+ <span class="kw">return</span> s2;
+}</code></pre></div>
+<p>Exercise: Write a function <code class="backtick">strcatAlloc</code> that returns a freshly-malloc'd string that concatenates its two arguments. Exactly how many bytes do you need to allocate?</p>
+<h3 id="argv"><span class="header-section-number">4.10.9</span> Command-line arguments</h3>
+<p>Now that we know about strings, we can finally do something with <code>argc</code> and <code class="backtick">argv</code>.</p>
+<p>Recall that <code class="backtick">argv</code> in <code class="backtick">main</code> is declared as <code class="backtick">char&nbsp;**</code>; this means that it is a pointer to a pointer to a <code class="backtick">char</code>, or in this case the base address of an array of pointers to <code class="backtick">char</code>,
+ where each such pointer references a string. These strings correspond
+to the command-line arguments to your program, with the program name
+itself appearing in <code class="backtick">argv[0]</code>.<a href="#fn12" class="footnoteRef" id="fnref12"><sup>12</sup></a></p>
+<p>The count <code class="backtick">argc</code> counts all arguments including <code class="backtick">argv[0]</code>; it is <code class="backtick">1</code> if your program is called with no arguments and larger otherwise.</p>
+<p>Here is a program that prints its arguments. If you get confused about what <code class="backtick">argc</code> and <code class="backtick">argv</code> do, feel free to compile this and play with it:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> i;
+
+ printf(<span class="st">"argc = %d</span><span class="ch">\n\n</span><span class="st">"</span>, argc);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; argc; i++) {
+ printf(<span class="st">"argv[%d] = %s</span><span class="ch">\n</span><span class="st">"</span>, i, argv[i]);
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/strings/printArgs.c" class="uri">examples/strings/printArgs.c</a>
+</div>
+<p>Like strings, C terminates <code class="backtick">argv</code> with a null: the value of <code class="backtick">argv[argc]</code> is always 0 (a null pointer to <code class="backtick">char</code>). In principle this allows you to recover <code class="backtick">argc</code> if you lose it.</p>
+<h2 id="structuredDataTypes"><span class="header-section-number">4.11</span> Structured data types</h2>
+<p>C has two kinds of structured data types: <code>struct</code>s and <code>union</code>s. A <code>struct</code> holds multiple values in consecutive memory locations, called <strong>fields</strong>, and implements what in type theory is called a <strong>product type</strong>: the set of possible values is the Cartesian product of the sets of possible values for its fields. In contrast, a <code>union</code>
+ has multiple fields but they are all stored in the same location:
+effectively, this means that only one field at a time can hold a value,
+making a <code>union</code> a <strong>sum type</strong> whose set of
+possible values is the union of the sets of possible values for each of
+its fields. Unlike what happens in more sensible programming languages, <code>union</code>s
+ are not tagged: unless you keep track of this somewhere else, you can't
+ tell which field in a union is being used, and you can store a value of
+ one type in a <code>union</code> and try to read it back as a different type, and C won't complain.<a href="#fn13" class="footnoteRef" id="fnref13"><sup>13</sup></a></p>
+<h3 id="structs"><span class="header-section-number">4.11.1</span> Structs</h3>
+<p>A <code class="backtick">struct</code> is a way to define a type that consists of one or more other types pasted together. Here's a typical <code class="backtick">struct</code> definition:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">struct</span> string {
+ <span class="dt">int</span> length;
+ <span class="dt">char</span> *data;
+};</code></pre></div>
+<p>This defines a new type <code class="backtick">struct&nbsp;string</code> that can be used anywhere you would use a simple type like <code class="backtick">int</code> or <code class="backtick">float</code>. When you declare a variable with type <code class="backtick">struct&nbsp;string</code>, the compiler allocates enough space to hold both an <code class="backtick">int</code> and a <code class="backtick">char&nbsp;*</code> (8 bytes on a typical 32-bit machine). You can get at the individual components using the <code class="backtick">.</code> operator, like this:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">struct</span> string {
+ <span class="dt">int</span> length;
+ <span class="dt">char</span> *data;
+};
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="kw">struct</span> string s;
+
+ s.length = <span class="dv">4</span>;
+ s.data = <span class="st">"this string is a lot longer than you think"</span>;
+
+ puts(s.data);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/structs/structExample.c" class="uri">examples/structs/structExample.c</a>
+</div>
+<p>Variables of type <code class="backtick">struct</code> can be
+assigned to, passed into functions, and returned from functions, just
+like any other type. Each such operation is applied
+componentwise; for example, <code class="backtick">s1&nbsp;=&nbsp;s2;</code> is equivalent to <code class="backtick">s1.length&nbsp;=&nbsp;s2.length;&nbsp;s1.data&nbsp;=&nbsp;s2.data;</code>. Structs cannot, however, be compared with <code class="backtick">==</code>: to test <code class="backtick">s1</code> and <code class="backtick">s2</code> for equality you must compare the components yourself, as in <code class="backtick">s1.length&nbsp;==&nbsp;s2.length&nbsp;&amp;&amp;&nbsp;s1.data&nbsp;==&nbsp;s2.data</code>.</p>
+<p>These operations are not used as often as you might think: typically,
+ instead of copying around entire structures, C programs pass around
+pointers, as is done with arrays. Pointers to <code class="backtick">struct</code>s are common enough in C that a special syntax is provided for dereferencing them.<a href="#fn14" class="footnoteRef" id="fnref14"><sup>14</sup></a> Suppose we have:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">struct</span> string s; <span class="co">/* a struct */</span>
+ <span class="kw">struct</span> string *sp; <span class="co">/* a pointer to a struct */</span>
+
+ s.length = <span class="dv">4</span>;
+ s.data = <span class="st">"another overly long string"</span>;
+
+ sp = &amp;s; <span class="co">/* sp now points to s */</span></code></pre></div>
+<p>We can then refer to elements of the <code class="backtick">struct&nbsp;string</code> that <code class="backtick">sp</code> points to (i.e. <code class="backtick">s</code>) in either of two ways:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> puts((*sp).data);
+ puts(sp-&gt;data);</code></pre></div>
+<p>The second is more common, since it involves typing fewer parentheses. It is an error to write <code class="backtick">*sp.data</code> in this case; since <code class="backtick">.</code> binds tighter than <code class="backtick">*</code>, the compiler will attempt to evaluate <code class="backtick">sp.data</code> first and generate an error, since <code class="backtick">sp</code> doesn't have a <code class="backtick">data</code> field.</p>
+<p>Pointers to <code class="backtick">struct</code>s are commonly used in defining <a href="#abstractDataTypes">abstract data types</a>, since it is possible to declare that a function returns e.g. a <code class="backtick">struct&nbsp;string&nbsp;*</code> without specifying the components of a <code class="backtick">struct&nbsp;string</code>. (All pointers to <code class="backtick">struct</code>s
+ in C have the same size and structure, so the compiler doesn't need to
+know the components to pass around the address.) Hiding the components
+discourages code that shouldn't look at them from doing so, and can be
+used, for example, to enforce consistency between fields.</p>
+<p>For example, suppose we wanted to define a <code class="backtick">struct&nbsp;string&nbsp;*</code>
+ type that held counted strings that could only be accessed through a
+restricted interface that prevented (for example) the user from changing
+ the string or its length. We might create a file <code class="backtick">myString.h</code> that contained the declarations:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* make a struct string * that holds a copy of s */</span>
+<span class="co">/* returns 0 if malloc fails */</span>
+<span class="kw">struct</span> string *makeString(<span class="dt">const</span> <span class="dt">char</span> *s);
+
+<span class="co">/* destroy a struct string * */</span>
+<span class="dt">void</span> destroyString(<span class="kw">struct</span> string *);
+
+<span class="co">/* return the length of a struct string * */</span>
+<span class="dt">int</span> stringLength(<span class="kw">struct</span> string *);
+
+<span class="co">/* return the character at position index in the struct string * */</span>
+<span class="co">/* or returns -1 if index is out of bounds */</span>
+<span class="dt">int</span> stringCharAt(<span class="kw">struct</span> string *s, <span class="dt">int</span> index);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/myString/myString.h" class="uri">examples/myString/myString.h</a>
+</div>
+<p>and then the actual implementation in <code class="backtick">myString.c</code> would be the only place where the components of a <code class="backtick">struct&nbsp;string</code> were defined:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;string.h&gt;</span>
+
+<span class="ot">#include "myString.h"</span>
+
+<span class="kw">struct</span> string {
+ <span class="dt">int</span> length;
+ <span class="dt">char</span> *data;
+};
+
+<span class="kw">struct</span> string *
+makeString(<span class="dt">const</span> <span class="dt">char</span> *s)
+{
+ <span class="kw">struct</span> string *s2;
+
+ s2 = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> string));
+ <span class="kw">if</span>(s2 == <span class="dv">0</span>) { <span class="kw">return</span> <span class="dv">0</span>; } <span class="co">/* let caller worry about malloc failures */</span>
+
+ s2-&gt;length = strlen(s);
+
+ s2-&gt;data = malloc(s2-&gt;length);
+ <span class="kw">if</span>(s2-&gt;data == <span class="dv">0</span>) {
+ free(s2);
+ <span class="kw">return</span> <span class="dv">0</span>;
+ }
+
+ strncpy(s2-&gt;data, s, s2-&gt;length);
+
+ <span class="kw">return</span> s2;
+}
+
+<span class="dt">void</span>
+destroyString(<span class="kw">struct</span> string *s)
+{
+ free(s-&gt;data);
+ free(s);
+}
+
+<span class="dt">int</span>
+stringLength(<span class="kw">struct</span> string *s)
+{
+ <span class="kw">return</span> s-&gt;length;
+}
+
+<span class="dt">int</span>
+stringCharAt(<span class="kw">struct</span> string *s, <span class="dt">int</span> index)
+{
+ <span class="kw">if</span>(index &lt; <span class="dv">0</span> || index &gt;= s-&gt;length) {
+ <span class="kw">return</span> -<span class="dv">1</span>;
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> s-&gt;data[index];
+ }
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/myString/myString.c" class="uri">examples/myString/myString.c</a>
+</div>
+<p>In practice, we would probably go even further and replace all the <code class="backtick">struct&nbsp;string&nbsp;*</code> types with a new name declared with <code class="backtick">typedef</code>.</p>
+<h4 id="operations-on-structs"><span class="header-section-number">4.11.1.1</span> Operations on structs</h4>
+<p>What you can do to structs is pretty limited: you can look up or set
+individual components in a struct, you can pass structs to functions or
+as return values from functions (which makes a copy of the original
+struct), and you can assign the contents of one struct to another using <code>s1 = s2</code> (which is equivalent to copying each component separately).</p>
+<p>One thing that you <em>can't</em> do is test two structs for equality using <code>==</code>;
+ this is because structs may contain extra space holding junk data. If
+you want to test for equality, you will need to do it component by
+component.</p>
+<h4 id="structLayout"><span class="header-section-number">4.11.1.2</span> Layout in memory</h4>
+<p>The C99 standard guarantees that the components of a <code>struct</code>
+ are stored in memory in the same order that they are defined in: that
+is, later components are placed at higher addresses. This allows sneaky
+tricks like truncating a structure if you don't use all of its
+components. Because of <a href="#alignment">alignment restrictions</a>, the compiler may add padding between components to put each component on its preferred alignment boundary.</p>
+<p>You can find the position of a component within a <code>struct</code> using the <code>offsetof</code> macro, which is defined in <code>stddef.h</code>.
+ This returns the number of bytes from the base of the struct that the
+component starts at, and can be used to do various terrifying
+non-semantic things with pointers.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;stddef.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="kw">struct</span> foo {
+ <span class="dt">int</span> i;
+ <span class="dt">char</span> c;
+ <span class="dt">double</span> d;
+ <span class="dt">float</span> f;
+ <span class="dt">char</span> *s;
+ };
+
+ printf(<span class="st">"i is at %lu</span><span class="ch">\n</span><span class="st">"</span>, offsetof(<span class="kw">struct</span> foo, i));
+ printf(<span class="st">"c is at %lu</span><span class="ch">\n</span><span class="st">"</span>, offsetof(<span class="kw">struct</span> foo, c));
+ printf(<span class="st">"d is at %lu</span><span class="ch">\n</span><span class="st">"</span>, offsetof(<span class="kw">struct</span> foo, d));
+ printf(<span class="st">"f is at %lu</span><span class="ch">\n</span><span class="st">"</span>, offsetof(<span class="kw">struct</span> foo, f));
+ printf(<span class="st">"s is at %lu</span><span class="ch">\n</span><span class="st">"</span>, offsetof(<span class="kw">struct</span> foo, s));
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/structs/offsetof.c" class="uri">examples/structs/offsetof.c</a>
+</div>
+<h4 id="Bit_fields"><span class="header-section-number">4.11.1.3</span> Bit fields</h4>
+<p>It is possible to specify the exact number of bits taken up by a member of a <code>struct</code>
+ of integer type. This is seldom useful, but may in principle let you
+pack more information in less space. Bit fields are sometimes used to
+unpack data from an external source that uses this trick, but this is
+dangerous, because there is no guarantee that the compiler will order
+the bit fields in your <code>struct</code> in any particular order (at the very least, you will need to worry about <a href="http://en.wikipedia.org/wiki/Endianness">endianness</a>).</p>
+<p>Example:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">struct</span> color {
+ <span class="dt">unsigned</span> <span class="dt">int</span> red : <span class="dv">2</span>;
+ <span class="dt">unsigned</span> <span class="dt">int</span> green : <span class="dv">2</span>;
+ <span class="dt">unsigned</span> <span class="dt">int</span> blue : <span class="dv">2</span>;
+ <span class="dt">unsigned</span> <span class="dt">int</span> alpha : <span class="dv">2</span>;
+};</code></pre></div>
+<p>This defines a <code>struct</code> that (probably) occupies only one byte, and supplies four 2-bit fields, each of which can hold values in the range 0-3.</p>
+<h3 id="unions"><span class="header-section-number">4.11.2</span> Unions</h3>
+<p>A <code class="backtick">union</code> is just like a <code class="backtick">struct</code>,
+ except that instead of allocating space to store all the components,
+the compiler only allocates space to store the largest one, and makes
+all the components refer to the same address. This can be used to save
+space if you know that only one of several components will be meaningful
+ for a particular object. An example might be a type representing an
+object in a LISP-like language like Scheme:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">struct</span> lispObject {
+ <span class="dt">int</span> type; <span class="co">/* type code */</span>
+ <span class="kw">union</span> {
+ <span class="dt">int</span> intVal;
+ <span class="dt">double</span> floatVal;
+ <span class="dt">char</span> * stringVal;
+ <span class="kw">struct</span> {
+ <span class="kw">struct</span> lispObject *car;
+ <span class="kw">struct</span> lispObject *cdr;
+ } consVal;
+ } u;
+};</code></pre></div>
+<p>Now if you wanted to make a <code class="backtick">struct&nbsp;lispObject</code> that held an integer value, you might write</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">struct</span> lispObject o;
+
+ o.type = TYPE_INT;
+ o.u.intVal = <span class="dv">27</span>;</code></pre></div>
+<p>Here <code class="backtick">TYPE_INT</code> has presumably been defined somewhere. Note that nothing then prevents you from writing</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> x = <span class="fl">2.7</span> * o.u.floatVal; <span class="co">/* BAD */</span></code></pre></div>
+<p>The effects of this will be strange, since it's likely that the bit pattern representing 27 as an <code class="backtick">int</code> represents something very different as a <code class="backtick">double</code>. Avoiding such mistakes is your responsibility, which is why most uses of <code class="backtick">union</code> occur inside larger <code class="backtick">struct</code>s that contain enough information to figure out which variant of the <code class="backtick">union</code> applies.</p>
+<h3 id="enums"><span class="header-section-number">4.11.3</span> Enums</h3>
+<p>C provides the <code class="backtick">enum</code> construction for the special case where you want to have a sequence of named constants of type <code>int</code>, but you don't care what their actual values are, as in</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">enum</span> color { RED, BLUE, GREEN, MAUVE, TURQUOISE };</code></pre></div>
+<p>This will assign the value <code class="backtick">0</code> to <code class="backtick">RED</code>, <code class="backtick">1</code> to <code class="backtick">BLUE</code>, and so on. These values are effectively of type <code class="backtick">int</code>, although you can declare variables, arguments, and return values as type <code class="backtick">enum&nbsp;color</code> to indicate their intended interpretation.</p>
+<p>Despite declaring a variable <code class="backtick">enum&nbsp;color&nbsp;c</code> (say), the compiler will still allow <code class="backtick">c</code> to hold arbitrary values of type <code class="backtick">int</code>.<br>
+So the following ridiculous code works just fine:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="kw">enum</span> foo { FOO };
+<span class="kw">enum</span> apple { MACINTOSH, CORTLAND, RED_DELICIOUS };
+<span class="kw">enum</span> orange { NAVEL, CLEMENTINE, TANGERINE };
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="kw">enum</span> foo x;
+
+ <span class="kw">if</span>(argc != <span class="dv">1</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ printf(<span class="st">"FOO = %d</span><span class="ch">\n</span><span class="st">"</span>, FOO);
+ printf(<span class="st">"sizeof(enum foo) = %d</span><span class="ch">\n</span><span class="st">"</span>, <span class="kw">sizeof</span>(<span class="kw">enum</span> foo));
+
+ x = <span class="dv">127</span>;
+
+ printf(<span class="st">"x = %d</span><span class="ch">\n</span><span class="st">"</span>, x);
+
+ <span class="co">/* note we can add apples and oranges */</span>
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, RED_DELICIOUS + TANGERINE);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/definitions/enumsAreInts.c" class="uri">examples/definitions/enumsAreInts.c</a>
+</div>
+<h4 id="specifying-particular-values"><span class="header-section-number">4.11.3.1</span> Specifying particular values</h4>
+<p>It is also possible to specify particular values for particular enumerated constants, as in</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">enum</span> color { RED = <span class="dv">37</span>, BLUE = <span class="dv">12</span>, GREEN = <span class="dv">66</span>, MAUVE = <span class="dv">5</span>, TURQUOISE };</code></pre></div>
+<p>Anything that doesn't get a value starts with one plus the previous value; so the above definition would set <code class="backtick">TURQUOISE</code> to <code class="backtick">6</code>. This may result in two names mapping to the same value.</p>
+<h4 id="what-most-people-do"><span class="header-section-number">4.11.3.2</span> What most people do</h4>
+<p>In practice, <code class="backtick">enum</code>s are seldom used, and you will more commonly see a stack of <code class="backtick">#define</code>s:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define RED (0)</span>
+<span class="ot">#define BLUE (1)</span>
+<span class="ot">#define GREEN (2)</span>
+<span class="ot">#define MAUVE (3)</span>
+<span class="ot">#define TURQUOISE (4)</span></code></pre></div>
+<p>The reason for this is partly historical—<code class="backtick">enum</code> arrived late in the evolution of C—but partly practical: a table of <code class="backtick">#define</code>s
+ makes it much easier to figure out which color is represented by 3,
+without having to count through a list. But if you never plan to use the
+ numerical values, <code class="backtick">enum</code> may be a better choice, because it guarantees that all the values will be distinct.</p>
+<h4 id="enumTagsForUnion"><span class="header-section-number">4.11.3.3</span> Using <code>enum</code> with <code>union</code></h4>
+<p>A natural place to use an <code>enum</code> is to tag a <code>union</code> with the type being used. For example, a Lisp-like language might implement the following multi-purpose data type:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">enum</span> TypeCode { TYPE_INT, TYPE_DOUBLE, TYPE_STRING };
+
+<span class="kw">struct</span> LispValue {
+ <span class="kw">enum</span> TypeCode typeCode;
+ <span class="kw">union</span> {
+ <span class="dt">int</span> i;
+ <span class="dt">double</span> d;
+ <span class="dt">char</span> *s;
+ } value;
+};</code></pre></div>
+<p>Here we don't care what the numeric values of <code>TYPE_INT</code>, <code>TYPE_DOUBLE</code>, and <code>TYPE_STRING</code> are, as long as we can apply <code>switch</code> to <code>typeCode</code> to figure out what to do with one of these things.</p>
+<h2 id="typedef"><span class="header-section-number">4.12</span> Type aliases using <code>typedef</code></h2>
+<p>Suppose that you want to represent character strings as</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">struct</span> string {
+ <span class="dt">int</span> length;
+ <span class="dt">char</span> *data; <span class="co">/* malloc'd block */</span>
+};
+
+<span class="dt">int</span> stringLength(<span class="dt">const</span> <span class="kw">struct</span> string *s);</code></pre></div>
+<p>If you later change the representation to, say, traditional null-terminated <code class="backtick">char&nbsp;*</code> strings or some even more complicated type (<code class="backtick">union&nbsp;string&nbsp;**some_string[2];</code>), you will need to go back and replace every occurrence of <code class="backtick">struct&nbsp;string&nbsp;*</code>
+ in every program that uses it with the new type. Even if you don't
+expect to change the type, you may still get tired of typing <code class="backtick">struct&nbsp;string&nbsp;*</code> all the time, especially if your fingers slip and give you <code class="backtick">struct&nbsp;string</code> sometimes.</p>
+<p>The solution is to use a <code class="backtick">typedef</code>, which defines a new type name:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">typedef</span> <span class="kw">struct</span> string *String;
+
+<span class="dt">int</span> stringLength(String s);</code></pre></div>
+<p>The syntax for <code class="backtick">typedef</code> looks like a variable declaration preceded by <code class="backtick">typedef</code>,
+ except that the variable is replaced by the new type name that acts
+like whatever type the defined variable would have had. You can use a
+name defined with <code class="backtick">typedef</code> anywhere you could use a normal type name, as long as it is later in the source file than the <code class="backtick">typedef</code> definition. Typically <code class="backtick">typedef</code>s are placed in a header file (<code class="backtick">.h</code> file) that is then included anywhere that needs them.</p>
+<p>You are not limited to using <code class="backtick">typedef</code>s
+only for complex types. For example, if you were writing numerical code
+and wanted to declare overtly that a certain quantity was not just any <code class="backtick">double</code> but actually a length in meters, you could write</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">typedef</span> <span class="dt">double</span> LengthInMeters;
+<span class="kw">typedef</span> <span class="dt">double</span> AreaInSquareMeters;
+
+AreaInSquareMeters rectangleArea(LengthInMeters height, LengthInMeters width);</code></pre></div>
+<p>Unfortunately, C does not do type enforcement on <code class="backtick">typedef</code>'d types: it is perfectly acceptable to the compiler if you pass a value of type <code class="backtick">AreaInSquareMeters</code> as the first argument to <code class="backtick">rectangleArea</code>, since by the time it checks it has replaced both <code class="backtick">AreaInSquareMeters</code> and <code class="backtick">LengthInMeters</code> by <code class="backtick">double</code>. So this feature is not as useful as it might be, although it does mean that you can write <code class="backtick">rectangleArea(2.0,&nbsp;3.0)</code> without having to do anything to convert <code class="backtick">2.0</code> and <code class="backtick">3.0</code> to type <code class="backtick">LengthInMeters</code>.</p>
+<h3 id="opaqueStructs"><span class="header-section-number">4.12.1</span> Opaque structs</h3>
+<p>There are certain cases where the compiler needs to know the definition of a <code>struct</code>:</p>
+<ol style="list-style-type: decimal">
+<li>When the program accesses its components.</li>
+<li>When the compiler needs to know its size. This may be because you are building an array of these <code>struct</code>s, because they appear in a larger <code>struct</code>, when you are passing the <code>struct</code> as an argument or assigning it to a variable, or just because you applied <code>sizeof</code> to the <code>struct</code>.</li>
+</ol>
+<p>But the compiler does <em>not</em> need to know the definition of a <code>struct</code> to know how to create a pointer to it. This is because all <code>struct</code> pointers have the same size and structure.</p>
+<p>This allows a trick called an <strong>opaque struct</strong>, which can be used for <strong>information hiding</strong>, where one part of your program is allowed to see the definition of a <code>struct</code> but other parts are not.</p>
+<p>The idea is to create a header file that defines all the functions that might be used to access the <code>struct</code>, but does not define the <code>struct</code> itself. For example, suppose we want to create a counter, where the user can call a function <code>increment</code> that acts like <code>++</code>
+ in the sense that it increments the counter and returns the new value,
+but we don't want to allow the user to change the value of the counter
+in any other way. This header file defines the <strong>interface</strong> to the counter.</p>
+<p>Here is the header file:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* Create a new counter, initialized to 0. Call counterDestroy to get rid of it. */</span>
+<span class="kw">struct</span> counter * counterCreate(<span class="dt">void</span>);
+
+<span class="co">/* Free space used by a counter. */</span>
+<span class="dt">void</span> counterDestroy(<span class="kw">struct</span> counter *);
+
+<span class="co">/* Increment a counter and return new value. */</span>
+<span class="dt">int</span> counterIncrement(<span class="kw">struct</span> counter *);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/structs/opaqueStructs/counter.h" class="uri">examples/structs/opaqueStructs/counter.h</a>
+</div>
+<p>We can now write code that uses the <code>struct counter *</code> type without knowing what it is actually pointing to:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="ot">#include "counter.h"</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="kw">struct</span> counter *c;
+ <span class="dt">int</span> value;
+
+ c = counterCreate();
+
+ <span class="kw">while</span>((value = counterIncrement(c)) &lt; <span class="dv">10</span>) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, value);
+ }
+
+ counterDestroy(c);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/structs/opaqueStructs/testCounter.c" class="uri">examples/structs/opaqueStructs/testCounter.c</a>
+</div>
+<p>To make this work, we do have to provide an <strong>implementation</strong>. The obvious way to do it is have a <code>struct counter</code> store the counter value in an <code>int</code>,
+ but one could imagine other (probably bad) implementations that did
+other things, as long as from the outside they acted like we expect.</p>
+<p>We only put the definition of a <code>struct counter</code> in this
+file. This means that only functions in this file can access a counter's
+ components, compute the size of a counter, and so forth. While we can't
+ absolutely prevent some other function from extracting or modifying the
+ contents of a counter (C doesn't provide that kind of memory
+protection), we can at least hint very strongly that the programmer
+shouldn't be doing this.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="ot">#include "counter.h"</span>
+
+<span class="kw">struct</span> counter {
+ <span class="dt">int</span> value;
+};
+
+<span class="kw">struct</span> counter *
+counterCreate(<span class="dt">void</span>)
+{
+ <span class="kw">struct</span> counter *c;
+
+ c = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> counter));
+ assert(c);
+
+ c-&gt;value = <span class="dv">0</span>;
+
+ <span class="kw">return</span> c;
+}
+
+<span class="dt">void</span>
+counterDestroy(<span class="kw">struct</span> counter *c)
+{
+ free(c);
+}
+
+<span class="dt">int</span>
+counterIncrement(<span class="kw">struct</span> counter *c)
+{
+ <span class="kw">return</span> ++(c-&gt;value);
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/structs/opaqueStructs/counter.c" class="uri">examples/structs/opaqueStructs/counter.c</a>
+</div>
+<p>We will see this trick used over and over again when we build <a href="#abstractDataTypes">abstract data types</a>.</p>
+<h2 id="macros"><span class="header-section-number">4.13</span> Macros</h2>
+<p>See K&amp;R Appendix A12.3 for full details on macro expansion in ANSI C and <a href="http://gcc.gnu.org/onlinedocs/cpp/Macros.html" class="uri">http://gcc.gnu.org/onlinedocs/cpp/Macros.html</a> for documentation on what <code>gcc</code> supports.</p>
+<p>The short version: the command</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define FOO (12)</span></code></pre></div>
+<p>causes any occurrence of the word <code class="backtick">FOO</code> in your source file to be replaced by <code class="backtick">(12)</code> by the preprocessor. To count as a word, <code class="backtick">FOO</code> can't be adjacent to other alphanumeric characters, so for example <code class="backtick">FOOD</code> will <em>not</em> expand to <code class="backtick">(12)D</code>.</p>
+<h3 id="Macros_with_arguments"><span class="header-section-number">4.13.1</span> Macros with arguments</h3>
+<p>To create a macro with arguments, put them in parentheses separated by commas after the macro name, e.g.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define Square(x) ((x)*(x))</span></code></pre></div>
+<p>Now if you write <code class="backtick">Square(foo)</code> it will expand as <code class="backtick">((foo)*(foo))</code>.
+ Note the heavy use of parentheses inside the macro definition to avoid
+trouble with operator precedence; if instead we had written</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define BadSquare(x) x*x</span></code></pre></div>
+<p>then <code class="backtick">BadSquare(3+4)</code> would give <code class="backtick">3+4*3+4</code>, which evaluates to <code class="backtick">19</code>,
+ which is probably not what we intended. The general rule is that macro
+arguments should always be put in parentheses if you are using them in
+an expression where precedence might be an issue.</p>
+<h4 id="Multiple_arguments"><span class="header-section-number">4.13.1.1</span> Multiple arguments</h4>
+<p>You can have multiple arguments to a macro, e.g.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define Average(x,y) (((x)+(y))/2.0)</span></code></pre></div>
+<p>The usual caveats about using lots of parentheses apply.</p>
+<h4 id="Perils_of_repeating_arguments"><span class="header-section-number">4.13.1.2</span> Perils of repeating arguments</h4>
+<p>Macros can have odd effects if their arguments perform side-effects. For example, <code class="backtick">Square(++x)</code> expands to <code class="backtick">((++x)*(++x))</code>; if <code class="backtick">x</code> starts out equal to <code class="backtick">1</code>, this expression may evaluate to any of <code class="backtick">2</code>, <code class="backtick">6</code>, or <code class="backtick">9</code> depending on when the <code class="backtick">++</code> operators are evaluated, and will definitely leave <code class="backtick">3</code> in <code class="backtick">x</code> instead of the <code class="backtick">2</code>
+ the programmer probably expects. For this reason it is generally best
+to avoid side-effects in macro arguments, and to mark macro names (e.g.
+by capitalization) to clearly distinguish them from function names,
+where this issue doesn't come up.</p>
+<h4 id="Variable-length_argument_lists"><span class="header-section-number">4.13.1.3</span> Variable-length argument lists</h4>
+<p>C99 added <strong>variadic macros</strong> that may have a variable number of arguments; these are mostly useful for dealing with variadic functions (like <code>printf</code>) that also take a variable number of arguments.</p>
+<p>To define a variadic macro, define a macro with arguments where the last argument is three periods: <code>...</code> . The macro <code>__VA_ARGS__</code> then expands to whatever arguments matched this ellipsis in the macro call.</p>
+<p>For example:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="ot">#define Warning(...) fprintf(stderr, __VA_ARGS__)</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ Warning(<span class="st">"%s: this program contains no useful code</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+
+ <span class="kw">return</span> <span class="dv">1</span>;
+}</code></pre></div>
+<p>It is possible to mix regular arguments with <code>...</code>, as long as <code>...</code> comes last:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define Useless(format, ...) printf(format, __VA_ARGS__)</span></code></pre></div>
+<h4 id="macros-vs.-inline-functions"><span class="header-section-number">4.13.1.4</span> Macros vs. inline functions</h4>
+<p>It is sometimes tempting to use a macro to avoid having to retype
+some small piece of code that does not seem big enough to justify a
+full-blown function, especially if the cost of the body of the function
+is small relative to the cost of a function call. <strong>Inline functions</strong>
+ are a mechanism that is standard in C99 (and found in some compilers
+for older variants of C) that gives you the ability to write a function
+that will never pay this function call overhead; instead, any call to an
+ inline function is effectively replaced by the body of the function.
+Unlike parameterized macros, inline functions do not suffer from issues
+with duplicated parameters or weird text-substitution oddities.</p>
+<p>To take a simple example, the <code>distSquared</code> function that we used to illustrate <a href="#functionDefinitions">function definitions</a> doesn't do very much: just two multiplications and an addition. If we are doing a lot of <code>distSquared</code>
+ computations, we could easily double the cost of each computation with
+function call overhead. One alternative might be to use a macro:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define DistSquared(x,y) ((x)*(x)+(y)*(y))</span></code></pre></div>
+<p>but this suffers from the parameter-duplication problem, which could be particularly unfortunate if we compute <code>DistSquared(expensiveFunctionWithManySideEffects(), 12)</code>. A better alternative is to use an inline function.</p>
+<p>Like macros, inline functions should be defined in header files.
+Ordinary functions always go in C files because (a) we only want to
+compile them once, and (b) the linker will find them in whatever <code>.o</code> file they end up in anyway. But inline functions generally don't get compiled independently, so this doesn't apply.</p>
+<p>Here is a header file for an inline version of <code>distSquared</code>:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* Returns the square of the distance between two points separated by </span>
+<span class="co"> dx in the x direction and dy in the y direction. */</span>
+<span class="dt">static</span> <span class="kw">inline</span> <span class="dt">int</span>
+distSquared(<span class="dt">int</span> dx, <span class="dt">int</span> dy)
+{
+ <span class="kw">return</span> dx*dx + dy*dy;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/functions/distSquaredInline.h" class="uri">examples/functions/distSquaredInline.h</a>
+</div>
+<p>This looks exactly like the original <code>distSquared</code>, except that we added <code>static inline</code>. We want this function to be declared <code>static</code>
+ because otherwise some compilers will try to emit a non-inline
+definition for it in every C file this header is included in, which could
+ have bad results.<a href="#fn15" class="footnoteRef" id="fnref15"><sup>15</sup></a></p>
+<p>The nice thing about this approach is that if we do decide to make <code>distSquared</code>
+ an ordinary function (maybe it will make debugging easier, or we
+realize we want to be able to take its address), then we can just move
+the definition into a <code>.c</code> file and take the <code>static inline</code>
+ off. Indeed, this is probably the safest thing to start with, since we
+can also do the reverse if we find that function call overhead on this
+particular function really does account for a non-trivial part of our
+running time (see <a href="#profiling">profiling</a>).</p>
+<h3 id="Multiple_macros"><span class="header-section-number">4.13.2</span> Macros that include other macros</h3>
+<p>One macro can expand to another; for example, after defining</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define FOO BAR</span>
+<span class="ot">#define BAR (12)</span></code></pre></div>
+<p>it will be the case that <code class="backtick">FOO</code> will expand to <code class="backtick">BAR</code> which will then expand to <code class="backtick">(12)</code>. For obvious reasons, it is a bad idea to have a macro expansion contain the original macro name.</p>
+<h3 id="Macro_tricks"><span class="header-section-number">4.13.3</span> More specialized macros</h3>
+<p>Some standard idioms have evolved over the years to deal with issues
+that come up in defining complex macros. Usually, having a complex macro
+ is a sign of bad design, but these tools can be useful in some
+situations.</p>
+<h4 id="Multiple_expressions_in_a_macro"><span class="header-section-number">4.13.3.1</span> Multiple expressions in a macro</h4>
+<p>Use the comma operator, e.g.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define NoisyInc(x) (puts("incrementing"), (x)++)</span></code></pre></div>
+<p>The comma operator evaluates both of its operands and returns the value of the one on the right-hand side.</p>
+<p>You can also choose between alternatives using the ternary <code class="backtick">?:</code> operator, as in</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define Max(a,b) ((a) &gt; (b) ? (a) : (b))</span></code></pre></div>
+<p>(but see the warning about repeated parameters above).</p>
+<h4 id="nonSyntacticMacros"><span class="header-section-number">4.13.3.2</span> Non-syntactic macros</h4>
+<p>Suppose you get tired of writing</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) ...</code></pre></div>
+<p>all the time. In principle, you can write a macro</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define UpTo(i, n) for((i) = 0; (i) &lt; (n); (i)++)</span></code></pre></div>
+<p>and then write</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> UpTo(i, <span class="dv">10</span>) ...</code></pre></div>
+<p>in place of your former <code class="backtick">for</code> loop headers. This is generally a good way to make your code completely unreadable. Such macros are called <strong>non-syntactic</strong> because they allow code that doesn't look like syntactically correct C.</p>
+<p>Sometimes, however, it makes sense to use non-syntactic macros when
+you want something that writes to a variable without having to pass it
+to a function as a pointer. An example might be something like this <code class="backtick">malloc</code> wrapper:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define TestMalloc(x) ((x) = malloc(sizeof(*x)), assert(x))</span></code></pre></div>
+<p>(Strictly speaking, this is probably more of a "non-semantic" macro.)</p>
+<p>Whether the confusion of having a non-syntactic macro is worth the
+gain in safety or code-writing speed is a judgment call that can only be
+ made after long and painful experience. If in doubt, it's probably best
+ not to do it.</p>
+<h4 id="Multiple_statements_in_one_macro"><span class="header-section-number">4.13.3.3</span> Multiple statements in one macro</h4>
+<p>If you want to write a macro that looks like a function call but contains multiple statements, the correct way to do it is like</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define HiHi() do { puts("hi"); puts("hi"); } while(0)</span></code></pre></div>
+<p>This can safely be used in place of single statements, like this:<a href="#fn16" class="footnoteRef" id="fnref16"><sup>16</sup></a></p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">if</span>(friendly)
+ HiHi();
+ <span class="kw">else</span>
+ snarl();</code></pre></div>
+<p>Note that no construct except <code class="backtick">do..while</code> will work here. Just using braces will cause trouble with the semicolon before the <code class="backtick">else</code>, and no other compound statement besides <code class="backtick">do..while</code> expects to be followed by a semicolon in this way.</p>
+<h4 id="String_expansion"><span class="header-section-number">4.13.3.4</span> String expansion</h4>
+<p>Let's rewrite <code class="backtick">NoisyInc</code> to include the variable name:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define BadNoisyInc2(x) (puts("Incrementing x"), x++)</span></code></pre></div>
+<p>Will this do what we want? No. The C preprocessor is smart enough not to expand macro parameters inside strings, so <code class="backtick">BadNoisyInc2(y)</code> will expand to <code class="backtick">(puts("Incrementing&nbsp;x"),&nbsp;y++)</code>. Instead, we have to write</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define NoisyInc2(x) (puts("Incrementing " #x), x++)</span></code></pre></div>
+<p>Here <code class="backtick">#x</code> expands to whatever the value of <code class="backtick">x</code>
+ is wrapped in double quotes. The resulting string constant is then
+concatenated with the adjacent string constant according to standard C
+string constant concatenation rules.</p>
+<p>To concatenate things that aren't strings, use the <code class="backtick">##</code> operator, as in</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define FakeArray(n) fakeArrayVariableNumber ## n</span></code></pre></div>
+<p>This lets you write <code class="backtick">FakeArray(12)</code> instead of <code class="backtick">fakeArrayVariableNumber12</code>. Note that there is generally no good reason to ever do this.</p>
+<p>Where this feature does become useful is if you want to be able to
+refer to part of the source code of your program. For example, here is
+a short program that includes a macro that prints the source code and
+value of an expression:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="ot">#define PrintExpr(x) (printf("%s = %d\n", #x, (x)))</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ PrintExpr(<span class="dv">2+2</span>);
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/macros/printExpr.c" class="uri">examples/macros/printExpr.c</a>
+</div>
+<p>When run, this program prints</p>
+<pre><code>2+2 = 4</code></pre>
+<p>Without using a macro, there is no way to capture the text string <code class="backtick">"2+2"</code> so we can print it.</p>
+<p>This sort of trickery is mostly used in debugging. The <code class="backtick">assert</code> macro is a more sophisticated version, which uses the built-in macros <code class="backtick">__FILE__</code> (which expands to the current source file as a quoted string) and <code class="backtick">__LINE__</code>
+ (which expands to the current source line number, not quoted) to not
+only print out an offending expression, but also the location of it in
+the source.</p>
+<h4 id="Big_macros"><span class="header-section-number">4.13.3.5</span> Big macros</h4>
+<p>Nothing restricts a macro expansion to a single line, although you
+must put a backslash at the end of each line to keep it going. Here is a
+ macro that declares a specialized sorting routine for any type that
+supports <code class="backtick">&lt;</code>:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define DeclareSort(prefix, type) \</span>
+<span class="ot">static int \</span>
+<span class="ot">_DeclareSort_ ## prefix ## _Compare(const void *a, const void *b) \</span>
+<span class="ot">{ \</span>
+<span class="ot"> const type *aa; const type *bb; \</span>
+<span class="ot"> aa = a; bb = b; \</span>
+<span class="ot"> if(*aa &lt; *bb) return -1; \</span>
+<span class="ot"> else if(*bb &lt; *aa) return 1; \</span>
+<span class="ot"> else return 0; \</span>
+<span class="ot">} \</span>
+<span class="ot">\</span>
+<span class="ot">void \</span>
+<span class="ot">prefix ## _sort(type *a, int n)\</span>
+<span class="ot">{ \</span>
+<span class="ot"> qsort(a, n, sizeof(type), _DeclareSort_ ## prefix ## _Compare); \</span>
+<span class="ot">}</span></code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/macros/declareSort.h" class="uri">examples/macros/declareSort.h</a>
+</div>
+<p>A typical use might be</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="ot">#include "declareSort.h"</span>
+
+<span class="co">/* note: must appear outside of any function, and has no trailing semicolon */</span>
+DeclareSort(<span class="dt">int</span>, <span class="dt">int</span>)
+
+<span class="ot">#define N (50)</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> a[N];
+ <span class="dt">int</span> i;
+
+ <span class="kw">for</span>(i=<span class="dv">0</span>; i &lt; N; i++) {
+ a[i] = N-i;
+ }
+
+ int_sort(a, N);
+
+ <span class="kw">for</span>(i=<span class="dv">0</span>; i &lt; N; i++) {
+ printf(<span class="st">"%d "</span>, a[i]);
+ }
+ putchar(<span class="ch">'\n'</span>);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/macros/useDeclareSort.c" class="uri">examples/macros/useDeclareSort.c</a>
+</div>
+<p>Do this too much and you will end up reinventing C++ templates, which
+ are a more or less equivalent mechanism for generating polymorphic code
+ that improve on C macros like the one above by letting you omit the
+backslashes.</p>
+<h3 id="ifdef"><span class="header-section-number">4.13.4</span> Conditional compilation</h3>
+<p>In addition to generating code, macros can be used for <strong>conditional compilation</strong>, where a section of the source code is included only if a particular macro is defined. This is done using the <code>#ifdef</code> and <code>#ifndef</code> preprocessor directives. In its simplest form, writing <code>#ifdef NAME</code> includes all code up to the next <code>#endif</code> if and only if <code>NAME</code> is defined. Similarly, <code>#ifndef NAME</code> includes all code up to the next <code>#endif</code> if and only if <code>NAME</code> is <em>not</em> defined.</p>
+<p>Like regular C <code>if</code> statements, <code>#ifdef</code> and <code>#ifndef</code> directives can be nested, and can include else cases, which are separated by an <code>#else</code> directive.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+<span class="ot">#ifdef SAY_HI</span>
+ puts(<span class="st">"Hi."</span>);
+<span class="ot">#else </span><span class="co">/* matches #ifdef SAY_HI */</span>
+<span class="ot">#ifndef BE_POLITE</span>
+ puts(<span class="st">"Go away!"</span>);
+<span class="ot">#else </span><span class="co">/* matches #ifndef BE_POLITE */</span>
+ puts(<span class="st">"I'm sorry, I don't feel like talking today."</span>);
+<span class="ot">#endif </span><span class="co">/* matches #ifndef BE_POLITE */</span>
+<span class="ot">#endif </span><span class="co">/* matches #ifdef SAY_HI */</span>
+
+<span class="ot">#ifdef DEBUG_ARITHMETIC</span>
+ assert(<span class="dv">2+2</span> == <span class="dv">5</span>);
+<span class="ot">#endif</span>
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/macros/ifdef.c" class="uri">examples/macros/ifdef.c</a>
+</div>
+<h3 id="defining-macros-on-the-command-line"><span class="header-section-number">4.13.5</span> Defining macros on the command line</h3>
+<p>You can turn these conditional compilation directives on and off at compile time by passing the <code>-D</code> flag to <code>gcc</code>. Here is the program above, running after compiling with different choices of options:</p>
+<pre><code>$ gcc -DSAY_HI -o ifdef ifdef.c
+$ ./ifdef
+Hi.
+$ gcc -DBE_POLITE -DDEBUG_ARITHMETIC -o ifdef ifdef.c
+$ ./ifdef
+I'm sorry, I don't feel like talking today.
+ifdef: ifdef.c:18: main: Assertion `2+2 == 5' failed.
+Aborted</code></pre>
+<p>An example of how this mechanism can be useful is the <code>NDEBUG</code> macro: if you define this before including <code>assert.h</code>, it turns every <code>assert</code>
+ in your code into a no-op. This can be handy if you are pretty sure
+your code works and you want to speed it up in its final shipped
+version, or if you are pretty sure your code doesn't work but you want
+to hide the evidence. (It also means you should not perform side-effects
+ inside an <code>assert</code> unless you are happy with them not happening.)</p>
+<p>Using the flag <code>-DNAME</code> defines <code>NAME</code> to be <code>1</code>. If you want something else, use <code>-DNAME=VALUE</code>.
+ This can be used to bake useful information into your program at
+compile time, and is often used to specify filenames. Below is a simple
+example.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+<span class="ot">#ifdef MESSAGE</span>
+ puts(MESSAGE);
+<span class="ot">#endif</span>
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/macros/message.c" class="uri">examples/macros/message.c</a>
+</div>
+<pre><code>$ gcc -DMESSAGE='"Hi there!"' -o message message.c
+$ ./message
+Hi there!</code></pre>
+<p>Note that we had to put an extra layer of single quotes in the
+command line to keep the shell from stripping off the double quotes.
+This is unavoidable: had we written <code>puts("MESSAGE")</code> in the code, the preprocessor would have recognized that <code>MESSAGE</code> appeared inside a string and would not have replaced it.<a href="#fn17" class="footnoteRef" id="fnref17"><sup>17</sup></a></p>
+<h3 id="the-if-directive"><span class="header-section-number">4.13.6</span> The <code>#if</code> directive</h3>
+<p>The preprocessor also includes a more general <code>#if</code>
+directive that evaluates simple arithmetic expressions. The limitations
+are that it can only do integer arithmetic (using the widest signed
+integer type available to the compiler) and can only do it to integer
+and character constants and the special operator <code>defined(NAME)</code>, which evaluates to 1 if <code>NAME</code> is defined and 0 otherwise. The most common use of this is to combine several <code>#ifdef</code>-like tests into one:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+<span class="ot">#if VERBOSITY &gt;= 3 &amp;&amp; defined(SAY_HI)</span>
+ puts(<span class="st">"Hi!"</span>);
+<span class="ot">#endif</span>
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/macros/if.c" class="uri">examples/macros/if.c</a>
+</div>
+<h3 id="Debugging_macro_expansions"><span class="header-section-number">4.13.7</span> Debugging macro expansions</h3>
+<p>One problem with using a lot of macros is that you can end up with no
+ idea what input is actually fed to the compiler after the preprocessor
+is done with it. You can tell <code class="backtick">gcc</code> to tell you how everything expands using <code class="backtick">gcc&nbsp;-E&nbsp;source_file.c</code>. If your source file contains any <code class="backtick">#include</code> statements it is probably a good idea to send the output of <code class="backtick">gcc&nbsp;-E</code> to a file so you can scroll down past the thousands of lines of text they may generate.</p>
+<h3 id="Can_a_macro_call_a_preprocessor_command.3F"><span class="header-section-number">4.13.8</span> Can a macro call a preprocessor command?</h3>
+<p>E.g., can you write something like</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define DefinePlus1(x, y) #define x ((y)+1)</span>
+<span class="ot">#define IncludeLib(x) #include "lib/" #x</span></code></pre></div>
+<p>The answer is <strong>no</strong>. C preprocessor commands are only
+recognized in unexpanded text. If you want self-modifying macros you
+will need to use a fancier macro processor like <a href="http://en.wikipedia.org/wiki/M4_%28computer_language%29">m4</a>.</p>
+<h1 id="dataStructuresAndProgrammingTechniques"><span class="header-section-number">5</span> Data structures and programming techniques</h1>
+<p>Up until this point we have mostly concentrated on the details of the
+ C programming language. In this part of the notes, we will be looking
+more at how to construct data structures and how to organize a program.
+In principle, these techniques can be applied to any programming
+language that supports the appropriate low-level data types, but we will
+ continue to emphasize issues involved with implementation in C.</p>
+<h2 id="asymptoticNotation"><span class="header-section-number">5.1</span> Asymptotic notation</h2>
+<p><strong>Asymptotic notation</strong> is a tool for measuring the
+growth rate of functions, which for program design usually means the way
+ in which the time or space costs of a program scale with the size of
+the input. We'll start with an example of why this is important.</p>
+<h3 id="two-sorting-algorithms"><span class="header-section-number">5.1.1</span> Two sorting algorithms</h3>
+<p>Suppose we want to sort in increasing order a deck of <span class="math inline"><em>n</em></span> cards, numbered <span class="math inline">1</span> through <span class="math inline"><em>n</em></span>. Here are two algorithms for doing this.</p>
+<p>In the <strong>mergesort</strong> algorithm, we start with <span class="math inline"><em>n</em></span>
+ piles of one card each. We then take pairs of piles and merge them
+together, by repeatedly pulling the smaller of the two smallest cards
+off the top of the pile and putting it on the bottom of our output pile.
+ After the first round of this, we have <span class="math inline"><em>n</em>/2</span> piles of two cards each. After another round, <span class="math inline"><em>n</em>/4</span> piles of four cards each, and so on until we get one pile with <span class="math inline"><em>n</em></span> cards after roughly <span class="math inline">log<sub>2</sub><em>n</em></span> rounds of merging.</p>
+<p>Here's a picture of this algorithm in action on 8 cards:</p>
+<pre><code>5 7 1 2 3 4 8 6
+
+57 12 34 68
+
+1257 3468
+
+12345678</code></pre>
+<p>Suppose that we want to estimate the cost of this algorithm without
+actually coding it up. We might observe that each time a card is merged
+into a new pile, we need to do some small, fixed number of operations to
+ decide that it's the smaller card, and then do an additional small,
+fixed number of operations to physically move it to a new place. If we
+are really clever, we might notice that since the size of the pile a
+card is in doubles with each round, there can be at most <span class="math inline">⌈log<sub>2</sub><em>n</em>⌉</span> rounds until all cards are in the same pile. So the cost of getting a single card in the right place will be at most <span class="math inline"><em>c</em>log<em>n</em></span> where <span class="math inline"><em>c</em></span>
+ counts the "small, fixed" number of operations that we keep mentioning,
+ and the cost of getting every card in the right place will be at most <span class="math inline"><em>c</em><em>n</em>log<em>n</em></span>.</p>
+<p>In the <strong>selection sort</strong> algorithm, we look through all the cards
+to find the smallest one, swap it to the beginning of the list, then
+look through the remaining cards for the second smallest, swap it to the
+ next position, and so on.</p>
+<p>Here's a picture of this algorithm in action on 8 cards:</p>
+<pre><code>57123486
+
+17523486
+
+12573486
+
+12375486
+
+12345786
+
+12345786
+
+12345687
+
+12345678</code></pre>
+<p>This is a simpler algorithm to implement than mergesort, but it is
+usually slower on large inputs. We can formalize this by arguing that
+each time we scan <span class="math inline"><em>k</em></span> cards to
+find the smallest, it's going to take some small, fixed number of
+operations to test each card against the best one we found so far, and
+an additional small, fixed number of operations to swap the smallest
+card to the right place. To compute the total cost we have to add these
+costs for all cards, which will give us a total cost that looks
+something like <span class="math inline">$(c_1 n + c_2) + (c_1 (n-1) + c_2) + (c_1 (n-2) + c_2) + \ldots + (c_1 \cdot 1 + c_2) = c_1 n(n+1)/2 + c_2 n$</span>.</p>
+<p>For large <span class="math inline"><em>n</em></span>, it looks like
+this is going to cost more than mergesort. But how can we make this
+claim cleanly, particularly if we don't know the exact values of <span class="math inline"><em>c</em></span>, <span class="math inline"><em>c</em><sub>1</sub></span>, and <span class="math inline"><em>c</em><sub>2</sub></span>?</p>
+<h3 id="big-o-to-the-rescue"><span class="header-section-number">5.1.2</span> Big-O to the rescue</h3>
+<p>The idea is to replace complex running time formulae like <span class="math inline"><em>c</em><em>n</em>log<em>n</em></span> or <span class="math inline"><em>c</em><sub>1</sub><em>n</em>(<em>n</em> + 1)/2 + <em>c</em><sub>2</sub><em>n</em></span> with an asymptotic growth rate <span class="math inline"><em>O</em>(<em>n</em>log<em>n</em>)</span> or <span class="math inline"><em>O</em>(<em>n</em><sup>2</sup>)</span>.
+ These asymptotic growth rates omit the specific details of exactly how
+fast our algorithms run (which we don't necessarily know without
+actually coding them up) and concentrate solely on how the cost scales
+as the size of the input <span class="math inline"><em>n</em></span> becomes large.</p>
+<p>This avoids two issues:</p>
+<ol style="list-style-type: decimal">
+<li>Different computers run at different speeds, and we'd like to be
+able to say that one algorithm is better than another without having to
+measure its running time on specific hardware.</li>
+<li>Performance on large inputs is more important than performance on
+small inputs, since programs running on small inputs are usually pretty
+fast.</li>
+</ol>
+<p>The idea of <strong>asymptotic notation</strong> is to consider the shape of the worst-case cost <span class="math inline"><em>T</em>(<em>n</em>)</span> to process an input of size <span class="math inline"><em>n</em></span>.
+ Here, worst-case means we consider the input that gives the greatest
+cost, where cost is usually time, but may be something else like space.
+To formalize the notion of shape, we define classes of functions that
+behave like particular interesting functions for large inputs. The
+definition looks much like a limit in calculus:</p>
+<dl>
+<dt><span class="math inline"><em>O</em>(<em>g</em>(<em>n</em>))</span></dt>
+<dd>A function <span class="math inline"><em>f</em>(<em>n</em>)</span> is in the class <span class="math inline"><em>O</em>(<em>g</em>(<em>n</em>))</span> if there exist constants <span class="math inline"><em>N</em></span> and <span class="math inline"><em>c</em></span> such that <span class="math inline"><em>f</em>(<em>n</em>)&lt;<em>c</em> ⋅ <em>g</em>(<em>n</em>)</span> when <span class="math inline"><em>n</em> &gt; <em>N</em></span>.
+</dd>
+</dl>
+<p>If <span class="math inline"><em>f</em>(<em>n</em>)</span> is in <span class="math inline"><em>O</em>(<em>g</em>(<em>n</em>))</span> we say <span class="math inline"><em>f</em>(<em>n</em>)</span> is <strong>big-O</strong> of <span class="math inline"><em>g</em>(<em>n</em>)</span> or just <span class="math inline"><em>f</em>(<em>n</em>)=<em>O</em>(<em>g</em>(<em>n</em>))</span>.<a href="#fn18" class="footnoteRef" id="fnref18"><sup>18</sup></a></p>
+<p>Unpacked, this definition says that <span class="math inline"><em>f</em>(<em>n</em>)</span> is less than a constant times <span class="math inline"><em>g</em>(<em>n</em>)</span> when <span class="math inline"><em>n</em></span> is large enough.</p>
+<p>Some examples:</p>
+<ul>
+<li>Let <span class="math inline"><em>f</em>(<em>n</em>)=3<em>n</em> + 12</span>, and let <span class="math inline"><em>g</em>(<em>n</em>)=<em>n</em></span>. To show that <span class="math inline"><em>f</em>(<em>n</em>)</span> is in <span class="math inline"><em>O</em>(<em>g</em>(<em>n</em>)) = <em>O</em>(<em>n</em>)</span>, we can pick whatever constants we want for <span class="math inline"><em>c</em></span> and <span class="math inline"><em>N</em></span> (as long as they work). So let's make <span class="math inline"><em>N</em></span> be <span class="math inline">100</span> and <span class="math inline"><em>c</em></span> be <span class="math inline">4</span>. Then we need to show that if <span class="math inline"><em>n</em> &gt; 100</span>, <span class="math inline">3<em>n</em> + 12 &lt; 4<em>n</em></span>. But <span class="math inline">3<em>n</em> + 12 &lt; 4<em>n</em></span> holds precisely when <span class="math inline">12 &lt; <em>n</em></span>, which is implied by our assumption that <span class="math inline"><em>n</em> &gt; 100</span>.</li>
+<li>Let <span class="math inline"><em>f</em>(<em>n</em>)=4<em>n</em><sup>2</sup> + 23<em>n</em> + 15</span>, and let <span class="math inline"><em>g</em>(<em>n</em>)=<em>n</em><sup>2</sup></span>. Now let <span class="math inline"><em>N</em></span> be <span class="math inline">100</span> again and <span class="math inline"><em>c</em></span> be <span class="math inline">5</span>. So we need <span class="math inline">4<em>n</em><sup>2</sup> + 23<em>n</em> + 15 &lt; 5<em>n</em><sup>2</sup></span>, or <span class="math inline">23<em>n</em> + 15 &lt; <em>n</em><sup>2</sup></span>. But <span class="math inline"><em>n</em> &gt; 100</span> means that <span class="math inline"><em>n</em><sup>2</sup> &gt; 100<em>n</em> = 50<em>n</em> + 50<em>n</em> &gt; 50<em>n</em> + 5000 &gt; 23<em>n</em> + 15</span>, which proves that <span class="math inline"><em>f</em>(<em>n</em>)</span> is in <span class="math inline"><em>O</em>(<em>n</em><sup>2</sup>)</span>.</li>
+<li>Let <span class="math inline"><em>f</em>(<em>n</em>)&lt;146</span> for all <span class="math inline"><em>n</em></span>, and let <span class="math inline"><em>g</em>(<em>n</em>)=1</span>. Then for <span class="math inline"><em>N</em> = 0</span> and <span class="math inline"><em>c</em> = 146</span>, <span class="math inline"><em>f</em>(<em>n</em>)&lt;146 = 146<em>g</em>(<em>n</em>)</span>, and <span class="math inline"><em>f</em>(<em>n</em>)</span> is in <span class="math inline"><em>O</em>(1)</span>.</li>
+</ul>
+<p>Writing proofs like this over and over again is a nuisance, so we can use some basic rules of thumb to reduce messy functions <span class="math inline"><em>f</em>(<em>n</em>)</span> to their asymptotic forms:</p>
+<ul>
+<li>If <span class="math inline"><em>c</em></span> is a constant (doesn't depend on <span class="math inline"><em>n</em></span>), then <span class="math inline"><em>c</em> ⋅ <em>f</em>(<em>n</em>)=<em>O</em>(<em>f</em>(<em>n</em>))</span>. This follows immediately from being able to pick <span class="math inline"><em>c</em></span> in the definition. So we can always get rid of constant factors: <span class="math inline">137<em>n</em><sup>5</sup> = <em>O</em>(<em>n</em><sup>5</sup>)</span>.</li>
+<li>If <span class="math inline"><em>f</em>(<em>n</em>)=<em>g</em>(<em>n</em>)+<em>h</em>(<em>n</em>)</span>, then the bigger of <span class="math inline"><em>g</em>(<em>n</em>)</span> or <span class="math inline"><em>h</em>(<em>n</em>)</span> wins. This is because if <span class="math inline"><em>g</em>(<em>n</em>)≤<em>h</em>(<em>n</em>)</span>, then <span class="math inline"><em>g</em>(<em>n</em>)+<em>h</em>(<em>n</em>)≤2<em>h</em>(<em>n</em>)</span>, and then big-O eats the 2. So <span class="math inline">12<em>n</em><sup>2</sup> + 52<em>n</em> + 3 = <em>O</em>(<em>n</em><sup>2</sup>)</span> because <span class="math inline"><em>n</em><sup>2</sup></span> dominates all the other terms.</li>
+<li>To figure out which of two terms dominates, the rule is
+<ul>
+<li>Bigger exponents win: If <span class="math inline"><em>a</em> &lt; <em>b</em></span>, then <span class="math inline"><em>O</em>(<em>n</em><sup><em>a</em></sup>)+<em>O</em>(<em>n</em><sup><em>b</em></sup>)=<em>O</em>(<em>n</em><sup><em>b</em></sup>)</span>.</li>
+<li>Polynomials beat logarithms: For any <span class="math inline"><em>a</em></span> and any <span class="math inline"><em>b</em> &gt; 0</span>, <span class="math inline"><em>O</em>(log<sup><em>a</em></sup><em>n</em>)+<em>O</em>(<em>n</em><sup><em>b</em></sup>)=<em>O</em>(<em>n</em><sup><em>b</em></sup>)</span>.</li>
+<li>Exponentials beat polynomials: For any <span class="math inline"><em>a</em></span> and any <span class="math inline"><em>b</em> &gt; 1</span>, <span class="math inline"><em>O</em>(<em>n</em><sup><em>a</em></sup>)+<em>O</em>(<em>b</em><sup><em>n</em></sup>)=<em>O</em>(<em>b</em><sup><em>n</em></sup>)</span>.</li>
+<li>The distributive law works: Because <span class="math inline"><em>O</em>(log<em>n</em>)</span> dominates <span class="math inline"><em>O</em>(1)</span>, <span class="math inline"><em>O</em>(<em>n</em>log<em>n</em>)</span> dominates <span class="math inline"><em>O</em>(<em>n</em>)</span>.</li>
+</ul></li>
+</ul>
+<p>This means that almost any asymptotic bound can be reduced down to
+one of a very small list of common bounds. Ones that you will typically
+see in practical algorithms, listed in increasing order, are <span class="math inline"><em>O</em>(1)</span>, <span class="math inline"><em>O</em>(log<em>n</em>)</span>, <span class="math inline"><em>O</em>(<em>n</em>)</span>, <span class="math inline"><em>O</em>(<em>n</em>log<em>n</em>)</span>, or <span class="math inline"><em>O</em>(<em>n</em><sup>2</sup>)</span>.</p>
+<p>Applying these rules to mergesort and selection sort gives us asymptotic bounds of <span class="math inline"><em>c</em><em>n</em>log<em>n</em> = <em>O</em>(<em>n</em>log<em>n</em>)</span> (the constant vanishes) and <span class="math inline"><em>c</em><sub>1</sub><em>n</em>(<em>n</em> + 1)/2 + <em>c</em><sub>2</sub><em>n</em> = <em>c</em><sub>1</sub><em>n</em><sup>2</sup>/2 + <em>c</em><sub>1</sub><em>n</em>/2 + <em>c</em><sub>2</sub><em>n</em> = <em>O</em>(<em>n</em><sup>2</sup>)+<em>O</em>(<em>n</em>)+<em>O</em>(<em>n</em>)=<em>O</em>(<em>n</em><sup>2</sup>)</span> (the constants vanish and then <span class="math inline"><em>O</em>(<em>n</em><sup>2</sup>)</span>
+ dominates). Here we see that no matter how fast our machine is at
+different low-level operations, for large enough inputs mergesort will
+beat selection sort.</p>
+<h3 id="asymptotic-cost-of-programs"><span class="header-section-number">5.1.3</span> Asymptotic cost of programs</h3>
+<p>To compute the asymptotic cost of a program, the rule of thumb is that any simple statement costs <span class="math inline"><em>O</em>(1)</span>
+ time to evaluate, and larger costs are the result of loops or calls to
+expensive functions, where a loop multiplies the cost by the number of
+iterations in the loop. When adding costs together, the biggest cost
+wins:</p>
+<p>So this function takes <span class="math inline"><em>O</em>(1)</span> time:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* return the sum of the integers i with 0 &lt;= i and i &lt; n */</span>
+<span class="dt">int</span>
+sumTo(<span class="dt">int</span> n)
+{
+ <span class="kw">return</span> n*(n<span class="dv">-1</span>)/<span class="dv">2</span>;
+}</code></pre></div>
+<p>But this function, which computes exactly the same value, takes <span class="math inline"><em>O</em>(<em>n</em>)</span> time:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* return the sum of the integers i with 0 &lt;= i and i &lt; n */</span>
+<span class="dt">int</span>
+sumTo(<span class="dt">int</span> n)
+{
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> sum = <span class="dv">0</span>;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ sum += i;
+ }
+
+ <span class="kw">return</span> sum;
+}</code></pre></div>
+<p>The reason it takes so long is that each iteration of the loop takes only <span class="math inline"><em>O</em>(1)</span> time, but we execute the loop <span class="math inline"><em>n</em></span> times, and <span class="math inline"><em>n</em> ⋅ <em>O</em>(1)=<em>O</em>(<em>n</em>)</span>.</p>
+<p>Here's an even worse version that takes <span class="math inline"><em>O</em>(<em>n</em><sup>2</sup>)</span> time:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* return the sum of the integers i with 0 &lt;= i and i &lt; n */</span>
+<span class="dt">int</span>
+sumTo(<span class="dt">int</span> n)
+{
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> j;
+ <span class="dt">int</span> sum = <span class="dv">0</span>;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ <span class="kw">for</span>(j = <span class="dv">0</span>; j &lt; i; j++) {
+ sum++;
+ }
+ }
+
+ <span class="kw">return</span> sum;
+}</code></pre></div>
+<p>Here we have two nested loops. The outer loop iterates exactly <span class="math inline"><em>n</em></span> times, and for each iteration the inner loop iterates at most <span class="math inline"><em>n</em></span> times, and the innermost iteration costs <span class="math inline"><em>O</em>(1)</span> each time, so the total is at most <span class="math inline"><em>O</em>(<em>n</em><sup>2</sup>)</span>. (In fact, it's no better than this, because for at least <span class="math inline"><em>n</em>/2</span> of the times we execute the inner loop, it does at least <span class="math inline"><em>n</em>/2</span> iterations.)</p>
+<p>So even if we knew that the constant on the first implementation was
+really large (maybe our CPU is bad at dividing by 2?), for big values of
+ <span class="math inline"><em>n</em></span> it's still likely to be faster than the other two.</p>
+<p>(This example is a little misleading, because <span class="math inline"><em>n</em></span> is not the size of the input but the actual input value. More typical might be a statement that the cost of <code>strlen</code> is <span class="math inline"><em>O</em>(<em>n</em>)</span> where <span class="math inline"><em>n</em></span> is the length of the string.)</p>
+<h3 id="other-variants-of-asymptotic-notation"><span class="header-section-number">5.1.4</span> Other variants of asymptotic notation</h3>
+<p>Big-O notation is good for upper bounds, but the inequality in the
+definition means that it can't be used for anything else: it is the case
+ that <span class="math inline">12 = <em>O</em>(<em>n</em><sup>67</sup>)</span> just because <span class="math inline">12 &lt; <em>n</em><sup>67</sup></span> when <span class="math inline"><em>n</em></span> is large enough. There is an alternative definition, called <strong>big-Omega</strong>, that works in the other direction:</p>
+<dl>
+<dt><span class="math inline"><em>Ω</em>(<em>g</em>(<em>n</em>))</span></dt>
+<dd>A function <span class="math inline"><em>f</em>(<em>n</em>)</span> is in the class <span class="math inline"><em>Ω</em>(<em>g</em>(<em>n</em>))</span> if there exist constants <span class="math inline"><em>N</em></span> and <span class="math inline"><em>c</em></span> such that <span class="math inline"><em>f</em>(<em>n</em>)&gt;<em>c</em> ⋅ <em>g</em>(<em>n</em>)</span> when <span class="math inline"><em>n</em> &gt; <em>N</em></span>.
+</dd>
+</dl>
+<p>This is exactly the same as the definition of <span class="math inline"><em>O</em>(<em>g</em>(<em>n</em>))</span>
+ except that the inequality goes in the other direction. So if we want
+to express that some algorithm is very expensive, we might write that
+it's <span class="math inline"><em>Ω</em>(<em>n</em><sup>2</sup>)</span>, which says that once the size of the input is big enough, then the cost grows at least as fast as <span class="math inline"><em>n</em><sup>2</sup></span>.</p>
+<p>If you want to claim that your bound is <strong>tight</strong>—both an upper and a lower bound—use <strong>big-Theta</strong>: <span class="math inline"><em>f</em>(<em>n</em>)</span> is <span class="math inline"><em>Θ</em>(<em>g</em>(<em>n</em>))</span> if it is both <span class="math inline"><em>O</em>(<em>g</em>(<em>n</em>))</span> and <span class="math inline"><em>Ω</em>(<em>g</em>(<em>n</em>))</span>.</p>
+<p>Mostly we will just use big-O, with the understanding that when we say that a particular algorithm is <span class="math inline"><em>O</em>(<em>n</em>)</span>, that's the best bound we could come up with.</p>
+<h2 id="linkedLists"><span class="header-section-number">5.2</span> Linked lists</h2>
+<p>Linked lists are about the simplest data structure beyond arrays.
+They aren't very efficient for many purposes, but have very good
+performance for certain specialized applications.</p>
+<p>The basic idea is that instead of storing <span class="math inline"><em>n</em></span> items in one big array, we store each item in its own <code>struct</code>, and each of these <code>structs</code> includes a pointer to the next <code>struct</code>
+ in the list (with a null pointer to indicate that there are no more
+elements). If we follow the pointers we can eventually reach all of the
+elements.</p>
+<p>For example, if we declare the struct holding each element like this:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">struct</span> elt {
+ <span class="kw">struct</span> elt *next; <span class="co">/* pointer to next element in the list */</span>
+ <span class="dt">int</span> contents; <span class="co">/* contents of this element */</span>
+};</code></pre></div>
+<p>We can build a structure like this:</p>
+<p><img src="" alt="Basic linked list"><br>
+The box on the far left is not a <code>struct elt</code>, but a <code>struct elt *</code>;
+ in order to keep track of the list we need a pointer to the first
+element. As usual in C, we will have to do all the work of allocating
+these elements and assigning the right pointer values in the right
+places ourselves.</p>
+<h3 id="stacks"><span class="header-section-number">5.2.1</span> Stacks</h3>
+<p>The selling point of linked lists in comparison to arrays is that
+inserting or removing elements can be cheap: at the front of the list,
+inserting a new element just requires allocating another <code>struct</code>
+ and hooking up a few pointers, while removing an element just requires
+moving the pointer to the first element to point to the second element
+instead, and then freeing the first element.</p>
+<p>For example, here's what the linked list above looks like after we insert a new element at the front:</p>
+<p><img src="" alt="Linked list after insertion"><br>
+To make this work, we need to change two pointers: the head pointer and the <code>next</code> pointer in the new element holding 0. These operations aren't affected by the size of the rest of the list and so take <span class="math inline"><em>O</em>(1)</span> time.</p>
+<p>Removal is the reverse of insertion: We patch out the first
+element by shifting the head pointer to the second element, then
+deallocate it with <code>free</code>. (We do have to be careful to get any data we need out of it before calling free). This is also an <span class="math inline"><em>O</em>(1)</span> operation.</p>
+<p>The fact that we can add and remove elements at the start of linked
+lists for cheap makes them particularly useful for implementing a <strong>stack</strong>, an abstract data type that supports operations <strong>push</strong> (insert a new element on the top of the stack) and <strong>pop</strong>
+ (remove and return the element at the top of the stack). Here is an
+example of a simple linked-list implementation of a stack, together with
+ some test code:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="kw">struct</span> elt {
+ <span class="kw">struct</span> elt *next;
+ <span class="dt">int</span> value;
+};
+
+<span class="co">/* </span>
+<span class="co"> * We could make a struct for this,</span>
+<span class="co"> * but it would have only one component,</span>
+<span class="co"> * so this is quicker.</span>
+<span class="co"> */</span>
+<span class="kw">typedef</span> <span class="kw">struct</span> elt *Stack;
+
+<span class="ot">#define STACK_EMPTY (0)</span>
+
+<span class="co">/* push a new value onto top of stack */</span>
+<span class="dt">void</span>
+stackPush(Stack *s, <span class="dt">int</span> value)
+{
+ <span class="kw">struct</span> elt *e;
+
+ e = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> elt));
+ assert(e);
+
+ e-&gt;value = value;
+ e-&gt;next = *s;
+ *s = e;
+}
+
+<span class="dt">int</span>
+stackEmpty(<span class="dt">const</span> Stack *s)
+{
+ <span class="kw">return</span> (*s == <span class="dv">0</span>);
+}
+
+<span class="dt">int</span>
+stackPop(Stack *s)
+{
+ <span class="dt">int</span> ret;
+ <span class="kw">struct</span> elt *e;
+
+ assert(!stackEmpty(s));
+
+ ret = (*s)-&gt;value;
+
+ <span class="co">/* patch out first element */</span>
+ e = *s;
+ *s = e-&gt;next;
+
+ free(e);
+
+ <span class="kw">return</span> ret;
+}
+
+<span class="co">/* print contents of stack on a single line */</span>
+<span class="dt">void</span>
+stackPrint(<span class="dt">const</span> Stack *s)
+{
+ <span class="kw">struct</span> elt *e;
+
+ <span class="kw">for</span>(e = *s; e != <span class="dv">0</span>; e = e-&gt;next) {
+ printf(<span class="st">"%d "</span>, e-&gt;value);
+ }
+
+ putchar(<span class="ch">'\n'</span>);
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> i;
+ Stack s;
+
+ s = STACK_EMPTY;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; <span class="dv">5</span>; i++) {
+ printf(<span class="st">"push %d</span><span class="ch">\n</span><span class="st">"</span>, i);
+ stackPush(&amp;s, i);
+ stackPrint(&amp;s);
+ }
+
+ <span class="kw">while</span>(!stackEmpty(&amp;s)) {
+ printf(<span class="st">"pop gets %d</span><span class="ch">\n</span><span class="st">"</span>, stackPop(&amp;s));
+ stackPrint(&amp;s);
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/linkedLists/stack.c" class="uri">examples/linkedLists/stack.c</a>
+</div>
+<p>Unlike most of our abstract data types, we do not include a <code>struct</code>
+ representing the linked list itself. This is because the only thing we
+need to keep track of a linked list is the head pointer, and it feels a
+little silly to have a <code>struct</code> with just one component. But
+we might choose to do this if we wanted to make the linked list
+implementation opaque or allow for including more information later.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">struct</span> stack {
+ <span class="kw">struct</span> elt *head;
+};</code></pre></div>
+<h4 id="Building_a_stack_out_of_an_array"><span class="header-section-number">5.2.1.1</span> Building a stack out of an array</h4>
+<p>When the elements of a stack are small, or when a maximum number of
+elements is known in advance, it often makes sense to build a stack from
+ an array (with a variable storing the index of the top element) instead
+ of a linked list. The reason is that pushes and pops only require
+updating the stack pointer instead of calling <code class="backtick">malloc</code> or <code class="backtick">free</code>
+ to allocate space, and pre-allocating is almost always faster than
+allocating as needed. This is the strategy used to store the function
+call stack in almost all programs (the exception is in languages like
+Scheme, where the call stack is allocated on the heap because stack
+frames may outlive the function call that creates them).</p>
+<h3 id="queues"><span class="header-section-number">5.2.2</span> Queues</h3>
+<p>Stacks are last-in-first-out (LIFO) data structures: when we pop, we
+get the last item we pushed. What if we want a first-in-first-out (FIFO)
+ data structure? Such a data structure is called a <strong>queue</strong> and can also be implemented by a linked list. The difference is that if we want <span class="math inline"><em>O</em>(1)</span> time for both the <strong>enqueue</strong> (push) and <strong>dequeue</strong> (pop) operations, we must keep around pointers to both ends of the linked list.</p>
+<p>So now we get something that looks like this:</p>
+<p><img src="" alt="Queue as a linked list"><br>
+Enqueuing a new element typically requires (a) allocating a new <code>struct</code> to hold it; (b) making the old tail <code>struct</code> point at the new <code>struct</code>; and (c) updating the <code>tail</code> pointer to also point to the new <code>struct</code>. There is a minor complication when the queue is empty; in this case instead of updating <code>tail-&gt;next</code> we must put a pointer to the new <code>struct</code> in <code>head</code>. Dequeuing an element involves updating the head pointer and freeing the removed <code>struct</code>, exactly like a stack pop.</p>
+<p>Here is the queue above after enqueuing a new element 6. The updated pointers are indicated by dotted lines:</p>
+<p><img src="" alt="Queue after enqueuing a new element"><br>
+Because we are only changing two pointers, each of which we can reach by following a constant number of pointers from the main <code>struct</code>, we can do this in <span class="math inline"><em>O</em>(1)</span> time.</p>
+<p>There is a slight complication when we enqueue the very first
+element, because we need to update the head pointer instead of the
+pointer in the previous tail (which doesn't yet exist). This requires
+testing for an empty queue in the enqueue routine, which we'll do in the
+ sample code below.</p>
+<p>Dequeuing is easier because it requires updating only one pointer:</p>
+<p><img src="" alt="Queue after dequeuing first element"><br>
+If we adopt the convention that a null in <code>head</code> means an empty queue, and use this property to check if the queue is empty when enqueuing, we don't even have to clear out <code>tail</code> when we dequeue the last element.</p>
+<p>Here is a simple implementation of a queue holding <code class="backtick">int</code>s, together with some test code showing how its behavior differs from a stack:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+
+
+<span class="kw">struct</span> elt {
+ <span class="kw">struct</span> elt *next;
+ <span class="dt">int</span> value;
+};
+
+<span class="kw">struct</span> queue {
+ <span class="kw">struct</span> elt *head;
+ <span class="kw">struct</span> elt *tail;
+};
+
+<span class="kw">typedef</span> <span class="kw">struct</span> queue *q;
+
+<span class="kw">struct</span> queue *
+queueCreate(<span class="dt">void</span>)
+{
+ <span class="kw">struct</span> queue *q;
+
+ q = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> queue));
+
+ q-&gt;head = q-&gt;tail = <span class="dv">0</span>;
+
+ <span class="kw">return</span> q;
+}
+
+<span class="co">/* push a new value onto top of Queue */</span>
+<span class="dt">void</span>
+enq(<span class="kw">struct</span> queue *q, <span class="dt">int</span> value)
+{
+ <span class="kw">struct</span> elt *e;
+
+ e = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> elt));
+ assert(e);
+
+ e-&gt;value = value;
+
+ <span class="co">/* Because I will be the tail, nobody is behind me */</span>
+ e-&gt;next = <span class="dv">0</span>;
+
+ <span class="kw">if</span>(q-&gt;head == <span class="dv">0</span>) {
+ <span class="co">/* If the queue was empty, I become the head */</span>
+ q-&gt;head = e;
+ } <span class="kw">else</span> {
+ <span class="co">/* Otherwise I get in line after the old tail */</span>
+ q-&gt;tail-&gt;next = e;
+ }
+
+ <span class="co">/* I become the new tail */</span>
+ q-&gt;tail = e;
+}
+
+<span class="dt">int</span>
+queueEmpty(<span class="dt">const</span> <span class="kw">struct</span> queue *q)
+{
+ <span class="kw">return</span> (q-&gt;head == <span class="dv">0</span>);
+}
+
+<span class="dt">int</span>
+deq(<span class="kw">struct</span> queue *q)
+{
+ <span class="dt">int</span> ret;
+ <span class="kw">struct</span> elt *e;
+
+ assert(!queueEmpty(q));
+
+ ret = q-&gt;head-&gt;value;
+
+ <span class="co">/* patch out first element */</span>
+ e = q-&gt;head;
+ q-&gt;head = e-&gt;next;
+
+ free(e);
+
+ <span class="kw">return</span> ret;
+}
+
+<span class="co">/* print contents of queue on a single line, head first */</span>
+<span class="dt">void</span>
+queuePrint(<span class="kw">struct</span> queue *q)
+{
+ <span class="kw">struct</span> elt *e;
+
+ <span class="kw">for</span>(e = q-&gt;head; e != <span class="dv">0</span>; e = e-&gt;next) {
+ printf(<span class="st">"%d "</span>, e-&gt;value);
+ }
+
+ putchar(<span class="ch">'\n'</span>);
+}
+
+<span class="co">/* free a queue and all of its elements */</span>
+<span class="dt">void</span>
+queueDestroy(<span class="kw">struct</span> queue *q)
+{
+ <span class="kw">while</span>(!queueEmpty(q)) {
+ deq(q);
+ }
+
+ free(q);
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> i;
+ <span class="kw">struct</span> queue *q;
+
+ q = queueCreate();
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; <span class="dv">5</span>; i++) {
+ printf(<span class="st">"enq %d</span><span class="ch">\n</span><span class="st">"</span>, i);
+ enq(q, i);
+ queuePrint(q);
+ }
+
+ <span class="kw">while</span>(!queueEmpty(q)) {
+ printf(<span class="st">"deq gets %d</span><span class="ch">\n</span><span class="st">"</span>, deq(q));
+ queuePrint(q);
+ }
+
+ queueDestroy(q);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/linkedLists/queue.c" class="uri">examples/linkedLists/queue.c</a>
+</div>
+<p>It is a bit trickier to build a queue out of an array than to build a
+ stack. The difference is that while a stack pointer can move up and
+down, leaving the base of the stack in the same place, a naive
+implementation of a queue would have head and tail pointers both
+marching ever onward across the array leaving nothing but empty cells in
+ their wake. While it is possible to have the pointers wrap around to
+the beginning of the array when they hit the end, if the queue size is
+unbounded the tail pointer will eventually catch up to the head pointer.
+ At this point (as in a stack that overflows), it is necessary to
+allocate more space and copy the old elements over. See the section on <a href="#ringBuffers">ring buffers</a> for an example of how to do this.</p>
+<h3 id="Looping_over_a_linked_list"><span class="header-section-number">5.2.3</span> Looping over a linked list</h3>
+<p>Looping over a linked list is not hard if you have access to the <code class="backtick">next</code> pointers. (For a more abstract way to do this see <a href="#iterators">iterators</a>.)</p>
+<p>Let's imagine somebody gave us a pointer to the first <code class="backtick">struct&nbsp;stack</code> in a list; call this pointer <code class="backtick">first</code>. Then we can write a loop like this that prints the contents of the stack:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+stackPrint(<span class="kw">struct</span> stack *first)
+{
+ <span class="kw">struct</span> stack *elt;
+
+ <span class="kw">for</span>(elt = first; elt != <span class="dv">0</span>; elt = elt-&gt;next) {
+ puts(elt-&gt;book);
+ }
+}</code></pre></div>
+<p>There's not a whole lot to notice here except that <code class="backtick">for</code>
+ is perfectly happy to iterate over something that isn't a range of
+integers. The running time is linear in the length of the list (<span class="math inline"><em>O</em>(<em>n</em>)</span>).</p>
+<h3 id="Looping_over_a_linked_list_backwards"><span class="header-section-number">5.2.4</span> Looping over a linked list backwards</h3>
+<p>What if we want to loop over a linked list backwards? The <code class="backtick">next</code>
+ pointers all go the wrong way, so we have to save a trail of
+breadcrumbs to get back. The safest way to do this is to reverse the
+original list into an auxiliary list:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+stackPrintReversed(<span class="kw">struct</span> stack *first)
+{
+ <span class="kw">struct</span> stack *elt;
+ Stack s2; <span class="co">/* uses imperative implementation */</span>
+
+ s2 = stackCreate();
+
+ <span class="kw">for</span>(elt = first; elt != <span class="dv">0</span>; elt = elt-&gt;next) {
+ s2 = stackPush(s2, elt-&gt;book);
+ }
+
+ stackPrint(s2);
+ stackDestroy(s2);
+}</code></pre></div>
+<p>Pushing all the elements from the first list onto <code class="backtick">s2</code> puts the first element on the bottom, so when we print <code class="backtick">s2</code> out, it's in the reverse order of the original stack.</p>
+<p>We can also write a recursive function that prints the elements
+backwards. This function effectively uses the function call stack in
+place of the extra stack <code class="backtick">s2</code> above.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+stackPrintReversedRecursive(<span class="kw">struct</span> stack *first)
+{
+ <span class="kw">if</span>(first != <span class="dv">0</span>) {
+ <span class="co">/* print the rest of the stack */</span>
+ stackPrintReversedRecursive(first-&gt;next);
+
+ <span class="co">/* then print the first element */</span>
+ puts(first-&gt;book);
+ }
+}</code></pre></div>
+<p>The code in <code class="backtick">stackPrintReversedRecursive</code> is shorter than the code in <code class="backtick">stackPrintReversed</code>,
+ and it is likely to be faster since it doesn't require allocating a
+second stack and copying all the elements. But it will only work for
+small stacks: because the function call stack is really a fixed-size
+array, if the input to <code class="backtick">stackPrintReversedRecursive</code> is too big the recursion will go too deep and cause a <em>stack overflow</em>.</p>
+<p>If we want to do this sort of thing a lot, we should build a <strong>doubly-linked list</strong>,
+ with a pointer in each element both to the next element and the
+previous element instead of a singly-linked list (see below for more).</p>
+<h3 id="deques"><span class="header-section-number">5.2.5</span> Deques and doubly-linked lists</h3>
+<p>Suppose we want a data structure that represents a line of elements
+where we can push or pop elements at either end. Such a data structure
+is known as a <strong>deque</strong> (pronounced like "deck"), and can be implemented with all operations taking <span class="math inline"><em>O</em>(1)</span> time by a <strong>doubly-linked list</strong>, where each element has a pointer to both its successor and its predecessor.</p>
+<p>An ordinary singly-linked list is not good enough. The reason is that
+ even if we keep a pointer to both ends as in a queue, when it comes
+time to pop an element off the tail, we have no pointer to its
+predecessor ready to hand; the best we can do is scan from the head
+until we get to an element whose successor is the tail, which takes <span class="math inline"><em>O</em>(<em>n</em>)</span> time.</p>
+<p>So instead we need a doubly-linked list, where each node points to
+both its successor and predecessor. The most straightforward way to
+build this is to make it circular, and use a dummy node to represent the
+ head of the list. The resulting data structure might look something
+like this:</p>
+<p><img src="" alt="Circular doubly-linked list"><br>
+Below is an implementation of this structure. We have separated the interface in <code>deque.h</code> from the implementation in <code>deque.c</code>. This will allow us to change the implementation if we decide we don't like it, without affecting any other code in the system.</p>
+<p>A nice feature of this data structure is that we don't need to use
+null pointers to mark the ends of the deque. Instead, each end is marked
+ by a pointer to the dummy head element. For an empty deque, this just
+means that the head points to itself. The cost of this is that to detect
+ an empty deque we have to test for equality with the head (which might
+be slightly more expensive than just testing for null) and the head may
+contain some wasted space for its missing value if we allocate it like
+any other element.<a href="#fn19" class="footnoteRef" id="fnref19"><sup>19</sup></a></p>
+<p>To keep things symmetric, we implement the pointers as an array, indexed by the directions <code>DEQUE_FRONT</code> and <code>DEQUE_BACK</code> (defined in <code>deque.h</code>). This means we can use the same code to push or pop on either end of the deque.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">typedef</span> <span class="kw">struct</span> deque Deque;
+
+<span class="ot">#define DEQUE_FRONT (0)</span>
+<span class="ot">#define DEQUE_BACK (1)</span>
+
+<span class="ot">#define DEQUE_EMPTY (-1) </span><span class="co">/* returned by dequePop if deque is empty */</span>
+
+<span class="co">/* return a new empty deque */</span>
+Deque *dequeCreate(<span class="dt">void</span>);
+
+<span class="co">/* push new value onto direction side of deque d */</span>
+<span class="dt">void</span> dequePush(Deque *d, <span class="dt">int</span> direction, <span class="dt">int</span> value);
+
+<span class="co">/* pop and return first value on direction side of deque d */</span>
+<span class="co">/* returns DEQUE_EMPTY if deque is empty */</span>
+<span class="dt">int</span> dequePop(Deque *d, <span class="dt">int</span> direction);
+
+<span class="co">/* return 1 if deque contains no elements, 0 otherwise */</span>
+<span class="dt">int</span> dequeIsEmpty(<span class="dt">const</span> Deque *d);
+
+<span class="co">/* free space used by a deque */</span>
+<span class="dt">void</span> dequeDestroy(Deque *d);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/linkedLists/deque/deque.h" class="uri">examples/linkedLists/deque/deque.h</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;stddef.h&gt; </span><span class="co">/* for offsetof */</span>
+
+<span class="ot">#include "deque.h"</span>
+
+<span class="ot">#define NUM_DIRECTIONS (2)</span>
+
+<span class="kw">struct</span> deque {
+ <span class="kw">struct</span> deque *next[NUM_DIRECTIONS];
+ <span class="dt">int</span> value;
+};
+
+Deque *
+dequeCreate(<span class="dt">void</span>)
+{
+ Deque *d;
+
+ <span class="co">/*</span>
+<span class="co"> * We don't allocate the full space for this object</span>
+<span class="co"> * because we don't use the value field in the dummy head.</span>
+<span class="co"> *</span>
+<span class="co"> * Saving these 4 bytes doesn't make a lot of sense here,</span>
+<span class="co"> * but it might be more significant if value were larger.</span>
+<span class="co"> */</span>
+ d = malloc(offsetof(<span class="kw">struct</span> deque, value));
+
+ <span class="co">/* test is to deal with malloc failure */</span>
+ <span class="kw">if</span>(d) {
+ d-&gt;next[DEQUE_FRONT] = d-&gt;next[DEQUE_BACK] = d;
+ }
+
+ <span class="kw">return</span> d;
+}
+
+<span class="dt">void</span>
+dequePush(Deque *d, <span class="dt">int</span> direction, <span class="dt">int</span> value)
+{
+ <span class="kw">struct</span> deque *e; <span class="co">/* new element */</span>
+
+ assert(direction == DEQUE_FRONT || direction == DEQUE_BACK);
+
+ e = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> deque));
+ assert(e);
+
+ e-&gt;next[direction] = d-&gt;next[direction];
+ e-&gt;next[!direction] = d;
+ e-&gt;value = value;
+
+ d-&gt;next[direction] = e;
+ e-&gt;next[direction]-&gt;next[!direction] = e; <span class="co">/* preserves invariant */</span>
+}
+
+<span class="dt">int</span>
+dequePop(Deque *d, <span class="dt">int</span> direction)
+{
+ <span class="kw">struct</span> deque *e;
+ <span class="dt">int</span> retval;
+
+ assert(direction == DEQUE_FRONT || direction == DEQUE_BACK);
+
+ e = d-&gt;next[direction];
+
+ <span class="kw">if</span>(e == d) {
+ <span class="kw">return</span> DEQUE_EMPTY;
+ }
+
+ <span class="co">/* else remove it */</span>
+ d-&gt;next[direction] = e-&gt;next[direction];
+ e-&gt;next[direction]-&gt;next[!direction] = d;
+
+ retval = e-&gt;value;
+
+ free(e);
+
+ <span class="kw">return</span> retval;
+}
+
+<span class="dt">int</span>
+dequeIsEmpty(<span class="dt">const</span> Deque *d)
+{
+ <span class="kw">return</span> d-&gt;next[DEQUE_FRONT] == d;
+}
+
+<span class="dt">void</span>
+dequeDestroy(Deque *d)
+{
+ <span class="kw">while</span>(!dequeIsEmpty(d)) {
+ dequePop(d, DEQUE_FRONT);
+ }
+
+ free(d);
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/linkedLists/deque/deque.c" class="uri">examples/linkedLists/deque/deque.c</a>
+</div>
+<p>And here is some test code:</p>
+<p><a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/linkedLists/deque/testDeque.c">testDeque.c</a>.</p>
+<h4 id="ringBuffers"><span class="header-section-number">5.2.5.1</span> Alternate implementation using a ring buffer</h4>
+<p>The <code>deque.h</code> file carefully avoids revealing any details
+of the implementation of a deque. This allows us to replace the
+implementation with a different implementation that is more efficient in
+ its use of both time and space, at the cost of additional code
+complexity. Below is a replacement for <code>deque.c</code> that uses a <strong>ring buffer</strong> in place of the circular linked list.</p>
+<p>The idea of a ring buffer is to store the deque elements in an array,
+ with a pointer to the first element and a length field that says how
+many elements are in the deque. The information needed to manage the
+array (which is allocated using <code>malloc</code>) is stored in a <code>struct</code>.</p>
+<p>The sequence of elements wraps around the endpoints of the array,
+leaving a gap somewhere in the middle. Deque pushes extend the sequence
+into this gap from one side or another, while pops increase the size of
+the gap. If the user wants to do a push and the array is full, we build a
+ new, larger deque, move all the elements there, and then transplant all
+ the bits of the new <code>struct deque</code> into the old one. This transplant trick avoids changing the address of the <code>struct deque</code> that the user needs to access it.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="ot">#include "deque.h"</span>
+
+<span class="co">/*</span>
+<span class="co"> * Alternative implementation of a deque using a ring buffer.</span>
+<span class="co"> *</span>
+<span class="co"> * Conceptually, this is an array whose indices wrap around at</span>
+<span class="co"> * the endpoints. </span>
+<span class="co"> *</span>
+<span class="co"> * The region in use is specified by a base index pointing</span>
+<span class="co"> * to the first element, and a length count giving the number</span>
+<span class="co"> * of elements. A size field specifies the number of slots</span>
+<span class="co"> * in the block.</span>
+<span class="co"> *</span>
+<span class="co"> * Picture:</span>
+<span class="co"> *</span>
+<span class="co"> * ---------------------------------------------------</span>
+<span class="co"> * |7|8|9| | | | | | | | | | | | | | | | |1|2|3|4|5|6|</span>
+<span class="co"> * ---------------------------------------------------</span>
+<span class="co"> * ^ ^</span>
+<span class="co"> * | |</span>
+<span class="co"> * base + length - 1 base</span>
+<span class="co"> *</span>
+<span class="co"> */</span>
+
+<span class="kw">struct</span> deque {
+ size_t base; <span class="co">/* location of front element */</span>
+ size_t length; <span class="co">/* length of region in use */</span>
+ size_t size; <span class="co">/* total number of positions in contents */</span>
+ <span class="dt">int</span> *contents;
+};
+
+<span class="ot">#define INITIAL_SIZE (8)</span>
+
+<span class="co">/* create a new deque of the given size */</span>
+<span class="dt">static</span> Deque *
+dequeCreateInternal(size_t size)
+{
+ <span class="kw">struct</span> deque *d;
+
+ d = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> deque));
+ assert(d);
+
+ d-&gt;base = <span class="dv">0</span>;
+ d-&gt;length = <span class="dv">0</span>;
+ d-&gt;size = size;
+
+ d-&gt;contents = malloc(<span class="kw">sizeof</span>(<span class="dt">int</span>) * d-&gt;size);
+ assert(d-&gt;contents);
+
+ <span class="kw">return</span> d;
+}
+
+<span class="co">/* return a new empty deque */</span>
+Deque *
+dequeCreate(<span class="dt">void</span>)
+{
+ <span class="kw">return</span> dequeCreateInternal(INITIAL_SIZE);
+}
+
+<span class="dt">void</span>
+dequePush(Deque *d, <span class="dt">int</span> direction, <span class="dt">int</span> value)
+{
+ <span class="kw">struct</span> deque *d2; <span class="co">/* replacement deque if we grow */</span>
+ <span class="dt">int</span> *oldContents; <span class="co">/* old contents of d */</span>
+
+ <span class="co">/*</span>
+<span class="co"> * First make sure we have space.</span>
+<span class="co"> */</span>
+
+ <span class="kw">if</span>(d-&gt;length == d-&gt;size) {
+ <span class="co">/* nope */</span>
+ d2 = dequeCreateInternal(d-&gt;size * <span class="dv">2</span>);
+
+ <span class="co">/* evacuate d */</span>
+ <span class="kw">while</span>(!dequeIsEmpty(d)) {
+ dequePush(d2, DEQUE_BACK, dequePop(d, DEQUE_FRONT));
+ }
+
+ <span class="co">/* do a transplant from d2 to d */</span>
+ <span class="co">/* but save old contents so we can free them */</span>
+ oldContents = d-&gt;contents;
+ *d = *d2; <span class="co">/* this is equivalent to copying the components one by one */</span>
+
+ <span class="co">/* these are the pieces we don't need any more */</span>
+ free(oldContents);
+ free(d2);
+ }
+
+ <span class="co">/*</span>
+<span class="co"> * This requires completely different code </span>
+<span class="co"> * depending on the direction, which is </span>
+<span class="co"> * annoying.</span>
+<span class="co"> */</span>
+ <span class="kw">if</span>(direction == DEQUE_FRONT) {
+ <span class="co">/* d-&gt;base is unsigned, so we have to check for zero first */</span>
+ <span class="kw">if</span>(d-&gt;base == <span class="dv">0</span>) {
+ d-&gt;base = d-&gt;size - <span class="dv">1</span>;
+ } <span class="kw">else</span> {
+ d-&gt;base--;
+ }
+
+ d-&gt;length++;
+
+ d-&gt;contents[d-&gt;base] = value;
+ } <span class="kw">else</span> {
+ d-&gt;contents[(d-&gt;base + d-&gt;length++) % d-&gt;size] = value;
+ }
+}
+
+<span class="co">/* pop and return first value on direction side of deque d */</span>
+<span class="co">/* returns DEQUE_EMPTY if deque is empty */</span>
+<span class="dt">int</span>
+dequePop(Deque *d, <span class="dt">int</span> direction)
+{
+ <span class="dt">int</span> retval;
+
+ <span class="kw">if</span>(dequeIsEmpty(d)) {
+ <span class="kw">return</span> DEQUE_EMPTY;
+ }
+
+ <span class="co">/* else */</span>
+ <span class="kw">if</span>(direction == DEQUE_FRONT) {
+ <span class="co">/* base goes up by one, length goes down by one */</span>
+ retval = d-&gt;contents[d-&gt;base];
+
+ d-&gt;base = (d-&gt;base<span class="dv">+1</span>) % d-&gt;size;
+ d-&gt;length--;
+
+ <span class="kw">return</span> retval;
+ } <span class="kw">else</span> {
+ <span class="co">/* length goes down by one */</span>
+ <span class="kw">return</span> d-&gt;contents[(d-&gt;base + --d-&gt;length) % d-&gt;size];
+ }
+}
+
+<span class="dt">int</span>
+dequeIsEmpty(<span class="dt">const</span> Deque *d)
+{
+ <span class="kw">return</span> d-&gt;length == <span class="dv">0</span>;
+}
+
+<span class="dt">void</span>
+dequeDestroy(Deque *d)
+{
+ free(d-&gt;contents);
+ free(d);
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/linkedLists/deque/ringBuffer.c" class="uri">examples/linkedLists/deque/ringBuffer.c</a>
+</div>
+<p>Here is a <code>Makefile</code> that compiles <code>testDeque.c</code> against both the linked list and the ring buffer implementations. You can do <code>make time</code> to race them against each other.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode makefile"><code class="sourceCode makefile"><span class="dt">CC</span><span class="ch">=</span><span class="st">gcc</span>
+<span class="dt">CFLAGS=-std</span><span class="ch">=</span><span class="st">c99 -Wall -pedantic -O3 -g3</span>
+
+<span class="co"># how many iterations for test</span>
+<span class="dt">ITERATIONS</span><span class="ch">=</span><span class="st">10000000</span>
+<span class="dt">VALGRIND_ITERATIONS</span><span class="ch">=</span><span class="st">100</span>
+
+<span class="dv">all:</span><span class="dt"> testDeque testRingBuffer</span>
+
+<span class="dv">test:</span><span class="dt"> all</span>
+ ./testDeque <span class="ch">$(</span><span class="dt">ITERATIONS</span><span class="ch">)</span>
+ valgrind -q --leak-check=yes ./testDeque <span class="ch">$(</span><span class="dt">VALGRIND_ITERATIONS</span><span class="ch">)</span>
+ ./testRingBuffer <span class="ch">$(</span><span class="dt">ITERATIONS</span><span class="ch">)</span>
+ valgrind -q --leak-check=yes ./testRingBuffer <span class="ch">$(</span><span class="dt">VALGRIND_ITERATIONS</span><span class="ch">)</span>
+
+<span class="dv">time:</span><span class="dt"> all</span>
+ time ./testDeque <span class="ch">$(</span><span class="dt">ITERATIONS</span><span class="ch">)</span>
+ time ./testRingBuffer <span class="ch">$(</span><span class="dt">ITERATIONS</span><span class="ch">)</span>
+
+<span class="dv">testDeque:</span><span class="dt"> testDeque.o deque.o</span>
+ <span class="ch">$(</span><span class="dt">CC</span><span class="ch">)</span> <span class="ch">$(</span><span class="dt">CFLAGS</span><span class="ch">)</span> -o <span class="ch">$@</span> <span class="ch">$^</span>
+
+<span class="dv">testRingBuffer:</span><span class="dt"> testDeque.o ringBuffer.o</span>
+ <span class="ch">$(</span><span class="dt">CC</span><span class="ch">)</span> <span class="ch">$(</span><span class="dt">CFLAGS</span><span class="ch">)</span> -o <span class="ch">$@</span> <span class="ch">$^</span>
+
+
+<span class="dv">clean:</span>
+ <span class="ch">$(</span><span class="dt">RM</span><span class="ch">)</span> testDeque testRingBuffer *.o</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/linkedLists/deque/Makefile" class="uri">examples/linkedLists/deque/Makefile</a>
+</div>
+<h3 id="Circular_linked_lists"><span class="header-section-number">5.2.6</span> Circular linked lists</h3>
+<p>For some applications, there is no obvious starting or ending point
+to a list, and a circular list (where the last element points back to
+the first) may be appropriate. Circular doubly-linked lists can also be
+used to build deques; a single pointer into the list tracks the head of
+the deque, with some convention adopted for whether the head is an
+actual element of the list (at the front, say, with its left neighbor at
+ the back) or a dummy element that is not considered to be part of the
+list.</p>
+<p>The selling point of circular doubly-linked lists as a concrete data
+structure is that insertions and deletions can be done anywhere in the
+list with only local information. For example, here are some routines
+for manipulating a doubly-linked list directly. We'll make our lives
+easy and assume (for the moment) that the list has no actual contents to
+ keep track of.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="co">/* directions for doubly-linked list next pointers */</span>
+<span class="ot">#define RIGHT (0)</span>
+<span class="ot">#define LEFT (1)</span>
+
+<span class="kw">struct</span> elt {
+ <span class="kw">struct</span> elt *next[<span class="dv">2</span>];
+};
+
+<span class="kw">typedef</span> <span class="kw">struct</span> elt *Elt;
+
+<span class="co">/* create a new circular doubly-linked list with 1 element */</span>
+<span class="co">/* returns 0 on allocation error */</span>
+Elt
+listCreate(<span class="dt">void</span>)
+{
+ Elt e;
+
+ e = malloc(<span class="kw">sizeof</span>(*e));
+ <span class="kw">if</span>(e) {
+ e-&gt;next[LEFT] = e-&gt;next[RIGHT] = e;
+ }
+
+ <span class="kw">return</span> e;
+}
+
+<span class="co">/* remove an element from a list */</span>
+<span class="co">/* Make sure you keep a pointer to some other element! */</span>
+<span class="co">/* does not free the removed element */</span>
+<span class="dt">void</span>
+listRemove(Elt e)
+{
+ <span class="co">/* splice e out */</span>
+ e-&gt;next[RIGHT]-&gt;next[LEFT] = e-&gt;next[LEFT];
+ e-&gt;next[LEFT]-&gt;next[RIGHT] = e-&gt;next[RIGHT];
+}
+
+<span class="co">/* insert an element e into list in direction dir from head */</span>
+<span class="dt">void</span>
+listInsert(Elt head, <span class="dt">int</span> dir, Elt e)
+{
+ <span class="co">/* fill in e's new neighbors */</span>
+ e-&gt;next[dir] = head-&gt;next[dir];
+ e-&gt;next[!dir] = head;
+
+ <span class="co">/* make neighbors point back at e */</span>
+ e-&gt;next[dir]-&gt;next[!dir] = e;
+ e-&gt;next[!dir]-&gt;next[dir] = e;
+}
+
+<span class="co">/* split a list, removing all elements between e1 and e2 */</span>
+<span class="co">/* e1 is the leftmost node of the removed subsequence, e2 rightmost */</span>
+<span class="co">/* the removed elements are formed into their own linked list */</span>
+<span class="co">/* comment: listRemove could be implemented as listSplit(e,e) */</span>
+<span class="dt">void</span>
+listSplit(Elt e1, Elt e2)
+{
+ <span class="co">/* splice out the new list */</span>
+ e2-&gt;next[RIGHT]-&gt;next[LEFT] = e1-&gt;next[LEFT];
+ e1-&gt;next[LEFT]-&gt;next[RIGHT] = e2-&gt;next[RIGHT];
+
+ <span class="co">/* fix up the ends */</span>
+ e2-&gt;next[RIGHT] = e1;
+ e1-&gt;next[LEFT] = e2;
+}
+
+<span class="co">/* splice a list starting at e2 after e1 */</span>
+<span class="co">/* e2 becomes e1's right neighbor */</span>
+<span class="co">/* e2's left neighbor becomes left neighbor of e1's old right neighbor */</span>
+<span class="dt">void</span>
+listSplice(Elt e1, Elt e2)
+{
+ <span class="co">/* fix up tail end */</span>
+ e2-&gt;next[LEFT]-&gt;next[RIGHT] = e1-&gt;next[RIGHT];
+ e1-&gt;next[RIGHT]-&gt;next[LEFT] = e2-&gt;next[LEFT];
+
+ <span class="co">/* fix up e1 and e2 */</span>
+ e1-&gt;next[RIGHT] = e2;
+ e2-&gt;next[LEFT] = e1;
+}
+
+<span class="co">/* free all elements of the list containing e */</span>
+<span class="dt">void</span>
+listDestroy(Elt e)
+{
+ Elt target;
+ Elt next;
+
+ <span class="co">/* we'll free elements until we get back to e, then free e */</span>
+ <span class="co">/* note use of pointer address comparison to detect end of loop */</span>
+ <span class="kw">for</span>(target = e-&gt;next[RIGHT]; target != e; target = next) {
+ next = target-&gt;next[RIGHT];
+ free(target);
+ }
+
+ free(e);
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/linkedLists/circular.c" class="uri">examples/linkedLists/circular.c</a>
+</div>
+<p>The above code might or might not actually work. What if it doesn't?
+It may make sense to include some sanity-checking code that we can run
+to see if our pointers are all going to the right place:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* assert many things about correctness of the list */</span>
+<span class="co">/* Amazingly, this is guaranteed to abort or return no matter</span>
+<span class="co"> how badly screwed up the list is. */</span>
+<span class="dt">void</span>
+listSanityCheck(Elt e)
+{
+ Elt check;
+
+ assert(e != <span class="dv">0</span>);
+
+ check = e;
+
+ <span class="kw">do</span> {
+
+ <span class="co">/* are our pointers consistent with our neighbors? */</span>
+ assert(check-&gt;next[RIGHT]-&gt;next[LEFT] == check);
+ assert(check-&gt;next[LEFT]-&gt;next[RIGHT] == check);
+
+ <span class="co">/* on to the next */</span>
+ check = check-&gt;next[RIGHT];
+
+ } <span class="kw">while</span>(check != e);
+}</code></pre></div>
+<p>What if we want to store something in this list? The simplest approach is to extend the definition of <code class="backtick">struct&nbsp;elt</code>:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">struct</span> elt {
+ <span class="kw">struct</span> elt *next[<span class="dv">2</span>];
+ <span class="dt">char</span> *name;
+ <span class="dt">int</span> socialSecurityNumber;
+ <span class="dt">int</span> gullibility;
+};</code></pre></div>
+<p>But then we can only use the code for one particular type of data. An alternative approach is to define a new <code class="backtick">Elt</code>-plus struct:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">struct</span> fancyElt {
+ <span class="kw">struct</span> elt *next[<span class="dv">2</span>];
+ <span class="dt">char</span> *name;
+ <span class="dt">int</span> socialSecurityNumber;
+ <span class="dt">int</span> gullibility;
+};</code></pre></div>
+<p>and then use pointer casts to convert the fancy structs into <code class="backtick">Elt</code>s:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="kw">struct</span> fancyElt *e;
+
+ e = malloc(<span class="kw">sizeof</span>(*e));
+
+ <span class="co">/* fill in fields on e */</span>
+
+ listInsert(someList, (Elt) e);</code></pre></div>
+<p>The trick here is that as long as the initial part of the <code class="backtick">struct&nbsp;fancyElt</code> looks like a <code class="backtick">struct&nbsp;elt</code>, any code that expects a <code class="backtick">struct&nbsp;elt</code> will happily work with it and ignore the fields that happen to be sitting later in memory. (This trick is how <a href="#cplusplus">C++</a> inheritance works.)</p>
+<p>The downside is that if something needs to be done with the other fields (e.g., freeing <code class="backtick">e-&gt;name</code> if <code class="backtick">e</code> is freed), then the <code class="backtick">Elt</code> functions won't know to do this. So if you use this trick you should be careful.</p>
+<p>A similar technique using <code class="backtick">void&nbsp;*</code> pointers can be used to implement <a href="#genericContainers">generic containers</a>.</p>
+<h3 id="What_linked_lists_are_and_are_not_good_for"><span class="header-section-number">5.2.7</span> What linked lists are and are not good for</h3>
+<p>Linked lists are good for any task that involves inserting or
+deleting elements next to an element you already have a pointer to; such
+ operations can usually be done in <span class="math inline"><em>O</em>(1)</span>
+ time. They generally beat arrays (even resizeable arrays) if you need
+to insert or delete in the middle of a list, since an array has to copy
+any elements above the insertion point to make room; if inserts or
+deletes always happen at the end, an array may be better.</p>
+<p>Linked lists are not good for any operation that requires random
+access, since reaching an arbitrary element of a linked list takes as
+much as <span class="math inline"><em>O</em>(<em>n</em>)</span> time.
+For such applications, arrays are better if you don't need to insert in
+the middle; if you do, you should use some sort of <a href="#binaryTrees">tree</a>.</p>
+<h3 id="Further_reading"><span class="header-section-number">5.2.8</span> Further reading</h3>
+<p>A description of many different kinds of linked lists with pictures can be found in <a href="http://en.wikipedia.org/wiki/Linked_list">the Wikipedia article on the subject</a>.</p>
+<p>Animated versions can be found at <a href="http://www.cs.usfca.edu/%7Egalles/visualization/Algorithms.html" class="uri">http://www.cs.usfca.edu/~galles/visualization/Algorithms.html</a>.</p>
+<h2 id="abstractDataTypes"><span class="header-section-number">5.3</span> Abstract data types</h2>
+<p>One of the hard parts about computer programming is that, in general, <em>programs are bigger than brains</em>.
+ Unless you have an unusually capacious brain, it is unlikely that you
+will be able to understand even a modestly large program in its
+entirety. So in order to be able to write and debug large programs, it
+is important to be able to break them up into pieces, where each piece can
+ be treated as a tool whose use and description is simpler (and therefore
+ fits in your brain better) than its actual code. Then you can forget
+about what is happening inside that piece, and just treat it as an
+easily-understood black box from the outside.</p>
+<p>This process of wrapping functionality up in a box and forgetting about its internals is called <strong>abstraction</strong>,
+ and it is the single most important concept in computer science. In
+these notes we will describe a particular kind of abstraction, the
+construction of <strong>abstract data types</strong> or ADTs. Abstract
+data types are data types whose implementation is not visible to their
+user; from the outside, all the user knows about an ADT is what
+operations can be performed on it and what those operations are supposed
+ to do.</p>
+<p>ADTs have an outside and an inside. The outside is called the <strong>interface</strong>; it consists of the minimal set of type and function declarations needed to use the ADT. The inside is called the <strong>implementation</strong>; it consists of type and function definitions, and sometimes auxiliary data or helper functions, that are <em>not</em> visible to users of the ADT. This separation between interface and implementation is called the <strong>abstraction barrier</strong>, and allows the implementation to change without affecting the rest of the program.</p>
+<p>What joins the implementation to the interface is an <strong>abstraction function</strong>.
+ This is a function (in the mathematical sense) that takes any state of
+the implementation and trims off any irrelevant details to leave behind
+an idealized picture of what the data type is doing. For example, a
+linked list implementation translates to a sequence abstract data type
+by forgetting about the pointers used to hook up the elements and just
+keeping the sequence of elements themselves. To exclude bad states of
+the implementation (for example, a singly-linked list that loops back on
+ itself instead of having a terminating null pointer), we may have a <strong>representation invariant</strong>,
+ which is just some property of the implementation that is always true.
+Representation invariants are also useful for detecting when we've
+bungled our implementation, and a good debugging strategy for
+misbehaving abstract data type implementations is often to look for the
+first point at which they violated some property that we thought was an
+invariant.</p>
+<p>Some programming languages include very strong mechanisms for
+enforcing abstraction barriers. C relies somewhat more on politeness,
+and as a programmer you violate an abstraction barrier (by using details
+ of an implementation that are supposed to be hidden) at your peril. In
+C, the interface will typically consist of function and type
+declarations contained in a header file, with implementation made up of
+the corresponding function definitions (and possibly a few extra <code>static</code> functions) in one or more <code>.c</code> files. The <a href="#opaqueStructs">opaque struct</a> technique can be used to hide implementation details of the type.</p>
+<h3 id="abstractDataTypeExample"><span class="header-section-number">5.3.1</span> A sequence type</h3>
+<p>Too much abstraction at once can be hard to take, so let's look at a
+concrete example of an abstract data type. This ADT will represent an
+infinite sequence of <code>int</code>s. Each <strong>instance</strong> of the <code>Sequence</code> type supports a single operation <code>seq_next</code> that returns the next <code>int</code> in the sequence. We will also need to provide one or more <strong>constructor</strong> functions to generate new <code>Sequence</code>s, and a <strong>destructor</strong> function to tear them down.</p>
+<p>Here is an example of a typical use of a <code>Sequence</code>:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+seq_print(Sequence s, <span class="dt">int</span> limit)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">for</span>(i = seq_next(s); i &lt; limit; i = seq_next(s)) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, i);
+ }
+}</code></pre></div>
+<p>Note that <code>seq_print</code> doesn't need to know anything at all about what a <code>Sequence</code> is or how <code>seq_next</code>
+ works in order to print out all the values in the sequence until it
+hits one greater than or equal to limit. This is a good thing--- it
+means that we can use it with any implementation of <code>Sequence</code> we like, and we don't have to change it if <code>Sequence</code> or <code>seq_next</code> changes.</p>
+<h4 id="Interface"><span class="header-section-number">5.3.1.1</span> Interface</h4>
+<p>In C, the interface of an abstract data type will usually be declared
+ in a header file, which is included both in the file that implements
+the ADT (so that the compiler can check that the declarations match up
+with the actual definitions in the implementation) and in any file that uses the ADT. Here's a header file
+for sequences:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* opaque struct: hides actual components of struct sequence,</span>
+<span class="co"> * which are defined in sequence.c */</span>
+<span class="kw">typedef</span> <span class="kw">struct</span> sequence *Sequence;
+
+<span class="co">/* constructors */</span>
+<span class="co">/* all our constructors return a null pointer on allocation failure */</span>
+
+<span class="co">/* returns a Sequence representing init, init+1, init+2, ... */</span>
+Sequence seq_create(<span class="dt">int</span> init);
+
+<span class="co">/* returns a Sequence representing init, init+step, init+2*step, ... */</span>
+Sequence seq_create_step(<span class="dt">int</span> init, <span class="dt">int</span> step);
+
+<span class="co">/* destructor */</span>
+<span class="co">/* destroys a Sequence, recovering all internally-allocated data */</span>
+<span class="dt">void</span> seq_destroy(Sequence);
+
+<span class="co">/* accessor */</span>
+<span class="co">/* returns the first element in a sequence not previously returned */</span>
+<span class="dt">int</span> seq_next(Sequence);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/ADT/sequence/sequence.h" class="uri">examples/ADT/sequence/sequence.h</a>
+</div>
+<p>Here we have defined two different constructors for <code>Sequence</code>s,
+ one of which gives slightly more control over the sequence than the
+other. If we were willing to put more work into the implementation, we
+could imagine building a very complicated <code>Sequence</code> type
+that supported a much wider variety of sequences (for example, sequences
+ generated by functions or sequences read from files); but we'll try to
+keep things simple for now. We can always add more functionality later,
+since the users won't notice if the <code>Sequence</code> type changes internally.</p>
+<h4 id="adtImplementation"><span class="header-section-number">5.3.1.2</span> Implementation</h4>
+<p>The implementation of an ADT in C is typically contained in one (or sometimes more than one) <code>.c</code> file. This file can be compiled and linked into any program that needs to use the ADT. Here is our implementation of <code>Sequence</code>:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="ot">#include "sequence.h"</span>
+
+<span class="kw">struct</span> sequence {
+ <span class="dt">int</span> next; <span class="co">/* next value to return */</span>
+ <span class="dt">int</span> step; <span class="co">/* how much to increment next by */</span>
+};
+
+Sequence
+seq_create(<span class="dt">int</span> init)
+{
+ <span class="kw">return</span> seq_create_step(init, <span class="dv">1</span>);
+}
+
+Sequence
+seq_create_step(<span class="dt">int</span> init, <span class="dt">int</span> step)
+{
+ Sequence s;
+
+ s = malloc(<span class="kw">sizeof</span>(*s));
+ <span class="kw">if</span>(s == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+ s-&gt;next = init;
+ s-&gt;step = step;
+ <span class="kw">return</span> s;
+}
+
+<span class="dt">void</span>
+seq_destroy(Sequence s)
+{
+ free(s);
+}
+
+<span class="dt">int</span>
+seq_next(Sequence s)
+{
+ <span class="dt">int</span> ret; <span class="co">/* saves the old value before we increment it */</span>
+
+ ret = s-&gt;next;
+ s-&gt;next += s-&gt;step;
+
+ <span class="kw">return</span> ret;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/ADT/sequence/sequence.c" class="uri">examples/ADT/sequence/sequence.c</a>
+</div>
+<p>Things to note here: the definition of <code>struct&nbsp;sequence</code> appears only in this file; this means that only the functions defined here can (easily) access the <code>next</code> and <code>step</code> components. This protects <code>Sequence</code>s
+ to a limited extent from outside interference, and defends against
+users who might try to "violate the abstraction boundary" by examining
+the components of a <code>Sequence</code> directly. It also means that if we change the components or meaning of the components in <code>struct&nbsp;sequence</code>, we only have to fix the functions defined in <code>sequence.c</code>.</p>
+<h4 id="Compiling_and_linking"><span class="header-section-number">5.3.1.3</span> Compiling and linking</h4>
+<p>Now that we have <code>sequence.h</code> and <code>sequence.c</code>, how do we use them? Let's suppose we have a simple main program:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="ot">#include "sequence.h"</span>
+
+
+<span class="dt">void</span>
+seq_print(Sequence s, <span class="dt">int</span> limit)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">for</span>(i = seq_next(s); i &lt; limit; i = seq_next(s)) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, i);
+ }
+}
+
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ Sequence s;
+ Sequence s2;
+
+ puts(<span class="st">"Stepping by 1:"</span>);
+
+ s = seq_create(<span class="dv">0</span>);
+ seq_print(s, <span class="dv">5</span>);
+ seq_destroy(s);
+
+ puts(<span class="st">"Now stepping by 3:"</span>);
+
+ s2 = seq_create_step(<span class="dv">1</span>, <span class="dv">3</span>);
+ seq_print(s2, <span class="dv">20</span>);
+ seq_destroy(s2);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/ADT/sequence/main.c" class="uri">examples/ADT/sequence/main.c</a>
+</div>
+<p>We can compile <code>main.c</code> and <code>sequence.c</code> together into a single binary with the command <code>c99&nbsp;main.c&nbsp;sequence.c</code>. Or we can build a <code>Makefile</code> which will compile the two files separately and then link them. Using <code>make</code> may be more efficient, especially for large programs consisting of many components, since if we make any changes <code>make</code> will only recompile those files we have changed. So here is our <code>Makefile</code>:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode makefile"><code class="sourceCode makefile"><span class="dt">CC</span><span class="ch">=</span><span class="st">c99</span>
+<span class="dt">CFLAGS</span><span class="ch">=</span><span class="st">-g3 -pedantic -Wall</span>
+
+<span class="dv">all:</span><span class="dt"> seqprinter</span>
+
+<span class="dv">seqprinter:</span><span class="dt"> main.o sequence.o</span>
+ <span class="ch">$(</span><span class="dt">CC</span><span class="ch">)</span> <span class="ch">$(</span><span class="dt">CFLAGS</span><span class="ch">)</span> -o <span class="ch">$@</span> <span class="ch">$^</span>
+
+<span class="dv">test:</span><span class="dt"> seqprinter</span>
+ ./seqprinter
+
+<span class="co"># these rules say to rebuild main.o and sequence.o if sequence.h changes</span>
+<span class="dv">main.o:</span><span class="dt"> main.c sequence.h</span>
+<span class="dv">sequence.o:</span><span class="dt"> sequence.c sequence.h</span>
+
+<span class="dv">clean:</span>
+ <span class="ch">$(</span><span class="dt">RM</span><span class="ch">)</span> -f seqprinter *.o</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/ADT/sequence/Makefile" class="uri">examples/ADT/sequence/Makefile</a>
+</div>
+<p>And now running <code>make&nbsp;test</code> produces this output. Notice how the built-in <code>make</code> variables <code>$@</code> and <code>$^</code> expand out to the left-hand side and right-hand side of the dependency line for building <code>seqprinter</code>.</p>
+<pre><code>$ make test
+c99 -g3 -pedantic -Wall -c -o main.o main.c
+c99 -g3 -pedantic -Wall -c -o sequence.o sequence.c
+c99 -g3 -pedantic -Wall -o seqprinter main.o sequence.o
+./seqprinter
+Stepping by 1:
+0
+1
+2
+3
+4
+Now stepping by 3:
+1
+4
+7
+10
+13
+16
+19</code></pre>
+<h3 id="Designing_abstract_data_types"><span class="header-section-number">5.3.2</span> Designing abstract data types</h3>
+<p>Now we've seen how to implement an abstract data type. How do we
+choose when to use one, and what operations to give it? Let's try
+answering the second question first.</p>
+<h4 id="Parnas.27s_Principle"><span class="header-section-number">5.3.2.1</span> Parnas's Principle</h4>
+<p>Parnas's Principle is a statement of the fundamental idea of <em>information hiding</em>, which says that abstraction boundaries should be as narrow as possible:</p>
+<ul>
+<li>The developer of a software component must provide the intended user
+ with all the information needed to make effective use of the services
+provided by the component, and should provide no other information.</li>
+<li>The developer of a software component must be provided with all the
+information necessary to carry out the given responsibilities assigned
+to the component, and should be provided with no other information.</li>
+</ul>
+<p>(David Parnas, "On the Criteria to Be Used in Decomposing Systems into Modules," <em>Communications of the ACM</em>, 15(12): 1059--1062, 1972.)</p>
+<p>For ADTs, this means we should provide as few functions for accessing and modifying the ADT as we can get away with. The <code>Sequence</code> type we defined earlier has a particularly narrow interface; the developer of <code>Sequence</code> (whoever is writing <code>sequence.c</code>) needs to know nothing about what its user wants except for the arguments passed in to <code>seq_create</code> or <code>seq_create_step</code>, and the user only needs to be able to call <code>seq_next</code>.
+ More complicated ADTs might provide larger sets of operations, but in
+general we know that an ADT provides a successful abstraction when the
+operations are all "natural" ones given our high-level description. If
+we find ourselves writing a lot of extra operations to let users tinker
+with the guts of our implementation, that may be a sign that either we
+aren't taking our abstraction barrier seriously enough, or that we need
+to put the abstraction barrier in a different place.</p>
+<h4 id="When_to_build_an_abstract_data_type"><span class="header-section-number">5.3.2.2</span> When to build an abstract data type</h4>
+<p>The short answer: Whenever you can.</p>
+<p>A better answer: The best heuristic I know for deciding what ADTs to
+include in a program is to write down a description of how your program
+is going to work. For each noun or noun phrase in the description,
+either identify a built-in data type to implement it or design an
+abstract data type.</p>
+<p>For example: a grade database maintains a list of students, and for
+each student it keeps a list of grades. So here we might want data types
+ to represent:</p>
+<ul>
+<li>A list of students,</li>
+<li>A student,</li>
+<li>A list of grades,</li>
+<li>A grade.</li>
+</ul>
+<p>If grades are simple, we might be able to make them just be <code>int</code>s (or maybe <code>double</code>s); to be on the safe side, we should probably create a <code>Grade</code> type with a <code>typedef</code>.
+ The other types are likely to be more complicated. Each student might
+have in addition to his or her grades a long list of other attributes,
+such as a name, an email address, etc. By wrapping students up as
+abstract data types we can extend these attributes if we need to, or
+allow for very general implementations (say, by allowing a student to
+have an arbitrary list of keyword-attribute pairs). The two kinds of
+lists are likely to be examples of <em>sequence</em> types; we'll be
+seeing a lot of ways to implement these as the course progresses. If we
+want to perform the same kinds of operations on both lists, we might
+want to try to implement them as a single list data type, which then is
+specialized to hold either students or grades; this is not always easy
+to do in C, but we'll see examples of how to do this, too.</p>
+<p>Whether or not this set of four types is the set we will finally use,
+ writing it down gives us a place to start writing our program. We can
+start writing interface files for each of the data types, and then
+evolve their implementations and the main program in parallel, adjusting
+ the interfaces as we find that we have provided too little (or too
+much) data for each component to do what it must.</p>
+<h2 id="hashTables"><span class="header-section-number">5.4</span> Hash tables</h2>
+<p>A <strong>hash table</strong> is a randomized data structure that supports the INSERT, DELETE, and FIND operations in expected <span class="math inline"><em>O</em>(1)</span> time. The core idea behind hash tables is to use a <em>hash function</em>
+ that maps a large keyspace to a smaller domain of array indices, and
+then use constant-time array operations to store and retrieve the data.</p>
+<h3 id="dictionaries"><span class="header-section-number">5.4.1</span> Dictionary data types</h3>
+<p>A hash table is typically used to implement a <strong>dictionary data type</strong>, where keys are mapped to values, but unlike an array, the keys are not conveniently arranged as integers <span class="math inline">0, 1, 2, …</span>. Dictionary data types are a fundamental data structure often found in <a href="http://en.wikipedia.org/wiki/Scripting_language" title="WikiPedia">scripting languages</a> like <a href="http://en.wikipedia.org/wiki/AWK" title="WikiPedia">AWK</a>, <a href="http://en.wikipedia.org/wiki/Perl" title="WikiPedia">Perl</a>, <a href="http://en.wikipedia.org/wiki/Python" title="WikiPedia">Python</a>, <a href="http://en.wikipedia.org/wiki/PHP" title="WikiPedia">PHP</a>, <a href="http://en.wikipedia.org/wiki/Lua" title="WikiPedia">Lua</a>, or <a href="http://en.wikipedia.org/wiki/Ruby" title="WikiPedia">Ruby</a>. For example, here is some Python code that demonstrates use of a dictionary accessed using an array-like syntax:</p>
+<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python">title <span class="op">=</span> {} <span class="co"># empty dictionary</span>
+title[<span class="st">"Barack"</span>] <span class="op">=</span> <span class="st">"President"</span>
+user <span class="op">=</span> <span class="st">"Barack"</span>
+<span class="bu">print</span>(<span class="st">"Welcome"</span> <span class="op">+</span> title[user] <span class="op">+</span> <span class="st">" "</span> <span class="op">+</span> user)</code></pre></div>
+<p>In C, we don't have the convenience of reusing <code class="backtick">[]</code> for dictionary lookups (we'd need <a href="#cplusplus">C++</a>
+ for that), but we can still get the same effect with more typing using
+functions. For example, using an abstract dictionary in C might look
+like this:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c">Dict *title;
+<span class="dt">const</span> <span class="dt">char</span> *user;
+
+title = dictCreate();
+dictSet(title, <span class="st">"Barack"</span>, <span class="st">"President"</span>);
+user = <span class="st">"Barack"</span>;
+printf(<span class="st">"Welcome %s %s</span><span class="ch">\n</span><span class="st">"</span>, dictGet(title, user), user);</code></pre></div>
+<p>As with other abstract data types, the idea is that the user of the
+dictionary type doesn't need to know how it is implemented. For example,
+ we could implement the dictionary as an array of <code>struct</code>s that we search through, but that would be expensive: <span class="math inline"><em>O</em>(<em>n</em>)</span> time to find a key in the worst case.</p>
+<p>Closely related to a dictionary is a <strong>set</strong>, which has
+keys but no values. It's usually pretty straightforward to turn an
+implementation of a dictionary into a set (leave out the values) or vice
+ versa (add values to the end of keys but don't use them in searching).</p>
+<h3 id="Basics_of_hashing"><span class="header-section-number">5.4.2</span> Basics of hashing</h3>
+<p>If our keys were conveniently named <span class="math inline">0, 1, 2, …, <em>n</em> − 1</span>,
+ we could simply use an array, and be able to find a record given a key
+in constant time. Unfortunately, naming conventions for most objects are
+ not so convenient, and even enumerations like Social Security numbers
+are likely to span a larger range than we want to allocate. But we would
+ like to get the constant-time performance of an array anyway.</p>
+<p>The solution is to feed the keys through some hash function <span class="math inline"><em>H</em></span>, which maps them down to array indices. So in a database of people, to find "Smith, Wayland", we would first compute <span class="math inline"><em>H</em></span>("Smith, Wayland") <span class="math inline">= 137</span> (say), and then look in position <span class="math inline">137</span> in the array. Because we are always using the same function <span class="math inline"><em>H</em></span>, we will always be directed to the same position <span class="math inline">137</span>.</p>
+<h3 id="Resolving_collisions"><span class="header-section-number">5.4.3</span> Resolving collisions</h3>
+<p>But what if <span class="math inline"><em>H</em></span>("Smith, Wayland") and <span class="math inline"><em>H</em></span>("Hephaestos") both equal 137? Now we have a <strong>collision</strong>,
+ and we have to resolve it by finding some way to either (a) effectively
+ store both records in a single array location, or (b) move one of the
+records to a new location that we can still find later. Let's consider
+these two approaches separately.</p>
+<h4 id="Chaining"><span class="header-section-number">5.4.3.1</span> Chaining</h4>
+<p>We can't really store more than one record in an array location, but
+we can fake it by making each array location be a pointer to a linked
+list. Every time we insert a new element in a particular location, we
+simply add it to this list.</p>
+<p>Since the cost of scanning a linked list is linear in its size, this
+means that the worst-case cost of searching for a particular key will be
+ linear in the number of keys in the table that hash to the same
+location. Under the assumption that the hash function is a random
+function (which does not mean that it returns random values every time
+you call it but instead means that we picked one of the many possible
+hash functions uniformly at random), on average we get <span class="math inline"><em>n</em>/<em>m</em></span> elements in each list.<br>
+So on average a failed search takes <span class="math inline"><em>O</em>(<em>n</em>/<em>m</em>)</span> time.</p>
+<p>This quantity <span class="math inline"><em>n</em>/<em>m</em></span> is called the <strong>load factor</strong> of the hash table and is often written as <span class="math inline"><em>α</em></span>.
+ If we want our hash table to be efficient, we will need to keep this
+load factor down. If we can guarantee that it's a constant, then we get
+constant-time searches.</p>
+<h4 id="Open_addressing"><span class="header-section-number">5.4.3.2</span> Open addressing</h4>
+<p>With <em>open addressing</em>, we store only one element per
+location, and handle collisions by storing the extra elements in other
+unused locations in the array. To find these other locations, we fix
+some <em>probe sequence</em> that tells us where to look if <span class="math inline"><em>A</em>[<em>H</em>(<em>x</em>)]</span> contains an element that is not <span class="math inline"><em>x</em></span>. A typical probe sequence (called <em>linear probing</em>) is just <span class="math inline"><em>H</em>(<em>x</em>),<em>H</em>(<em>x</em>)+1, <em>H</em>(<em>x</em>)+2, …, </span>
+ wrapping around at the end of the array. The idea is that if we can't
+put an element in a particular place, we just keep walking up through
+the array until we find an empty slot. As long as we follow the same
+probe sequence when looking for an element, we will be able to find the
+element again. If we are looking for an element and reach an empty
+location, then we know that the element is not present in the table.</p>
+<p>For open addressing, we always have that <span class="math inline"><em>α</em> = <em>n</em>/<em>m</em></span>
+ is less than or equal to 1, since we can't store more elements in the
+table than we have locations. In fact, we must ensure that the load
+factor is strictly less than 1, or some searches will never terminate
+because they never reach an empty location. Assuming <span class="math inline"><em>α</em> &lt; 1</span>
+ and that the hash function is uniform, it is possible to calculate the
+worst-case expected cost of a FIND operation, which as before will occur
+ when we have an unsuccessful FIND. Though we won't do this calculation
+here, the result is bounded by <span class="math inline">1/(1 − <em>n</em>/<em>m</em>)</span>, which gets pretty bad if <span class="math inline"><em>n</em>/<em>m</em></span> is very close to <span class="math inline">1</span>, but is a constant as long as <span class="math inline"><em>n</em>/<em>m</em></span> is bounded by a constant (say <span class="math inline">3/4</span>, which makes the expected number of probes at most <span class="math inline">4</span>).</p>
+<h3 id="Choosing_a_hash_function"><span class="header-section-number">5.4.4</span> Choosing a hash function</h3>
+<p>Here we will describe three methods for generating hash functions.
+The first two are typical methods used in practice. The last has
+additional desirable theoretical properties.</p>
+<h4 id="Division_method"><span class="header-section-number">5.4.4.1</span> Division method</h4>
+<p>We want our hash function to look as close as it can to a random
+function, but random functions are (provably) expensive to store. So in
+practice we do something simpler and hope for the best. If the keys are
+large integers, a typical approach is to just compute the remainder mod <span class="math inline"><em>m</em></span>. This can cause problems if <span class="math inline"><em>m</em></span>
+ is, say, a power of 2, since it may be that the low-order bits of all
+the keys are similar, which will produce lots of collisions. So in
+practice with this method <span class="math inline"><em>m</em></span> is typically chosen to be a large prime.</p>
+<p>What if we want to hash strings instead of integers? The trick is to treat the strings as integers. Given a string <span class="math inline"><em>a</em><sub>1</sub><em>a</em><sub>2</sub><em>a</em><sub>3</sub>…<em>a</em><sub><em>k</em></sub></span>, we represent it as <span class="math inline">∑<sub><em>i</em></sub><em>a</em><sub><em>i</em></sub><em>b</em><sup><em>i</em></sup></span>, where <span class="math inline"><em>b</em></span>
+ is a base chosen to be larger than the number of characters. We can
+then feed this resulting huge integer to our hash function. Typically we
+ do not actually compute the huge integer directly, but instead compute
+its remainder mod <span class="math inline"><em>m</em></span>, as in this short C function:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* treat strings as base-256 integers */</span>
+<span class="co">/* with digits in the range 1 to 255 */</span>
+<span class="ot">#define BASE (256)</span>
+
+size_t
+hash(<span class="dt">const</span> <span class="dt">char</span> *s, size_t m)
+{
+ size_t h;
+ <span class="dt">unsigned</span> <span class="dt">const</span> <span class="dt">char</span> *us;
+
+ <span class="co">/* cast s to unsigned const char * */</span>
+ <span class="co">/* this ensures that elements of s will be treated as having values &gt;= 0 */</span>
+ us = (<span class="dt">unsigned</span> <span class="dt">const</span> <span class="dt">char</span> *) s;
+
+ h = <span class="dv">0</span>;
+ <span class="kw">while</span>(*us != '\<span class="dv">0</span>') {
+ h = (h * BASE + *us) % m;
+ us++;
+ }
+
+ <span class="kw">return</span> h;
+}</code></pre></div>
+<p>The division method works best when <span class="math inline"><em>m</em></span>
+ is a prime, as otherwise regularities in the keys can produce
+clustering in the hash values. (Consider, for example, what happens if <span class="math inline"><em>m</em></span>
+ is 256). But this can be awkward for computing hash functions quickly,
+because computing remainders is a relatively slow operation.</p>
+<h4 id="Multiplication_method"><span class="header-section-number">5.4.4.2</span> Multiplication method</h4>
+<p>For this reason, the most commonly-used hash functions replace the modulus <span class="math inline"><em>m</em></span> with something like <span class="math inline">2<sup>32</sup></span> and replace the base with some small prime, relying on the multiplier to break up patterns in the input. This yields the <strong>multiplication method</strong>. Typical code might look something like this:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define MULTIPLIER (37)</span>
+
+size_t
+hash(<span class="dt">const</span> <span class="dt">char</span> *s)
+{
+ size_t h;
+ <span class="dt">unsigned</span> <span class="dt">const</span> <span class="dt">char</span> *us;
+
+ <span class="co">/* cast s to unsigned const char * */</span>
+ <span class="co">/* this ensures that elements of s will be treated as having values &gt;= 0 */</span>
+ us = (<span class="dt">unsigned</span> <span class="dt">const</span> <span class="dt">char</span> *) s;
+
+ h = <span class="dv">0</span>;
+ <span class="kw">while</span>(*us != '\<span class="dv">0</span>') {
+ h = h * MULTIPLIER + *us;
+ us++;
+ }
+
+ <span class="kw">return</span> h;
+}</code></pre></div>
+<p>The only difference between this code and the division method code is that we've renamed <code class="backtick">BASE</code> to <code class="backtick">MULTIPLIER</code> and dropped <span class="math inline"><em>m</em></span>.
+ There is still some remainder-taking happening: since C truncates the
+result of any operation that exceeds the size of the integer type that
+holds it, the <code class="backtick">h&nbsp;=&nbsp;h&nbsp;*&nbsp;MULTIPLIER&nbsp;+&nbsp;*us;</code> line effectively has a hidden mod <span class="math inline">2<sup>32</sup></span> or <span class="math inline">2<sup>64</sup></span> at the end of it (depending on how big your <code>size_t</code> is). Now we can't use, say, <span class="math inline">256</span>, as the multiplier, because then the hash value <code class="backtick">h</code> would be determined by just the last four characters of <code class="backtick">s</code> (or the last eight, if <code>size_t</code> is 64 bits wide): each multiplication by <span class="math inline">256</span> shifts the earlier characters eight bits further left until they fall off the top.</p>
+<p>The choice of <span class="math inline">37</span> is based on folklore. I like <span class="math inline">97</span> myself, and <span class="math inline">31</span> also has supporters. Almost any medium-sized prime should work.</p>
+<h4 id="Universal_hashing"><span class="header-section-number">5.4.4.3</span> Universal hashing</h4>
+<p>The preceding hash functions offer no guarantees that the adversary can't find a set of <span class="math inline"><em>n</em></span> keys that all hash to the same location; indeed, since they're deterministic, as long as the keyspace contains at least <span class="math inline"><em>n</em><em>m</em></span>
+ keys the adversary can always do so. Universal families of hash
+functions avoid this problem by choosing the hash function randomly,
+from some set of possible functions that is small enough that we can
+write our random choice down.</p>
+<p>The property that makes a family of hash functions <span class="math inline">{<em>H</em><sub><em>r</em></sub>}</span> universal is that, for any distinct keys <span class="math inline"><em>x</em></span> and <span class="math inline"><em>y</em></span>, the probability that <span class="math inline"><em>r</em></span> is chosen so that <span class="math inline"><em>H</em><sub><em>r</em></sub>(<em>x</em>)=<em>H</em><sub><em>r</em></sub>(<em>y</em>)</span> is exactly <span class="math inline">1/<em>m</em></span>.</p>
+<p>Why is this important? Recall that for chaining, the expected number of collisions between an element <span class="math inline"><em>x</em></span> and other elements was just the sum over all particular elements <span class="math inline"><em>y</em></span> of the probability that <span class="math inline"><em>x</em></span> collides with that particular element. If <span class="math inline"><em>H</em><sub><em>r</em></sub></span> is drawn from a universal family, this probability is <span class="math inline">1/<em>m</em></span> for each <span class="math inline"><em>y</em></span>, and we get the same <span class="math inline"><em>n</em>/<em>m</em></span> expected collisions as if <span class="math inline"><em>H</em><sub><em>r</em></sub></span> were completely random.</p>
+<p>Several universal families of hash functions are known. Here is a
+simple one that works when the size of the keyspace and the size of the
+output space are both powers of <span class="math inline">2</span>. Let the keyspace consist of <span class="math inline"><em>n</em></span>-bit strings and let <span class="math inline"><em>m</em> = 2<sup><em>k</em></sup></span>. Then the random index <span class="math inline"><em>r</em></span> consists of <span class="math inline"><em>n</em><em>k</em></span> independent random bits organized as <span class="math inline"><em>n</em></span> <span class="math inline"><em>k</em></span>-bit strings <span class="math inline"><em>a</em><sub>1</sub><em>a</em><sub>2</sub>…<em>a</em><sub><em>n</em></sub></span>. To compute the hash function of a particular input <span class="math inline"><em>x</em></span>, compute the bitwise exclusive or of <span class="math inline"><em>a</em><sub><em>i</em></sub></span> for each position <span class="math inline"><em>i</em></span> where the <span class="math inline"><em>i</em></span>-th bit of <span class="math inline"><em>x</em></span> is <span class="math inline">1</span>.</p>
+<p>We can implement this in C as</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* implements universal hashing using random bit-vectors in x */</span>
+<span class="co">/* assumes number of elements in x is at least BITS_PER_CHAR * MAX_STRING_SIZE */</span>
+
+<span class="ot">#define BITS_PER_CHAR (8) </span><span class="co">/* not true on all machines! */</span>
+<span class="ot">#define MAX_STRING_SIZE (128) </span><span class="co">/* we'll stop hashing after this many */</span>
+<span class="ot">#define MAX_BITS (BITS_PER_CHAR * MAX_STRING_SIZE)</span>
+
+size_t
+hash(<span class="dt">const</span> <span class="dt">char</span> *s, size_t x[])
+{
+ size_t h;
+ <span class="dt">unsigned</span> <span class="dt">const</span> <span class="dt">char</span> *us;
+ <span class="dt">int</span> i;
+ <span class="dt">unsigned</span> <span class="dt">char</span> c;
+ <span class="dt">int</span> shift;
+
+ <span class="co">/* cast s to unsigned const char * */</span>
+ <span class="co">/* this ensures that elements of s will be treated as having values &gt;= 0 */</span>
+ us = (<span class="dt">unsigned</span> <span class="dt">const</span> <span class="dt">char</span> *) s;
+
+ h = <span class="dv">0</span>;
+ <span class="kw">for</span>(i = <span class="dv">0</span>; *us != <span class="dv">0</span> &amp;&amp; i &lt; MAX_BITS; us++) {
+ c = *us;
+ <span class="kw">for</span>(shift = <span class="dv">0</span>; shift &lt; BITS_PER_CHAR; shift++, i++) {
+ <span class="co">/* is low bit of c set? */</span>
+ <span class="kw">if</span>(c &amp; <span class="bn">0x1</span>) {
+ h ^= x[i];
+ }
+
+ <span class="co">/* shift c to get new bit in lowest position */</span>
+ c &gt;&gt;= <span class="dv">1</span>;
+ }
+ }
+
+ <span class="kw">return</span> h;
+}</code></pre></div>
+<p>As you can see, this requires a lot of bit-fiddling. It also fails if we get a lot of strings that are identical for the first <code class="backtick">MAX_STRING_SIZE</code> characters. Conceivably, the latter problem could be dealt with by growing <code class="backtick">x</code> dynamically as needed. But we also haven't addressed the question of where we get these random values from—see the chapter on <a href="#randomization">randomization</a> for some possibilities.</p>
+<p>In practice, universal families of hash functions are seldom used,
+since a reasonable fixed hash function is unlikely to be correlated with
+ any patterns in the actual input. But they are useful for demonstrating
+ provably good performance.</p>
+<h3 id="Maintaining_a_constant_load_factor"><span class="header-section-number">5.4.5</span> Maintaining a constant load factor</h3>
+<p>All of the running time results for hash tables depend on keeping the load factor <span class="math inline"><em>α</em></span>
+ small. But as more elements are inserted into a fixed-size table, the
+load factor grows without bound. The usual solution to this problem is
+rehashing: when the load factor crosses some threshold, we create a new
+hash table of size <span class="math inline">2<em>n</em></span> or thereabouts and migrate all the elements to it.</p>
+<p>This approach raises the worst-case cost of an insertion to <span class="math inline"><em>O</em>(<em>n</em>)</span>. However, we can bring the <em>expected</em> cost down to <span class="math inline"><em>O</em>(1)</span> by rehashing only with probability <span class="math inline"><em>O</em>(1/<em>n</em>)</span> for each insert after the threshold is crossed. Or we can apply <strong>amortized analysis</strong> to argue that the amortized cost (total cost divided by number of operations) is <span class="math inline"><em>O</em>(1)</span>
+ assuming we double the table size on each rehash. Neither the
+expected-cost nor the amortized-cost approaches actually change the
+worst-case cost, but they make it look better by demonstrating that we
+at least don't incur that cost every time.</p>
+<p>With enough machinery, it may be possible to <strong>deamortize</strong>
+ the cost of rehashing by doing a little bit of it with every insertion.
+ The idea is to build the new hash table incrementally, and start moving
+ elements to it once it is fully initialized. This requires keeping
+around two copies of the hash table and searching both, and for most
+purposes is more trouble than it's worth. But a mechanism like this is
+often used for real-time garbage collection, where it's important not to
+ have the garbage collector lock up the entire system while it does its
+work.</p>
+<h3 id="Examples"><span class="header-section-number">5.4.6</span> Examples</h3>
+<h4 id="A_low-overhead_hash_table_using_open_addressing"><span class="header-section-number">5.4.6.1</span> A low-overhead hash table using open addressing</h4>
+<p>Here is a very low-overhead hash table based on open addressing. The
+application is rapidly verifying ID numbers in the range 000000000 to
+999999999 by checking them against a list of known good IDs. Since the
+quantity of valid ID numbers may be very large, a goal of the mechanism
+is to keep the amount of extra storage used as small as possible. This
+implementation uses a tunable overhead parameter. Setting the parameter
+to a high value makes lookups fast but requires more space per ID number
+ in the list. Setting it to a low value can reduce the storage cost
+arbitrarily close to 4 bytes per ID, at the cost of increasing search
+times.</p>
+<p>Here is the header file giving the interface:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">typedef</span> <span class="kw">struct</span> idList *IDList;
+
+<span class="ot">#define MIN_ID (0)</span>
+<span class="ot">#define MAX_ID (999999999)</span>
+
+<span class="co">/* build an IDList out of an unsorted array of n good ids */</span>
+<span class="co">/* returns 0 on allocation failure */</span>
+IDList IDListCreate(<span class="dt">int</span> n, <span class="dt">int</span> unsortedIdList[]);
+
+<span class="co">/* destroy an IDList */</span>
+<span class="dt">void</span> IDListDestroy(IDList list);
+
+<span class="co">/* check an id against the list */</span>
+<span class="co">/* returns nonzero if id is in the list */</span>
+<span class="dt">int</span> IDListContains(IDList list, <span class="dt">int</span> id);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/hashTables/idList/idList.h" class="uri">examples/hashTables/idList/idList.h</a>
+</div>
+<p>And here is the implementation:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="ot">#include "idList.h"</span>
+
+<span class="co">/* overhead parameter that determines both space and search costs */</span>
+<span class="co">/* must be strictly greater than 1 */</span>
+<span class="ot">#define OVERHEAD (1.1)</span>
+<span class="ot">#define NULL_ID (-1)</span>
+
+
+<span class="kw">struct</span> idList {
+ <span class="dt">int</span> size;
+ <span class="dt">int</span> ids[<span class="dv">1</span>]; <span class="co">/* we'll actually malloc more space than this */</span>
+};
+
+IDList
+IDListCreate(<span class="dt">int</span> n, <span class="dt">int</span> unsortedIdList[])
+{
+ IDList list;
+ <span class="dt">int</span> size;
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> probe;
+
+ size = (<span class="dt">int</span>) (n * OVERHEAD + <span class="dv">1</span>);
+
+ list = malloc(<span class="kw">sizeof</span>(*list) + <span class="kw">sizeof</span>(<span class="dt">int</span>) * (size<span class="dv">-1</span>));
+ <span class="kw">if</span>(list == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+
+ <span class="co">/* else */</span>
+ list-&gt;size = size;
+
+ <span class="co">/* clear the hash table */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; size; i++) {
+ list-&gt;ids[i] = NULL_ID;
+ }
+
+ <span class="co">/* load it up */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+
+ assert(unsortedIdList[i] &gt;= MIN_ID);
+ assert(unsortedIdList[i] &lt;= MAX_ID);
+
+ <span class="co">/* hashing with open addressing by division */</span>
+ <span class="co">/* this MUST be the same pattern as in IDListContains */</span>
+ <span class="kw">for</span>(probe = unsortedIdList[i] % list-&gt;size;
+ list-&gt;ids[probe] != NULL_ID;
+ probe = (probe + <span class="dv">1</span>) % list-&gt;size);
+
+ assert(list-&gt;ids[probe] == NULL_ID);
+
+ list-&gt;ids[probe] = unsortedIdList[i];
+ }
+
+ <span class="kw">return</span> list;
+}
+
+<span class="dt">void</span>
+IDListDestroy(IDList list)
+{
+ free(list);
+}
+
+<span class="dt">int</span>
+IDListContains(IDList list, <span class="dt">int</span> id)
+{
+ <span class="dt">int</span> probe;
+
+ <span class="co">/* this MUST be the same pattern as in IDListCreate */</span>
+ <span class="kw">for</span>(probe = id % list-&gt;size;
+ list-&gt;ids[probe] != NULL_ID;
+ probe = (probe + <span class="dv">1</span>) % list-&gt;size) {
+ <span class="kw">if</span>(list-&gt;ids[probe] == id) {
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/hashTables/idList/idList.c" class="uri">examples/hashTables/idList/idList.c</a>
+</div>
+<h4 id="A_string_to_string_dictionary_using_chaining"><span class="header-section-number">5.4.6.2</span> A string to string dictionary using chaining</h4>
+<p>Here is a more complicated string to string dictionary based on chaining.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">typedef</span> <span class="kw">struct</span> dict *Dict;
+
+<span class="co">/* create a new empty dictionary */</span>
+Dict DictCreate(<span class="dt">void</span>);
+
+<span class="co">/* destroy a dictionary */</span>
+<span class="dt">void</span> DictDestroy(Dict);
+
+<span class="co">/* insert a new key-value pair into an existing dictionary */</span>
+<span class="dt">void</span> DictInsert(Dict, <span class="dt">const</span> <span class="dt">char</span> *key, <span class="dt">const</span> <span class="dt">char</span> *value);
+
+<span class="co">/* return the most recently inserted value associated with a key */</span>
+<span class="co">/* or 0 if no matching key is present */</span>
+<span class="dt">const</span> <span class="dt">char</span> *DictSearch(Dict, <span class="dt">const</span> <span class="dt">char</span> *key);
+
+<span class="co">/* delete the most recently inserted record with the given key */</span>
+<span class="co">/* if there is no such record, has no effect */</span>
+<span class="dt">void</span> DictDelete(Dict, <span class="dt">const</span> <span class="dt">char</span> *key);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/hashTables/dict/dict.h" class="uri">examples/hashTables/dict/dict.h</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;string.h&gt;</span>
+
+<span class="ot">#include "dict.h"</span>
+
+<span class="kw">struct</span> elt {
+ <span class="kw">struct</span> elt *next;
+ <span class="dt">char</span> *key;
+ <span class="dt">char</span> *value;
+};
+
+<span class="kw">struct</span> dict {
+ <span class="dt">int</span> size; <span class="co">/* size of the pointer table */</span>
+ <span class="dt">int</span> n; <span class="co">/* number of elements stored */</span>
+ <span class="kw">struct</span> elt **table;
+};
+
+<span class="ot">#define INITIAL_SIZE (1024)</span>
+<span class="ot">#define GROWTH_FACTOR (2)</span>
+<span class="ot">#define MAX_LOAD_FACTOR (1)</span>
+
+<span class="co">/* dictionary initialization code used in both DictCreate and grow */</span>
+Dict
+internalDictCreate(<span class="dt">int</span> size)
+{
+ Dict d;
+ <span class="dt">int</span> i;
+
+ d = malloc(<span class="kw">sizeof</span>(*d));
+
+ assert(d != <span class="dv">0</span>);
+
+ d-&gt;size = size;
+ d-&gt;n = <span class="dv">0</span>;
+ d-&gt;table = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> elt *) * d-&gt;size);
+
+ assert(d-&gt;table != <span class="dv">0</span>);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; d-&gt;size; i++) d-&gt;table[i] = <span class="dv">0</span>;
+
+ <span class="kw">return</span> d;
+}
+
+Dict
+DictCreate(<span class="dt">void</span>)
+{
+ <span class="kw">return</span> internalDictCreate(INITIAL_SIZE);
+}
+
+<span class="dt">void</span>
+DictDestroy(Dict d)
+{
+ <span class="dt">int</span> i;
+ <span class="kw">struct</span> elt *e;
+ <span class="kw">struct</span> elt *next;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; d-&gt;size; i++) {
+ <span class="kw">for</span>(e = d-&gt;table[i]; e != <span class="dv">0</span>; e = next) {
+ next = e-&gt;next;
+
+ free(e-&gt;key);
+ free(e-&gt;value);
+ free(e);
+ }
+ }
+
+ free(d-&gt;table);
+ free(d);
+}
+
+<span class="ot">#define MULTIPLIER (97)</span>
+
+<span class="dt">static</span> <span class="dt">unsigned</span> <span class="dt">long</span>
+hash_function(<span class="dt">const</span> <span class="dt">char</span> *s)
+{
+ <span class="dt">unsigned</span> <span class="dt">const</span> <span class="dt">char</span> *us;
+ <span class="dt">unsigned</span> <span class="dt">long</span> h;
+
+ h = <span class="dv">0</span>;
+
+ <span class="kw">for</span>(us = (<span class="dt">unsigned</span> <span class="dt">const</span> <span class="dt">char</span> *) s; *us; us++) {
+ h = h * MULTIPLIER + *us;
+ }
+
+ <span class="kw">return</span> h;
+}
+
+<span class="dt">static</span> <span class="dt">void</span>
+grow(Dict d)
+{
+ Dict d2; <span class="co">/* new dictionary we'll create */</span>
+ <span class="kw">struct</span> dict swap; <span class="co">/* temporary structure for brain transplant */</span>
+ <span class="dt">int</span> i;
+ <span class="kw">struct</span> elt *e;
+
+ d2 = internalDictCreate(d-&gt;size * GROWTH_FACTOR);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; d-&gt;size; i++) {
+ <span class="kw">for</span>(e = d-&gt;table[i]; e != <span class="dv">0</span>; e = e-&gt;next) {
+ <span class="co">/* note: this recopies everything */</span>
+ <span class="co">/* a more efficient implementation would</span>
+<span class="co"> * patch out the strdups inside DictInsert</span>
+<span class="co"> * to avoid this problem */</span>
+ DictInsert(d2, e-&gt;key, e-&gt;value);
+ }
+ }
+
+ <span class="co">/* the hideous part */</span>
+ <span class="co">/* We'll swap the guts of d and d2 */</span>
+ <span class="co">/* then call DictDestroy on d2 */</span>
+ swap = *d;
+ *d = *d2;
+ *d2 = swap;
+
+ DictDestroy(d2);
+}
+
+<span class="co">/* insert a new key-value pair into an existing dictionary */</span>
+<span class="dt">void</span>
+DictInsert(Dict d, <span class="dt">const</span> <span class="dt">char</span> *key, <span class="dt">const</span> <span class="dt">char</span> *value)
+{
+ <span class="kw">struct</span> elt *e;
+ <span class="dt">unsigned</span> <span class="dt">long</span> h;
+
+ assert(key);
+ assert(value);
+
+ e = malloc(<span class="kw">sizeof</span>(*e));
+
+ assert(e);
+
+ e-&gt;key = strdup(key);
+ e-&gt;value = strdup(value);
+
+ h = hash_function(key) % d-&gt;size;
+
+ e-&gt;next = d-&gt;table[h];
+ d-&gt;table[h] = e;
+
+ d-&gt;n++;
+
+ <span class="co">/* grow table if there is not enough room */</span>
+ <span class="kw">if</span>(d-&gt;n &gt;= d-&gt;size * MAX_LOAD_FACTOR) {
+ grow(d);
+ }
+}
+
+<span class="co">/* return the most recently inserted value associated with a key */</span>
+<span class="co">/* or 0 if no matching key is present */</span>
+<span class="dt">const</span> <span class="dt">char</span> *
+DictSearch(Dict d, <span class="dt">const</span> <span class="dt">char</span> *key)
+{
+ <span class="kw">struct</span> elt *e;
+
+ <span class="kw">for</span>(e = d-&gt;table[hash_function(key) % d-&gt;size]; e != <span class="dv">0</span>; e = e-&gt;next) {
+ <span class="kw">if</span>(!strcmp(e-&gt;key, key)) {
+ <span class="co">/* got it */</span>
+ <span class="kw">return</span> e-&gt;value;
+ }
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}
+
+<span class="co">/* delete the most recently inserted record with the given key */</span>
+<span class="co">/* if there is no such record, has no effect */</span>
+<span class="dt">void</span>
+DictDelete(Dict d, <span class="dt">const</span> <span class="dt">char</span> *key)
+{
+ <span class="kw">struct</span> elt **prev; <span class="co">/* what to change when elt is deleted */</span>
+ <span class="kw">struct</span> elt *e; <span class="co">/* what to delete */</span>
+
+ <span class="kw">for</span>(prev = &amp;(d-&gt;table[hash_function(key) % d-&gt;size]);
+ *prev != <span class="dv">0</span>;
+ prev = &amp;((*prev)-&gt;next)) {
+ <span class="kw">if</span>(!strcmp((*prev)-&gt;key, key)) {
+ <span class="co">/* got it */</span>
+ e = *prev;
+ *prev = e-&gt;next;
+
+ free(e-&gt;key);
+ free(e-&gt;value);
+ free(e);
+
+ <span class="kw">return</span>;
+ }
+ }
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/hashTables/dict/dict.c" class="uri">examples/hashTables/dict/dict.c</a>
+</div>
+<p>And here is some (very minimal) test code.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="ot">#include "dict.h"</span>
+
+<span class="dt">int</span>
+main()
+{
+ Dict d;
+ <span class="dt">char</span> buf[<span class="dv">512</span>];
+ <span class="dt">int</span> i;
+
+ d = DictCreate();
+
+ DictInsert(d, <span class="st">"foo"</span>, <span class="st">"hello world"</span>);
+ puts(DictSearch(d, <span class="st">"foo"</span>));
+ DictInsert(d, <span class="st">"foo"</span>, <span class="st">"hello world2"</span>);
+ puts(DictSearch(d, <span class="st">"foo"</span>));
+ DictDelete(d, <span class="st">"foo"</span>);
+ puts(DictSearch(d, <span class="st">"foo"</span>));
+ DictDelete(d, <span class="st">"foo"</span>);
+ assert(DictSearch(d, <span class="st">"foo"</span>) == <span class="dv">0</span>);
+ DictDelete(d, <span class="st">"foo"</span>);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; <span class="dv">10000</span>; i++) {
+ sprintf(buf, <span class="st">"%d"</span>, i);
+ DictInsert(d, buf, buf);
+ }
+
+ DictDestroy(d);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}
+
+ </code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/hashTables/dict/test_dict.c" class="uri">examples/hashTables/dict/test_dict.c</a>
+</div>
+<h2 id="genericContainers"><span class="header-section-number">5.5</span> Generic containers</h2>
+<p>The first rule of programming is that you should never write the same
+ code twice. Suppose that you happen to have lying around a dictionary
+type whose keys are <code>int</code>s and whose values are strings.
+Tomorrow you realize that what you really want is a dictionary type
+whose keys are strings and whose values are <code>int</code>s, or one whose keys are <code>int</code>s but whose values are stacks. If you have <span class="math inline"><em>n</em></span> different types that may appear as keys or values, can you avoid writing <span class="math inline"><em>n</em><sup>2</sup></span> different dictionary implementations to get every possible combination?</p>
+<p>Many languages provide special mechanisms to support <strong>generic types</strong>,
+ ones for which part of the type is not specified. It's as if you could
+declare an array in C to be an array of some type to be specified later,
+ and then write functions that operate on any such array without knowing
+ what the missing type is going to be (<strong>templates</strong> in C++
+ are an example of such a mechanism). Unfortunately, C does not provide
+generic types. But by aggressive use of function pointers and <code>void&nbsp;*</code>, it is possible to fake them.</p>
+<h3 id="Generic_dictionary:_interface"><span class="header-section-number">5.5.1</span> Generic dictionary: interface</h3>
+<p>Below is an example of an interface to a generic dictionary type for storing maps from constant values to constant values. The <code>void&nbsp;*</code> pointers are used to avoid having to declare exactly what kinds of keys and values the dictionary will contain.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* Set dict[key] = value. */</span>
+<span class="co">/* Both key and value are copied internally. */</span>
+<span class="co">/* If value is the null pointer, remove dict[key]. */</span>
+<span class="dt">void</span> dictSet(Dict d, <span class="dt">const</span> <span class="dt">void</span> *key, <span class="dt">const</span> <span class="dt">void</span> *value);
+
+<span class="co">/* Return dict[key], or null if dict[key] has not been set. */</span>
+<span class="dt">const</span> <span class="dt">void</span> *dictGet(Dict d, <span class="dt">const</span> <span class="dt">void</span> *key);</code></pre></div>
+<p>We'll also need a constructor and destructor, but we'll get to those in a moment. First we need to think about what <code>dictSet</code> and <code>dictGet</code>
+ are supposed to do, and how we might possibly be able to implement
+them. Suppose we want to build a dictionary with strings as both keys
+and values. Internally, this might be represented as some sort of hash
+table or tree. Suppose it's a hash table. Now, given some <code>void&nbsp;*key</code>, we'd like to be able to compute its hash value. But we don't know what type <code>key</code>
+ points to, and if we guess wrong we are likely to end up with
+segmentation faults or worse. So we need some way to register a hash
+function for our keys, whatever type they might really be behind that <code>void&nbsp;*</code>.</p>
+<p>Similarly, we will want to be able to compare keys for equality
+(since not all keys that hash together will necessarily be the same),
+and we may want to be able to copy keys and values so that the data
+inside the dictionary is not modified if somebody changes a value passed
+ in from the outside. So we need a fair bit of information about keys
+and values. We'll organize all of this information in a struct made up
+of function pointers. (This includes a few extra components that came up
+ while writing the implementation.)</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* Provides operations for working with keys or values */</span>
+<span class="kw">struct</span> dictContentsOperations {
+ <span class="co">/* hash function */</span>
+ <span class="dt">unsigned</span> <span class="dt">long</span> (*hash)(<span class="dt">const</span> <span class="dt">void</span> *datum, <span class="dt">void</span> *arg);
+
+ <span class="co">/* returns nonzero if *datum1 == *datum2 */</span>
+ <span class="dt">int</span> (*equal)(<span class="dt">const</span> <span class="dt">void</span> *datum1, <span class="dt">const</span> <span class="dt">void</span> *datum2, <span class="dt">void</span> *arg);
+
+ <span class="co">/* make a copy of datum that will survive changes to original */</span>
+ <span class="dt">void</span> *(*copy)(<span class="dt">const</span> <span class="dt">void</span> *datum, <span class="dt">void</span> *arg);
+
+ <span class="co">/* free a copy */</span>
+ <span class="dt">void</span> (*delete)(<span class="dt">void</span> *datum, <span class="dt">void</span> *arg);
+
+ <span class="co">/* extra argument, to allow further specialization */</span>
+ <span class="dt">void</span> *arg;
+};</code></pre></div>
+<p>We could write a similar but smaller struct for values, but to save a little bit of effort in the short run we'll use the same <code>struct</code>
+ for both keys and values. We can now write a constructor for our
+generic dictionary that consumes two such structs that provide
+operations for working on keys and values, respectively:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* create a new dictionary with given key and value operations */</span>
+<span class="co">/* Note: valueOps.hash and valueOps.equal are not used. */</span>
+Dict dictCreate(<span class="kw">struct</span> dictContentsOperations keyOps,
+ <span class="kw">struct</span> dictContentsOperations valueOps);</code></pre></div>
+<p>So now to create a dict, we just need to fill in two <code>dictContentsOperations</code> structures. For convenience, it might be nice if <code>dict.c</code> provided some preloaded structures for common types like <code>int</code>s and strings. We can also use the <code>arg</code> field in <code>struct&nbsp;dictContentsOperations</code> to make the keys and values themselves be parameterized types, for example a type of byte-vectors of given length.</p>
+<p>We can declare these various convenience structures in <code>dict.h</code> as</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* Some predefined dictContentsOperations structures */</span>
+
+<span class="co">/* </span>
+<span class="co"> * DictIntOps supports int's that have been cast to (void *), e.g.:</span>
+<span class="co"> * d = dictCreate(DictIntOps, DictIntOps);</span>
+<span class="co"> * dictSet(d, (void *) 1, (void *) 2);</span>
+<span class="co"> * x = (int) dictGet(d, (void *) 1);</span>
+<span class="co"> */</span>
+<span class="kw">struct</span> dictContentsOperations DictIntOps;
+
+<span class="co">/*</span>
+<span class="co"> * Supports null-terminated strings, e.g.:</span>
+<span class="co"> * d = dictCreate(DictStringOps, DictStringOps);</span>
+<span class="co"> * dictSet(d, "foo", "bar");</span>
+<span class="co"> * s = dictGet(d, "foo");</span>
+<span class="co"> * Note: no casts are needed since C automatically converts</span>
+<span class="co"> * between (void *) and other pointer types.</span>
+<span class="co"> */</span>
+<span class="kw">struct</span> dictContentsOperations DictStringOps;
+
+<span class="co">/*</span>
+<span class="co"> * Supports fixed-size blocks of memory, e.g.:</span>
+<span class="co"> * int x = 1;</span>
+<span class="co"> * int y = 2;</span>
+<span class="co"> * d = dictCreate(dictMemOps(sizeof(int)), dictMemOps(sizeof(int)));</span>
+<span class="co"> * dictSet(d, &amp;x, &amp;y);</span>
+<span class="co"> * printf("%d", *(const int *) dictGet(d, &amp;x));</span>
+<span class="co"> */</span>
+<span class="kw">struct</span> dictContentsOperations dictMemOps(<span class="dt">int</span> size);</code></pre></div>
+<p>We'll define the operations in <code>DictIntOps</code> to expect <code>int</code>s cast directly to <code>void&nbsp;*</code>, the operations in <code>DictStringOps</code> to expect <code>char&nbsp;*</code> cast to <code>void&nbsp;*</code>, and the operations in <code>dictMemOps(size)</code> will expect <code>void&nbsp;*</code> arguments pointing to blocks of the given size. There is a subtle difference between a dictionary using <code>DictIntOps</code> and <code>dictMemOps(sizeof(int))</code>; in the former case, keys and values are the <code>int</code>s themselves (after being cast), while in the latter, keys and values are pointers to <code>int</code>s.</p>
+<p>Implementations of these structures can be found <a href="#genericDictionaryImplementation">below</a>.</p>
+<p>To make a dictionary that maps strings to ints, we just call:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> d = dictCreate(DictStringOps, DictIntOps);</code></pre></div>
+<p>and then we can do things like:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> dictSet(d, <span class="st">"foo"</span>, (<span class="dt">void</span> *) <span class="dv">2</span>);
+ v = (<span class="dt">int</span>) dictGet(d, <span class="st">"foo"</span>);</code></pre></div>
+<p>If we find ourselves working with an integer-valued dictionary a lot,
+ we might want to define a few macros or inline functions to avoid
+having to type casts all the time.</p>
+<h3 id="genericDictionaryImplementation"><span class="header-section-number">5.5.2</span> Generic dictionary: implementation</h3>
+<p>To implement our generic dictionary, we just take our favorite
+non-generic hash table, and replace any calls to fixed hash functions,
+copier, <code>free</code>, etc. with calls to elements of the appropriate structure. The result is shown below.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">typedef</span> <span class="kw">struct</span> dict *Dict;
+
+<span class="co">/* Provides operations for working with keys or values */</span>
+<span class="kw">struct</span> dictContentsOperations {
+ <span class="co">/* hash function */</span>
+ <span class="dt">unsigned</span> <span class="dt">long</span> (*hash)(<span class="dt">const</span> <span class="dt">void</span> *datum, <span class="dt">void</span> *arg);
+
+ <span class="co">/* returns nonzero if *datum1 == *datum2 */</span>
+ <span class="dt">int</span> (*equal)(<span class="dt">const</span> <span class="dt">void</span> *datum1, <span class="dt">const</span> <span class="dt">void</span> *datum2, <span class="dt">void</span> *arg);
+
+ <span class="co">/* make a copy of datum that will survive changes to original */</span>
+ <span class="dt">void</span> *(*copy)(<span class="dt">const</span> <span class="dt">void</span> *datum, <span class="dt">void</span> *arg);
+
+ <span class="co">/* free a copy */</span>
+ <span class="dt">void</span> (*delete)(<span class="dt">void</span> *datum, <span class="dt">void</span> *arg);
+
+ <span class="co">/* extra argument, to allow further specialization */</span>
+ <span class="dt">void</span> *arg;
+};
+
+<span class="co">/* create a new dictionary with given key and value operations */</span>
+<span class="co">/* Note: valueOps.hash and valueOps.equal are not used. */</span>
+Dict dictCreate(<span class="kw">struct</span> dictContentsOperations keyOps,
+ <span class="kw">struct</span> dictContentsOperations valueOps);
+
+<span class="co">/* free a dictionary and all the space it contains */</span>
+<span class="co">/* This will call the appropriate delete function for all keys and */</span>
+<span class="co">/* values. */</span>
+<span class="dt">void</span> dictDestroy(Dict d);
+
+<span class="co">/* Set dict[key] = value. */</span>
+<span class="co">/* Both key and value are copied internally. */</span>
+<span class="co">/* If value is the null pointer, remove dict[key]. */</span>
+<span class="dt">void</span> dictSet(Dict d, <span class="dt">const</span> <span class="dt">void</span> *key, <span class="dt">const</span> <span class="dt">void</span> *value);
+
+<span class="co">/* Return dict[key], or null if dict[key] has not been set. */</span>
+<span class="dt">const</span> <span class="dt">void</span> *dictGet(Dict d, <span class="dt">const</span> <span class="dt">void</span> *key);
+
+<span class="co">/* Some predefined dictContentsOperations structures */</span>
+
+<span class="co">/* </span>
+<span class="co"> * DictIntOps supports int's that have been cast to (void *), e.g.:</span>
+<span class="co"> * d = dictCreate(DictIntOps, DictIntOps);</span>
+<span class="co"> * dictSet(d, (void *) 1, (void *) 2);</span>
+<span class="co"> * x = (int) dictGet(d, (void *) 1);</span>
+<span class="co"> */</span>
+<span class="kw">struct</span> dictContentsOperations DictIntOps;
+
+<span class="co">/*</span>
+<span class="co"> * Supports null-terminated strings, e.g.:</span>
+<span class="co"> * d = dictCreate(DictStringOps, DictStringOps);</span>
+<span class="co"> * dictSet(d, "foo", "bar");</span>
+<span class="co"> * s = dictGet(d, "foo");</span>
+<span class="co"> * Note: no casts are needed since C automatically converts</span>
+<span class="co"> * between (void *) and other pointer types.</span>
+<span class="co"> */</span>
+<span class="kw">struct</span> dictContentsOperations DictStringOps;
+
+<span class="co">/*</span>
+<span class="co"> * Supports fixed-size blocks of memory, e.g.:</span>
+<span class="co"> * int x = 1;</span>
+<span class="co"> * int y = 2;</span>
+<span class="co"> * d = dictCreate(dictMemOps(sizeof(int)), dictMemOps(sizeof(int)));</span>
+<span class="co"> * dictSet(d, &amp;x, &amp;y);</span>
+<span class="co"> * printf("%d", *(const int *) dictGet(d, &amp;x));</span>
+<span class="co"> */</span>
+<span class="kw">struct</span> dictContentsOperations dictMemOps(<span class="dt">int</span> size);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/generic/dict.h" class="uri">examples/generic/dict.h</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;string.h&gt;</span>
+<span class="ot">#include "dict.h"</span>
+
+<span class="kw">struct</span> dictElt {
+ <span class="dt">unsigned</span> <span class="dt">long</span> hash; <span class="co">/* full hash of key */</span>
+ <span class="dt">void</span> *key;
+ <span class="dt">void</span> *value;
+ <span class="kw">struct</span> dictElt *next;
+};
+
+<span class="kw">struct</span> dict {
+ <span class="dt">int</span> tableSize; <span class="co">/* number of slots in table */</span>
+ <span class="dt">int</span> numElements; <span class="co">/* number of elements */</span>
+ <span class="kw">struct</span> dictElt **table; <span class="co">/* linked list heads */</span>
+ <span class="co">/* these save arguments passed at creation */</span>
+ <span class="kw">struct</span> dictContentsOperations keyOps;
+ <span class="kw">struct</span> dictContentsOperations valueOps;
+};
+
+<span class="ot">#define INITIAL_TABLESIZE (16)</span>
+<span class="ot">#define TABLESIZE_MULTIPLIER (2)</span>
+<span class="ot">#define TABLE_GROW_DENSITY (1)</span>
+
+Dict
+dictCreate(<span class="kw">struct</span> dictContentsOperations keyOps,
+ <span class="kw">struct</span> dictContentsOperations valueOps)
+{
+ Dict d;
+ <span class="dt">int</span> i;
+
+ d = malloc(<span class="kw">sizeof</span>(*d));
+ <span class="kw">if</span>(d == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+
+ d-&gt;tableSize = INITIAL_TABLESIZE;
+ d-&gt;numElements = <span class="dv">0</span>;
+ d-&gt;keyOps = keyOps;
+ d-&gt;valueOps = valueOps;
+ d-&gt;table = malloc(<span class="kw">sizeof</span>(*(d-&gt;table)) * d-&gt;tableSize);
+ <span class="kw">if</span>(d-&gt;table == <span class="dv">0</span>) {
+ free(d);
+ <span class="kw">return</span> <span class="dv">0</span>;
+ }
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; d-&gt;tableSize; i++) d-&gt;table[i] = <span class="dv">0</span>;
+
+ <span class="kw">return</span> d;
+}
+
+<span class="dt">void</span>
+dictDestroy(Dict d)
+{
+ <span class="dt">int</span> i;
+ <span class="kw">struct</span> dictElt *e;
+ <span class="kw">struct</span> dictElt *next;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; d-&gt;tableSize; i++) {
+ <span class="kw">for</span>(e = d-&gt;table[i]; e != <span class="dv">0</span>; e = next) {
+ next = e-&gt;next;
+ d-&gt;keyOps.delete(e-&gt;key, d-&gt;keyOps.arg);
+ d-&gt;valueOps.delete(e-&gt;value, d-&gt;valueOps.arg);
+ free(e);
+ }
+ }
+ free(d-&gt;table);
+ free(d);
+}
+
+<span class="co">/* return pointer to element with given key, if any */</span>
+<span class="dt">static</span> <span class="kw">struct</span> dictElt *
+dictFetch(Dict d, <span class="dt">const</span> <span class="dt">void</span> *key)
+{
+ <span class="dt">unsigned</span> <span class="dt">long</span> h;
+ <span class="dt">int</span> i;
+ <span class="kw">struct</span> dictElt *e;
+
+ h = d-&gt;keyOps.hash(key, d-&gt;keyOps.arg);
+ i = h % d-&gt;tableSize;
+ <span class="kw">for</span>(e = d-&gt;table[i]; e != <span class="dv">0</span>; e = e-&gt;next) {
+ <span class="kw">if</span>(e-&gt;hash == h &amp;&amp; d-&gt;keyOps.equal(key, e-&gt;key, d-&gt;keyOps.arg)) {
+ <span class="co">/* found it */</span>
+ <span class="kw">return</span> e;
+ }
+ }
+ <span class="co">/* didn't find it */</span>
+ <span class="kw">return</span> <span class="dv">0</span>;
+}
+
+<span class="co">/* increase the size of the dictionary, rehashing all table elements */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+dictGrow(Dict d)
+{
+ <span class="kw">struct</span> dictElt **old_table;
+ <span class="dt">int</span> old_size;
+ <span class="dt">int</span> i;
+ <span class="kw">struct</span> dictElt *e;
+ <span class="kw">struct</span> dictElt *next;
+ <span class="dt">int</span> new_pos;
+
+ <span class="co">/* save old table */</span>
+ old_table = d-&gt;table;
+ old_size = d-&gt;tableSize;
+
+ <span class="co">/* make new table */</span>
+ d-&gt;tableSize *= TABLESIZE_MULTIPLIER;
+ d-&gt;table = malloc(<span class="kw">sizeof</span>(*(d-&gt;table)) * d-&gt;tableSize);
+ <span class="kw">if</span>(d-&gt;table == <span class="dv">0</span>) {
+ <span class="co">/* put the old one back */</span>
+ d-&gt;table = old_table;
+ d-&gt;tableSize = old_size;
+ <span class="kw">return</span>;
+ }
+ <span class="co">/* else */</span>
+ <span class="co">/* clear new table */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; d-&gt;tableSize; i++) d-&gt;table[i] = <span class="dv">0</span>;
+
+ <span class="co">/* move all elements of old table to new table */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; old_size; i++) {
+ <span class="kw">for</span>(e = old_table[i]; e != <span class="dv">0</span>; e = next) {
+ next = e-&gt;next;
+ <span class="co">/* find the position in the new table */</span>
+ new_pos = e-&gt;hash % d-&gt;tableSize;
+ e-&gt;next = d-&gt;table[new_pos];
+ d-&gt;table[new_pos] = e;
+ }
+ }
+
+ <span class="co">/* don't need this any more */</span>
+ free(old_table);
+}
+
+<span class="dt">void</span>
+dictSet(Dict d, <span class="dt">const</span> <span class="dt">void</span> *key, <span class="dt">const</span> <span class="dt">void</span> *value)
+{
+ <span class="dt">int</span> tablePosition;
+ <span class="kw">struct</span> dictElt *e;
+
+ e = dictFetch(d, key);
+ <span class="kw">if</span>(e != <span class="dv">0</span>) {
+ <span class="co">/* change existing setting */</span>
+ d-&gt;valueOps.delete(e-&gt;value, d-&gt;valueOps.arg);
+ e-&gt;value = value ? d-&gt;valueOps.copy(value, d-&gt;valueOps.arg) : <span class="dv">0</span>;
+ } <span class="kw">else</span> {
+ <span class="co">/* create new element */</span>
+ e = malloc(<span class="kw">sizeof</span>(*e));
+ <span class="kw">if</span>(e == <span class="dv">0</span>) abort();
+
+ e-&gt;hash = d-&gt;keyOps.hash(key, d-&gt;keyOps.arg);
+ e-&gt;key = d-&gt;keyOps.copy(key, d-&gt;keyOps.arg);
+ e-&gt;value = value ? d-&gt;valueOps.copy(value, d-&gt;valueOps.arg) : <span class="dv">0</span>;
+
+ <span class="co">/* link it in */</span>
+ tablePosition = e-&gt;hash % d-&gt;tableSize;
+ e-&gt;next = d-&gt;table[tablePosition];
+ d-&gt;table[tablePosition] = e;
+
+ d-&gt;numElements++;
+
+ <span class="kw">if</span>(d-&gt;numElements &gt; d-&gt;tableSize * TABLE_GROW_DENSITY) {
+ <span class="co">/* grow and rehash */</span>
+ dictGrow(d);
+ }
+ }
+}
+
+<span class="dt">const</span> <span class="dt">void</span> *
+dictGet(Dict d, <span class="dt">const</span> <span class="dt">void</span> *key)
+{
+ <span class="kw">struct</span> dictElt *e;
+
+ e = dictFetch(d, key);
+ <span class="kw">if</span>(e != <span class="dv">0</span>) {
+ <span class="kw">return</span> e-&gt;value;
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> <span class="dv">0</span>;
+ }
+}
+
+<span class="co">/* int functions */</span>
+<span class="co">/* We assume that int can be cast to void * and back without damage */</span>
+<span class="dt">static</span> <span class="dt">unsigned</span> <span class="dt">long</span> dictIntHash(<span class="dt">const</span> <span class="dt">void</span> *x, <span class="dt">void</span> *arg) { <span class="kw">return</span> (<span class="dt">int</span>) x; }
+<span class="dt">static</span> <span class="dt">int</span> dictIntEqual(<span class="dt">const</span> <span class="dt">void</span> *x, <span class="dt">const</span> <span class="dt">void</span> *y, <span class="dt">void</span> *arg)
+{
+ <span class="kw">return</span> ((<span class="dt">int</span>) x) == ((<span class="dt">int</span>) y);
+}
+<span class="dt">static</span> <span class="dt">void</span> *dictIntCopy(<span class="dt">const</span> <span class="dt">void</span> *x, <span class="dt">void</span> *arg) { <span class="kw">return</span> (<span class="dt">void</span> *) x; }
+<span class="dt">static</span> <span class="dt">void</span> dictIntDelete(<span class="dt">void</span> *x, <span class="dt">void</span> *arg) { ; }
+
+<span class="kw">struct</span> dictContentsOperations DictIntOps = {
+ dictIntHash,
+ dictIntEqual,
+ dictIntCopy,
+ dictIntDelete,
+ <span class="dv">0</span>
+};
+
+<span class="co">/* common utilities for string and mem */</span>
+<span class="dt">static</span> <span class="dt">unsigned</span> <span class="dt">long</span> hashMem(<span class="dt">const</span> <span class="dt">unsigned</span> <span class="dt">char</span> *s, <span class="dt">int</span> len)
+{
+ <span class="dt">unsigned</span> <span class="dt">long</span> h;
+ <span class="dt">int</span> i;
+
+ h = <span class="dv">0</span>;
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; len; i++) {
+ h = (h &lt;&lt; <span class="dv">13</span>) + (h &gt;&gt; <span class="dv">7</span>) + h + s[i];
+ }
+ <span class="kw">return</span> h;
+}
+
+<span class="dt">static</span> <span class="dt">void</span> dictDeleteFree(<span class="dt">void</span> *x, <span class="dt">void</span> *arg) { free(x); }
+
+<span class="co">/* string functions */</span>
+<span class="dt">static</span> <span class="dt">unsigned</span> <span class="dt">long</span> dictStringHash(<span class="dt">const</span> <span class="dt">void</span> *x, <span class="dt">void</span> *arg)
+{
+ <span class="kw">return</span> hashMem(x, strlen(x));
+}
+
+<span class="dt">static</span> <span class="dt">int</span> dictStringEqual(<span class="dt">const</span> <span class="dt">void</span> *x, <span class="dt">const</span> <span class="dt">void</span> *y, <span class="dt">void</span> *arg)
+{
+ <span class="kw">return</span> !strcmp((<span class="dt">const</span> <span class="dt">char</span> *) x, (<span class="dt">const</span> <span class="dt">char</span> *) y);
+}
+
+<span class="dt">static</span> <span class="dt">void</span> *dictStringCopy(<span class="dt">const</span> <span class="dt">void</span> *x, <span class="dt">void</span> *arg)
+{
+ <span class="dt">const</span> <span class="dt">char</span> *s;
+ <span class="dt">char</span> *s2;
+
+ s = x;
+ s2 = malloc(<span class="kw">sizeof</span>(*s2) * (strlen(s)+<span class="dv">1</span>));
+ strcpy(s2, s);
+ <span class="kw">return</span> s2;
+}
+
+<span class="kw">struct</span> dictContentsOperations DictStringOps = {
+ dictStringHash,
+ dictStringEqual,
+ dictStringCopy,
+ dictDeleteFree,
+ <span class="dv">0</span>
+};
+
+<span class="co">/* mem functions */</span>
+<span class="dt">static</span> <span class="dt">unsigned</span> <span class="dt">long</span> dictMemHash(<span class="dt">const</span> <span class="dt">void</span> *x, <span class="dt">void</span> *arg)
+{
+ <span class="kw">return</span> hashMem(x, (<span class="dt">int</span>) arg);
+}
+
+<span class="dt">static</span> <span class="dt">int</span> dictMemEqual(<span class="dt">const</span> <span class="dt">void</span> *x, <span class="dt">const</span> <span class="dt">void</span> *y, <span class="dt">void</span> *arg)
+{
+ <span class="kw">return</span> !memcmp(x, y, (size_t) arg);
+}
+
+<span class="dt">static</span> <span class="dt">void</span> *dictMemCopy(<span class="dt">const</span> <span class="dt">void</span> *x, <span class="dt">void</span> *arg)
+{
+ <span class="dt">void</span> *x2;
+
+ x2 = malloc((size_t) arg);
+ memcpy(x2, x, (size_t) arg);
+ <span class="kw">return</span> x2;
+}
+
+<span class="kw">struct</span> dictContentsOperations
+dictMemOps(<span class="dt">int</span> len)
+{
+ <span class="kw">struct</span> dictContentsOperations memOps;
+
+ memOps.hash = dictMemHash;
+ memOps.equal = dictMemEqual;
+ memOps.copy = dictMemCopy;
+ memOps.delete = dictDeleteFree;
+ memOps.arg = (<span class="dt">void</span> *) len;
+
+ <span class="kw">return</span> memOps;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/generic/dict.c" class="uri">examples/generic/dict.c</a>
+</div>
+<p>And here is some test code and a Makefile: <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/generic/test-dict.c">test-dict.c</a>, <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/generic/tester.h">tester.h</a>, <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/generic/tester.c">tester.c</a>, <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/generic/Makefile">Makefile</a>.</p>
+<h2 id="recursion"><span class="header-section-number">5.6</span> Recursion</h2>
+<p><strong>Recursion</strong> is when a function calls itself. Some programming languages (particularly functional programming languages like <a href="http://en.wikipedia.org/wiki/Scheme_%28programming_language%29" title="WikiPedia">Scheme</a>, <a href="http://en.wikipedia.org/wiki/ML_%28programming_language%29" title="WikiPedia">ML</a>, or <a href="http://www.haskell.org/">Haskell</a>) use recursion as a basic tool for implementing algorithms that in other languages would typically be expressed using <strong>iteration</strong> (loops). Procedural languages like C tend to emphasize iteration over recursion, but can support recursion as well.</p>
+<h3 id="Example_of_recursion_in_C"><span class="header-section-number">5.6.1</span> Example of recursion in C</h3>
+<p>Here are a bunch of routines that print the numbers from 0 to 9:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="co">/* all of these routines print numbers i where start &lt;= i &lt; stop */</span>
+
+<span class="dt">void</span>
+printRangeIterative(<span class="dt">int</span> start, <span class="dt">int</span> stop)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">for</span>(i = start; i &lt; stop; i++) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, i);
+ }
+}
+
+<span class="dt">void</span>
+printRangeRecursive(<span class="dt">int</span> start, <span class="dt">int</span> stop)
+{
+ <span class="kw">if</span>(start &lt; stop) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, start);
+ printRangeRecursive(start<span class="dv">+1</span>, stop);
+ }
+}
+
+<span class="dt">void</span>
+printRangeRecursiveReversed(<span class="dt">int</span> start, <span class="dt">int</span> stop)
+{
+ <span class="kw">if</span>(start &lt; stop) {
+ printRangeRecursiveReversed(start<span class="dv">+1</span>, stop);
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, start);
+ }
+}
+
+<span class="dt">void</span>
+printRangeRecursiveSplit(<span class="dt">int</span> start, <span class="dt">int</span> stop)
+{
+ <span class="dt">int</span> mid;
+
+ <span class="kw">if</span>(start &lt; stop) {
+ mid = (start + stop) / <span class="dv">2</span>;
+
+ printRangeRecursiveSplit(start, mid);
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, mid);
+ printRangeRecursiveSplit(mid<span class="dv">+1</span>, stop);
+ }
+}
+
+<span class="ot">#define Noisy(x) (puts(#x), x)</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+
+ <span class="kw">if</span>(argc != <span class="dv">1</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ Noisy(printRangeIterative(<span class="dv">0</span>, <span class="dv">10</span>));
+ Noisy(printRangeRecursive(<span class="dv">0</span>, <span class="dv">10</span>));
+ Noisy(printRangeRecursiveReversed(<span class="dv">0</span>, <span class="dv">10</span>));
+ Noisy(printRangeRecursiveSplit(<span class="dv">0</span>, <span class="dv">10</span>));
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/recursion/recursion.c" class="uri">examples/recursion/recursion.c</a>
+</div>
+<p>And here is the output:</p>
+<pre><code>printRangeIterative(0, 10)
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+printRangeRecursive(0, 10)
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+printRangeRecursiveReversed(0, 10)
+9
+8
+7
+6
+5
+4
+3
+2
+1
+0
+printRangeRecursiveSplit(0, 10)
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9</code></pre>
+<p>The first function <code class="backtick">printRangeIterative</code> is simple and direct: it's what we've been doing to get loops forever. The others are a bit more mysterious.</p>
+<p>The function <code class="backtick">printRangeRecursive</code> is an example of solving a problem using a <a href="#algorithmDesignTechniquesClassification">divide and conquer</a>
+ approach. If we don't know how to print a range of numbers 0 through 9,
+ maybe we can start by solving a simpler problem of printing the first
+number 0. Having done that, we have a new, smaller problem: print the
+numbers 1 through 9. But then we notice we already have a function <code class="backtick">printRangeRecursive</code> that will do that for us. So we'll call it.</p>
+<p>If you aren't used to this, it has the feeling of trying to make yourself fly by pulling very hard on your shoelaces.<a href="#fn20" class="footnoteRef" id="fnref20"><sup>20</sup></a> But in fact the computer will happily generate the eleven nested instances of <code class="backtick">printRangeRecursive</code> to make this happen. When we hit the bottom, the call stack will look something like this:</p>
+<pre><code>printRangeRecursive(0, 10)
+ printRangeRecursive(1, 10)
+ printRangeRecursive(2, 10)
+ printRangeRecursive(3, 10)
+ printRangeRecursive(4, 10)
+ printRangeRecursive(5, 10)
+ printRangeRecursive(6, 10)
+ printRangeRecursive(7, 10)
+ printRangeRecursive(8, 10)
+ printRangeRecursive(9, 10)
+ printRangeRecursive(10, 10)</code></pre>
+<p>This works because each call to <code class="backtick">printRangeRecursive</code>
+ gets its own parameters and its own variables separate from the others,
+ even the ones that are still in progress. So each will print out <code class="backtick">start</code> and then call another copy in to print <code class="backtick">start+1</code> etc. In the last call, we finally fail the test <code class="backtick">start&nbsp;&lt;&nbsp;stop</code>, so the function exits, then its parent exits, and so on until we unwind all the calls on the stack back to the first one.</p>
+<p>In <code class="backtick">printRangeRecursiveReversed</code>, the calling pattern is exactly the same, but now instead of printing <code class="backtick">start</code> on the way down, we print <code class="backtick">start</code> on the way back up, after making the recursive call. This means that in <code class="backtick">printRangeRecursiveReversed(0,&nbsp;10)</code>, 0 is printed only after the results of <code class="backtick">printRangeRecursiveReversed(1,&nbsp;10)</code>, which gives us the countdown effect.</p>
+<p>So far these procedures all behave very much like ordinary loops,
+with increasing values on the stack standing in for the loop variable.
+More exciting is <code class="backtick">printRangeRecursiveSplit</code>. This function takes a much more aggressive approach to dividing up the problem: it splits a range <span class="math inline">[0, 10)</span> as two ranges <span class="math inline">[0, 5)</span> and <span class="math inline">[6, 10)</span> separated by a midpoint <span class="math inline">5</span>. (The notation <span class="math inline">[<em>x</em>, <em>y</em>)</span> means all numbers <span class="math inline"><em>z</em></span> such that <span class="math inline"><em>x</em> ≤ <em>z</em> &lt; <em>y</em></span>.) We want to print the midpoint in the middle, of course, and we can use <code class="backtick">printRangeRecursiveSplit</code>
+ recursively to print the two ranges. Following the execution of this
+procedure is more complicated, with the start of the sequence of calls
+looking something like this:</p>
+<pre><code>printRangeRecursiveSplit(0, 10)
+ printRangeRecursiveSplit(0, 5)
+ printRangeRecursiveSplit(0, 2)
+ printRangeRecursiveSplit(0, 1)
+ printRangeRecursiveSplit(0, 0)
+ printRangeRecursiveSplit(1, 1)
+ printRangeRecursiveSplit(2, 2)
+ printRangeRecursiveSplit(3, 5)
+ printRangeRecursiveSplit(3, 4)
+ printRangeRecursiveSplit(3, 3)
+ printRangeRecursiveSplit(4, 4)
+ printRangeRecursiveSplit(5, 5)
+ printRangeRecursiveSplit(6, 10)
+ ... etc.</code></pre>
+<p>Here the computation has the structure of a tree instead of a list,
+so it is not so obvious how one might rewrite this procedure as a loop.</p>
+<h3 id="Common_problems_with_recursion"><span class="header-section-number">5.6.2</span> Common problems with recursion</h3>
+<p>Like iteration, recursion is a powerful tool that can cause your
+program to do much more than expected. While it may seem that errors in
+recursive functions would be harder to track down than errors in loops,
+most of the time there are a few basic causes.</p>
+<h4 id="Omitting_the_base_case"><span class="header-section-number">5.6.2.1</span> Omitting the base case</h4>
+<p>Suppose we leave out the <code class="backtick">if</code> statement in <code class="backtick">printRangeRecursive</code>:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+printRangeRecursiveBad(<span class="dt">int</span> start, <span class="dt">int</span> stop)
+{
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, start);
+ printRangeRecursiveBad(start<span class="dv">+1</span>, stop);
+}</code></pre></div>
+<p>This will still work, in a sense. When called as <code class="backtick">printRangeRecursiveBad(0,&nbsp;10)</code>, it will print 0, call itself with <code class="backtick">printRangeRecursiveBad(1,&nbsp;10)</code>,
+ print 1, 2, 3, etc., but there is nothing to stop it at 10 (or anywhere
+ else). So our output will be a long string of numbers followed by a
+segmentation fault, when we blow out the stack.</p>
+<p>This is the recursive version of an infinite loop: the same thing happens if we forget a loop test and write</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+printRangeIterativeBad(<span class="dt">int</span> start, <span class="dt">int</span> stop)
+{
+ <span class="kw">for</span>(i = <span class="dv">0</span>; ; i++) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, i);
+ }
+}</code></pre></div>
+<p>except that now the program just runs forever, since it never runs
+out of resources. This is an example of how iteration is more efficient
+than recursion, at least in C.</p>
+<h4 id="Blowing_out_the_stack"><span class="header-section-number">5.6.2.2</span> Blowing out the stack</h4>
+<p>Blowing out the stack is what happens when a recursion is too deep.
+Typically, the operating system puts a hard limit on how big the stack
+can grow, on the assumption that any program that grows the stack too
+much has gone insane and needs to be killed before it does more damage.
+One of the ways this can happen is if we forget the base case as above,
+but it can also happen if we just try to use a recursive function to do
+too much. For example, if we call <code class="backtick">printRangeRecursive(0,&nbsp;1000000)</code>, we will probably get a segmentation fault after the first 100,000 numbers or so.</p>
+<p>For this reason, it's best to try to avoid linear recursions like the one in <code class="backtick">printRangeRecursive</code>, where the depth of the recursion is proportional to the number of things we are doing. Much safer are even splits like <code class="backtick">printRangeRecursiveSplit</code>,
+ since the depth of the stack will now be only logarithmic in the number
+ of things we are doing. Fortunately, linear recursions are often <strong>tail-recursive</strong>,
+ where the recursive call is the last thing the recursive function does;
+ in this case, we can use a standard transformation (see <a href="#tailRecursion">below</a>) to convert the tail-recursive function into an iterative function.</p>
+<h4 id="Failure_to_make_progress"><span class="header-section-number">5.6.2.3</span> Failure to make progress</h4>
+<p>Sometimes we end up blowing out the stack because we thought we were
+recursing on a smaller instance of the problem, but in fact we weren't.
+Consider this broken version of <code class="backtick">printRangeRecursiveSplit</code>:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+printRangeRecursiveSplitBad(<span class="dt">int</span> start, <span class="dt">int</span> stop)
+{
+ <span class="dt">int</span> mid;
+
+ <span class="kw">if</span>(start == stop) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, start);
+ } <span class="kw">else</span> {
+ mid = (start + stop) / <span class="dv">2</span>;
+
+ printRangeRecursiveSplitBad(start, mid);
+ printRangeRecursiveSplitBad(mid, stop);
+ }
+}</code></pre></div>
+<p>This will get stuck on as simple a call as <code class="backtick">printRangeRecursiveSplitBad(0,&nbsp;1)</code>; it will set <code class="backtick">mid</code> to 0, and while the recursive call to <code class="backtick">printRangeRecursiveSplitBad(0,&nbsp;0)</code> will work just fine, the recursive call to <code class="backtick">printRangeRecursiveSplitBad(0,&nbsp;1)</code> will put us back where we started, giving an infinite recursion.</p>
+<p>Detecting these errors is usually not too hard (segmentation faults that produce huge piles of stack frames when you type <code class="backtick">where</code> in gdb are a dead give-away). Figuring out how to make sure that you do in fact always make progress can be trickier.</p>
+<h3 id="tailRecursion"><span class="header-section-number">5.6.3</span> Tail-recursion and iteration</h3>
+<p><strong>Tail recursion</strong> is when a recursive function calls itself only once, and as the last thing it does. The <code class="backtick">printRangeRecursive</code> function is an example of a tail-recursive function:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+printRangeRecursive(<span class="dt">int</span> start, <span class="dt">int</span> stop)
+{
+ <span class="kw">if</span>(start &lt; stop) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, start);
+ printRangeRecursive(start<span class="dv">+1</span>, stop);
+ }
+}</code></pre></div>
+<p>The nice thing about tail-recursive functions is that we can always
+translate them directly into iterative functions. The reason is that
+when we do the tail call, we are effectively replacing the current copy
+of the function with a new copy with new arguments. So rather than
+keeping around the old zombie parent copy—which has no purpose other
+than to wait for the child to return and then return itself—we can reuse
+ it by assigning new values to its arguments and jumping back to the top
+ of the function.</p>
+<p>Done literally, this produces this <code class="backtick">goto</code>-considered-harmful monstrosity:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+printRangeRecursiveGoto(<span class="dt">int</span> start, <span class="dt">int</span> stop)
+{
+ topOfFunction:
+
+ <span class="kw">if</span>(start &lt; stop) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, start);
+
+ start = start<span class="dv">+1</span>;
+ <span class="kw">goto</span> topOfFunction;
+ }
+}</code></pre></div>
+<p>But we can almost always remove <code class="backtick">goto</code> statements using less dangerous control structures. In this particular case, the pattern of jumping back to just before an <code class="backtick">if</code> matches up exactly with what we get from a <code class="backtick">while</code> loop:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+printRangeRecursiveNoMore(<span class="dt">int</span> start, <span class="dt">int</span> stop)
+{
+ <span class="kw">while</span>(start &lt; stop) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, start);
+
+ start = start<span class="dv">+1</span>;
+ }
+}</code></pre></div>
+<p>In functional programming languages, this transformation is usually
+done in the other direction, to unroll loops into recursive functions.
+Since C doesn't like recursive functions so much (they blow out the
+stack!), we usually do this transformation to get rid of recursion
+instead of adding it.</p>
+<h4 id="binarySearch"><span class="header-section-number">5.6.3.1</span> Binary search: recursive and iterative versions</h4>
+<p><strong>Binary search</strong> is an algorithm for searching a sorted
+ array for a particular target element, similar to playing Twenty
+Questions when the answer is a number (hopefully in a range that
+includes at most <span class="math inline">2<sup>20</sup></span>
+numbers). The algorithm starts by picking a value in the middle of the
+array. If the target is less than this value, we recurse on the bottom
+half of the array; else we recurse on the top half.</p>
+<p>Here is an interface for binary search on an array of <code>int</code>s:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* returns 1 if target is present in sorted array */</span>
+<span class="dt">int</span> binarySearch(<span class="dt">int</span> target, <span class="dt">const</span> <span class="dt">int</span> *a, size_t length);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/binarySearch/binarySearch.h" class="uri">examples/binarySearch/binarySearch.h</a>
+</div>
+<p>Written recursively, we might implement the algorithm like this:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stddef.h&gt;</span>
+
+<span class="ot">#include "binarySearch.h"</span>
+
+<span class="dt">int</span>
+binarySearch(<span class="dt">int</span> target, <span class="dt">const</span> <span class="dt">int</span> *a, size_t length)
+{
+ size_t index;
+
+ index = length/<span class="dv">2</span>;
+
+ <span class="kw">if</span>(length == <span class="dv">0</span>) {
+ <span class="co">/* nothing left */</span>
+ <span class="kw">return</span> <span class="dv">0</span>;
+ } <span class="kw">else</span> <span class="kw">if</span>(target == a[index]) {
+ <span class="co">/* got it */</span>
+ <span class="kw">return</span> <span class="dv">1</span>;
+ } <span class="kw">else</span> <span class="kw">if</span>(target &lt; a[index]) {
+ <span class="co">/* recurse on bottom half */</span>
+ <span class="kw">return</span> binarySearch(target, a, index);
+ } <span class="kw">else</span> {
+ <span class="co">/* recurse on top half */</span>
+ <span class="co">/* we throw away index+1 elements (including a[index]) */</span>
+ <span class="kw">return</span> binarySearch(target, a+index<span class="dv">+1</span>, length-(index<span class="dv">+1</span>));
+ }
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/binarySearch/binarySearchRecursive.c" class="uri">examples/binarySearch/binarySearchRecursive.c</a>
+</div>
+<p>This will work just fine, and indeed it finds the target element (or not) in <span class="math inline"><em>O</em>(log<em>n</em>)</span> time, because we can only recurse <span class="math inline"><em>O</em>(log<em>n</em>)</span> times before running out of elements and we only pay <span class="math inline"><em>O</em>(1)</span> cost per recursive call to <code>binarySearch</code>.
+ But we do have to pay function call overhead for each call, and there is a
+potential to run into stack overflow if our stack is very constrained.</p>
+<p>Fortunately, we don't do anything with the return value from <code>binarySearch</code>
+ but pass it on up the stack: the function is tail-recursive. This means
+ that we can get rid of the recursion by reusing the stack frame from the
+ initial call. The mechanical way to do this is to wrap the body of the
+routine in a <code>for(;;)</code> loop (so that we jump back to the top
+whenever we hit the bottom), and replace each recursive call with one or
+ more assignments to update any parameters that change in the recursive
+call. The result looks like this:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stddef.h&gt;</span>
+
+<span class="ot">#include "binarySearch.h"</span>
+
+<span class="dt">int</span>
+binarySearch(<span class="dt">int</span> target, <span class="dt">const</span> <span class="dt">int</span> *a, size_t length)
+{
+ size_t index;
+
+ <span class="co">/* direct translation of recursive version */</span>
+ <span class="co">/* hence the weird organization of the loop */</span>
+ <span class="kw">for</span>(;;) {
+ index = length/<span class="dv">2</span>;
+
+ <span class="kw">if</span>(length == <span class="dv">0</span>) {
+ <span class="co">/* nothing left */</span>
+ <span class="kw">return</span> <span class="dv">0</span>;
+ } <span class="kw">else</span> <span class="kw">if</span>(target == a[index]) {
+ <span class="co">/* got it */</span>
+ <span class="kw">return</span> <span class="dv">1</span>;
+ } <span class="kw">else</span> <span class="kw">if</span>(target &lt; a[index]) {
+ <span class="co">/* recurse on bottom half */</span>
+ length = index;
+ } <span class="kw">else</span> {
+ <span class="co">/* recurse on top half */</span>
+ <span class="co">/* we throw away index+1 elements (including a[index]) */</span>
+ a = a + index + <span class="dv">1</span>;
+ length = length - (index + <span class="dv">1</span>);
+ }
+ }
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/binarySearch/binarySearchIterative.c" class="uri">examples/binarySearch/binarySearchIterative.c</a>
+</div>
+<p>Here's some simple test code to demonstrate that these two implementations in fact do the same thing: <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/binarySearch/Makefile">Makefile</a>, <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/binarySearch/testBinarySearch.c">testBinarySearch.c</a>.</p>
+<h3 id="mergesort"><span class="header-section-number">5.6.4</span> Mergesort: a recursive sorting algorithm</h3>
+<p>So far the examples we have given have not been very useful, or have
+involved recursion that we can easily replace with iteration. Here is an
+ example of a recursive procedure that cannot be as easily turned into
+an iterative version.</p>
+<p>We are going to implement the <a href="http://en.wikipedia.org/wiki/Mergesort" title="WikiPedia">mergesort</a> algorithm on arrays. This is a classic <a href="#algorithmDesignTechniquesClassification">divide and conquer</a>
+ sorting algorithm that splits an array into two pieces, sorts each
+piece (recursively!), then merges the results back together. Here is the
+ code, together with a simple test program.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;string.h&gt;</span>
+
+<span class="co">/* merge sorted arrays a1 and a2, putting result in out */</span>
+<span class="dt">void</span>
+merge(<span class="dt">int</span> n1, <span class="dt">const</span> <span class="dt">int</span> a1[], <span class="dt">int</span> n2, <span class="dt">const</span> <span class="dt">int</span> a2[], <span class="dt">int</span> out[])
+{
+ <span class="dt">int</span> i1;
+ <span class="dt">int</span> i2;
+ <span class="dt">int</span> iout;
+
+ i1 = i2 = iout = <span class="dv">0</span>;
+
+ <span class="kw">while</span>(i1 &lt; n1 || i2 &lt; n2) {
+ <span class="kw">if</span>(i2 &gt;= n2 || ((i1 &lt; n1) &amp;&amp; (a1[i1] &lt; a2[i2]))) {
+ <span class="co">/* a1[i1] exists and is smaller */</span>
+ out[iout++] = a1[i1++];
+ } <span class="kw">else</span> {
+ <span class="co">/* a1[i1] doesn't exist, or is bigger than a2[i2] */</span>
+ out[iout++] = a2[i2++];
+ }
+ }
+}
+
+<span class="co">/* sort a, putting result in out */</span>
+<span class="co">/* we call this mergeSort to avoid conflict with mergesort in libc */</span>
+<span class="dt">void</span>
+mergeSort(<span class="dt">int</span> n, <span class="dt">const</span> <span class="dt">int</span> a[], <span class="dt">int</span> out[])
+{
+ <span class="dt">int</span> *a1;
+ <span class="dt">int</span> *a2;
+
+ <span class="kw">if</span>(n &lt; <span class="dv">2</span>) {
+ <span class="co">/* 0 or 1 elements is already sorted */</span>
+ memcpy(out, a, <span class="kw">sizeof</span>(<span class="dt">int</span>) * n);
+ } <span class="kw">else</span> {
+ <span class="co">/* sort into temp arrays */</span>
+ a1 = malloc(<span class="kw">sizeof</span>(<span class="dt">int</span>) * (n/<span class="dv">2</span>));
+ a2 = malloc(<span class="kw">sizeof</span>(<span class="dt">int</span>) * (n - n/<span class="dv">2</span>));
+
+ mergeSort(n/<span class="dv">2</span>, a, a1);
+ mergeSort(n - n/<span class="dv">2</span>, a + n/<span class="dv">2</span>, a2);
+
+ <span class="co">/* merge results */</span>
+ merge(n/<span class="dv">2</span>, a1, n - n/<span class="dv">2</span>, a2, out);
+
+ <span class="co">/* free the temp arrays */</span>
+ free(a1);
+ free(a2);
+ }
+}
+
+<span class="ot">#define N (20)</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> a[N];
+ <span class="dt">int</span> b[N];
+ <span class="dt">int</span> i;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; N; i++) {
+ a[i] = N-i<span class="dv">-1</span>;
+ }
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; N; i++) {
+ printf(<span class="st">"%d "</span>, a[i]);
+ }
+ putchar(<span class="ch">'\n'</span>);
+
+ mergeSort(N, a, b);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; N; i++) {
+ printf(<span class="st">"%d "</span>, b[i]);
+ }
+ putchar(<span class="ch">'\n'</span>);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/sorting/mergesort.c" class="uri">examples/sorting/mergesort.c</a>
+</div>
+<p>The cost of this is pretty cheap: <span class="math inline"><em>O</em>(<em>n</em>log<em>n</em>)</span>, since each element of <code class="backtick">a</code> is processed through <code class="backtick">merge</code> once for each array it gets put in, and the recursion only goes <span class="math inline"><em>O</em>(log<em>n</em>)</span> layers deep before we hit 1-element arrays.</p>
+<p>The reason that we can't easily transform this into an iterative version is that the <code>mergeSort</code>
+ function is not tail-recursive: not only does it call itself twice, but
+ it also needs to free the temporary arrays at the end. Because the
+algorithm has to do these tasks on the way back up the stack, we need to
+ keep the stack around to track them.</p>
+<h3 id="asymptotic-complexity-of-recursive-functions"><span class="header-section-number">5.6.5</span> Asymptotic complexity of recursive functions</h3>
+<p>One issue with a recursive function is that it becomes harder to
+estimate its asymptotic complexity. Unlike loops, where we can estimate
+the cost by simply multiplying the number of iterations by the cost of
+each iteration, the cost of a recursive function depends on the cost of
+its recursive calls. This would make it seem that we would need to be
+able to compute the cost of the function before we could compute the
+cost of the function.</p>
+<p>Fortunately, for most recursive functions, the size of the input
+drops whenever we recurse. So the cost can be expressed in terms of a <strong>recurrence</strong>, a formula for the cost <span class="math inline"><em>T</em>(<em>n</em>)</span> on an input of size <span class="math inline"><em>n</em></span> in terms of the cost on smaller inputs. Some examples:</p>
+<dl>
+<dt><span class="math inline"><em>T</em>(<em>n</em>)=<em>O</em>(1)+<em>T</em>(<em>n</em>/2)</span></dt>
+<dd>This is the cost of binary search. To search an array of <span class="math inline"><em>n</em></span> elements, look up the middle element (<span class="math inline"><em>O</em>(1)</span> time) and, in the worst case, recurse on an array of <span class="math inline"><em>n</em>/2</span> elements.
+</dd>
+<dt><span class="math inline"><em>T</em>(<em>n</em>)=2<em>T</em>(<em>n</em>/2)+<em>O</em>(<em>n</em>)</span></dt>
+<dd>This is the cost of mergesort. Sort two half-size arrays recursively, then merge them in <span class="math inline"><em>O</em>(<em>n</em>)</span> time.
+</dd>
+<dt><span class="math inline"><em>T</em>(<em>n</em>)=<em>O</em>(1)+<em>T</em>(<em>n</em> − 1)</span></dt>
+<dd>This is the cost of most simple loops, if we think of them as a recursive process. Do <span class="math inline"><em>O</em>(1)</span> work on the first element, then do <span class="math inline"><em>T</em>(<em>n</em> − 1)</span> work on the rest.
+</dd>
+</dl>
+<p>There are <a href="http://en.wikipedia.org/wiki/Master_theorem">standard tools</a>
+ for solving many of the recurrences that arise in common algorithms,
+but these are overkill for our purposes, since there are only a handful
+of recurrences that are likely to come up in practice and we already
+solved most of them. Here is a table of some of the more common
+possibilities:</p>
+<table>
+<thead>
+<tr class="header">
+<th align="left">Recurrence</th>
+<th align="left">Solution</th>
+<th align="left">Example</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left"><span class="math inline"><em>T</em>(<em>n</em>)=<em>T</em>(<em>n</em> − 1)+<em>O</em>(1)</span></td>
+<td align="left"><span class="math inline"><em>T</em>(<em>n</em>)=<em>O</em>(<em>n</em>)</span></td>
+<td align="left">Finding a maximum</td>
+</tr>
+<tr class="even">
+<td align="left"><span class="math inline"><em>T</em>(<em>n</em>)=<em>T</em>(<em>n</em> − 1)+<em>O</em>(<em>n</em>)</span></td>
+<td align="left"><span class="math inline"><em>T</em>(<em>n</em>)=<em>O</em>(<em>n</em><sup>2</sup>)</span></td>
+<td align="left">Selection sort</td>
+</tr>
+<tr class="odd">
+<td align="left"><span class="math inline"><em>T</em>(<em>n</em>)=<em>T</em>(<em>n</em>/2)+<em>O</em>(1)</span></td>
+<td align="left"><span class="math inline"><em>T</em>(<em>n</em>)=<em>O</em>(log<em>n</em>)</span></td>
+<td align="left">Binary search</td>
+</tr>
+<tr class="even">
+<td align="left"><span class="math inline"><em>T</em>(<em>n</em>)=2<em>T</em>(<em>n</em>/2)+<em>O</em>(<em>n</em>)</span></td>
+<td align="left"><span class="math inline"><em>T</em>(<em>n</em>)=<em>O</em>(<em>n</em>log<em>n</em>)</span></td>
+<td align="left">Mergesort</td>
+</tr>
+</tbody>
+</table>
+<h2 id="binaryTrees"><span class="header-section-number">5.7</span> Binary trees</h2>
+<p><a href="#algorithmDesignTechniquesClassification">Divide and conquer</a>
+ yields algorithms whose execution has a tree structure. Sometimes we
+build data structures that are also trees. It is probably not surprising
+ that divide and conquer is the natural way to build algorithms that use
+ such trees as inputs.</p>
+<h3 id="Tree_basics"><span class="header-section-number">5.7.1</span> Tree basics</h3>
+<p>Here is a typical binary tree. It is binary because every node has at most two children. This particular tree is also <strong>complete</strong> because the nodes consist only of <strong>internal nodes</strong> with exactly two children and <strong>leaves</strong> with no children. Not all binary trees will be complete.</p>
+<pre><code> 0
+ / \
+ 1 2
+ / \
+ 3 4
+ / \
+ 5 6
+ / \
+ 7 8</code></pre>
+<p>Structurally, a complete binary tree consists of either a single node (a leaf) or a root node with a left and right <strong>subtree</strong>,
+ each of which is itself either a leaf or a root node with two subtrees.
+ The set of all nodes underneath a particular node x is called the
+subtree rooted at x.</p>
+<p>The <strong>size</strong> of a tree is the number of nodes; a leaf by itself has size 1. The <strong>height</strong> of a tree is the length of the longest path; 0 for a leaf, at least one in any larger tree. The <strong>depth</strong> of a node is the length of the path from the root to that node. The <strong>height</strong>
+ of a node is the height of the subtree of which it is the root, i.e.
+the length of the longest path from that node to some leaf below it. A
+node <span class="math inline"><em>u</em></span> is an <strong>ancestor</strong> of a node <span class="math inline"><em>v</em></span> if <span class="math inline"><em>v</em></span> is contained in the subtree rooted at <span class="math inline"><em>u</em></span>; we may write equivalently that <span class="math inline"><em>v</em></span> is a <strong>descendant</strong> of <span class="math inline"><em>u</em></span>. Note that every node is both an ancestor and a descendant of itself; if we wish to exclude the node itself, we refer to a <strong>proper ancestor</strong> or <strong>proper descendant</strong>.</p>
+<h3 id="Binary_tree_implementations"><span class="header-section-number">5.7.2</span> Binary tree implementations</h3>
+<p>In a low-level programming language like C, a binary tree typically
+looks a lot like a linked list with an extra outgoing pointer from each
+element, e.g.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">struct</span> node {
+ <span class="dt">int</span> key;
+ <span class="kw">struct</span> node *left; <span class="co">/* left child */</span>
+ <span class="kw">struct</span> node *right; <span class="co">/* right child */</span>
+};</code></pre></div>
+<p>Missing children (and the empty tree) are represented by null
+pointers. Typically, individual tree nodes are allocated separately
+using <code class="backtick">malloc</code>; however, for
+high-performance use it is not unusual for tree libraries to do their
+own storage allocation out of large blocks obtained from <code class="backtick">malloc</code>.</p>
+<p>Optionally, the <code class="backtick">struct</code> may be extended to include additional information such as a pointer to the node's parent, hints for <a href="#balancedTrees">balancing</a>,
+ or aggregate information about the subtree rooted at the node such as
+its size or the sum/max/average of the keys of its nodes.</p>
+<p>When it is not important to be able to move large subtrees around
+simply by adjusting pointers, a tree may be represented implicitly by
+packing it into an array. This is a standard approach for implementing <a href="#heaps">heaps</a>, which we will see soon.</p>
+<h3 id="The_canonical_binary_tree_algorithm"><span class="header-section-number">5.7.3</span> The canonical binary tree algorithm</h3>
+<p>Pretty much every <a href="#algorithmDesignTechniquesClassification">divide and conquer</a> algorithm for binary trees looks like this:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+doSomethingToAllNodes(<span class="kw">struct</span> node *root)
+{
+ <span class="kw">if</span>(root) {
+ doSomethingTo(root);
+ doSomethingToAllNodes(root-&gt;left);
+ doSomethingToAllNodes(root-&gt;right);
+ }
+}</code></pre></div>
+<p>The function processes all nodes in what is called a <strong>preorder traversal</strong>, where the "preorder" part means that the root of any tree is processed first. Moving the call to <code class="backtick">doSomethingTo</code> in between or after the two recursive calls yields an <strong>inorder</strong> or <strong>postorder</strong> traversal, respectively.</p>
+<p>In practice we usually want to extract some information from the tree. For example, this function computes the size of a tree:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span>
+treeSize(<span class="kw">struct</span> node *root)
+{
+ <span class="kw">if</span>(root == <span class="dv">0</span>) {
+ <span class="kw">return</span> <span class="dv">0</span>;
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> <span class="dv">1</span> + treeSize(root-&gt;left) + treeSize(root-&gt;right);
+ }
+}</code></pre></div>
+<p>and this function computes the height:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span>
+treeHeight(<span class="kw">struct</span> node *root)
+{
+ <span class="dt">int</span> lh; <span class="co">/* height of left subtree */</span>
+ <span class="dt">int</span> rh; <span class="co">/* height of right subtree */</span>
+
+ <span class="kw">if</span>(root == <span class="dv">0</span>) {
+ <span class="kw">return</span> -<span class="dv">1</span>;
+ } <span class="kw">else</span> {
+ lh = treeHeight(root-&gt;left);
+ rh = treeHeight(root-&gt;right);
+ <span class="kw">return</span> <span class="dv">1</span> + (lh &gt; rh ? lh : rh);
+ }
+}</code></pre></div>
+<p>Since both of these algorithms have the same structure, they both
+have the same asymptotic running time. We can compute this running time
+by observing that each recursive call to <code>treeSize</code> or <code>treeHeight</code> that does not get a null pointer passed to it gets a different node (so there are <span class="math inline"><em>n</em></span>
+ such calls), and each call that does get a null pointer passed to it is
+ called by a routine that doesn't, and that there are at most two such
+calls per node. Since the body of each call itself costs <span class="math inline"><em>O</em>(1)</span> (no loops), this gives a total cost of <span class="math inline"><em>Θ</em>(<em>n</em>)</span>.</p>
+<p>So these are all <span class="math inline"><em>Θ</em>(<em>n</em>)</span> algorithms.</p>
+<h3 id="Nodes_vs_leaves"><span class="header-section-number">5.7.4</span> Nodes vs leaves</h3>
+<p>For some binary trees we don't store anything interesting in the
+internal nodes, using them only to provide a route to the leaves. We
+might reasonably ask if an algorithm that runs in <span class="math inline"><em>O</em>(<em>n</em>)</span> time where <span class="math inline"><em>n</em></span> is the total number of nodes still runs in <span class="math inline"><em>O</em>(<em>m</em>)</span> time, where <span class="math inline"><em>m</em></span> counts only the leaves. For <em>complete</em>
+ binary trees, we can show that we get the same asymptotic performance
+whether we count leaves only, internal nodes only, or both leaves and
+internal nodes.</p>
+<p>Let <span class="math inline"><em>T</em>(<em>n</em>)</span> be the number of internal nodes in a complete binary tree with <span class="math inline"><em>n</em></span> leaves. It is easy to see that <span class="math inline"><em>T</em>(1)=0</span> and <span class="math inline"><em>T</em>(2)=1</span>, but for larger trees there are multiple structures and so it makes sense to write a recurrence: <span class="math inline"><em>T</em>(<em>n</em>)=1 + <em>T</em>(<em>k</em>)+<em>T</em>(<em>n</em> − <em>k</em>)</span>.</p>
+<p>We can show by induction that the solution to this recurrence is exactly <span class="math inline"><em>T</em>(<em>n</em>)=<em>n</em> − 1</span>. We already have the base case <span class="math inline"><em>T</em>(1)=0</span>. For larger <span class="math inline"><em>n</em></span>, we have <span class="math inline"><em>T</em>(<em>n</em>)=1 + <em>T</em>(<em>k</em>)+<em>T</em>(<em>n</em> − <em>k</em>)=1 + (<em>k</em> − 1)+(<em>n</em> − <em>k</em> − 1)=<em>n</em> − 1</span>.</p>
+<p>So a complete binary tree with <span class="math inline"><em>Θ</em>(<em>n</em>)</span> nodes has <span class="math inline"><em>Θ</em>(<em>n</em>)</span> internal nodes and <span class="math inline"><em>Θ</em>(<em>n</em>)</span> leaves; if we don't care about constant factors, we won't care which number we use.</p>
+<h3 id="Special_classes_of_binary_trees"><span class="header-section-number">5.7.5</span> Special classes of binary trees</h3>
+<p>So far we haven't specified where particular nodes are placed in the
+binary tree. Most applications of binary trees put some constraints on
+how nodes relate to one another. Some possibilities:</p>
+<ul>
+<li><a href="#heaps">Heaps</a>: Each node has a key that is less than
+the keys of both of its children. These allow for a very simple
+implementation using arrays, so we will look at these first.</li>
+<li><a href="#binarySearchTrees">BinarySearchTrees</a>: Each node has a
+key, and a node's key must be greater than all keys in the subtree of
+its left-hand child and less than all keys in the subtree of its
+right-hand child.</li>
+</ul>
+<h2 id="heaps"><span class="header-section-number">5.8</span> Heaps</h2>
+<p>A <strong>heap</strong> is a <a href="#binaryTrees">binary tree</a> in which each element has a key (or sometimes <strong>priority</strong>) that is less than the keys of its children. Heaps are used to implement the <strong>priority queue</strong> <a href="#abstractDataTypes">abstract data type</a>, which we'll talk about first.</p>
+<h3 id="priorityQueues"><span class="header-section-number">5.8.1</span> Priority queues</h3>
+<p>In a standard queue, elements leave the queue in the same order as
+they arrive. In a priority queue, elements leave the queue in order of
+decreasing priority: the DEQUEUE operation becomes a DELETE-MIN
+operation (or DELETE-MAX, if higher numbers mean higher priority), which
+ removes and returns the highest-priority element of the priority queue,
+ regardless of when it was inserted. Priority queues are often used in
+operating system schedulers to determine which job to run next: a
+high-priority job (e.g., turn on the fire suppression system) runs
+before a low-priority job (floss the cat) even if the low-priority job
+has been waiting longer.</p>
+<h3 id="Expensive_implementations_of_priority_queues"><span class="header-section-number">5.8.2</span> Expensive implementations of priority queues</h3>
+<p>Implementing a priority queue using an array or linked list is likely
+ to be expensive. If the array or list is unsorted, it takes <span class="math inline"><em>O</em>(<em>n</em>)</span> time to find the minimum element; if it is sorted, it takes <span class="math inline"><em>O</em>(<em>n</em>)</span>
+ time (in the worst case) to add a new element. So such implementations
+are only useful when the numbers of INSERT and DELETE-MIN operations are
+ very different. For example, if DELETE-MIN is called only rarely but
+INSERT is called often, it may actually be cheapest to implement a
+priority queue as an unsorted linked list with <span class="math inline"><em>O</em>(1)</span> INSERTs and <span class="math inline"><em>O</em>(<em>n</em>)</span>
+ DELETE-MINs. But if we expect that every element that is inserted is
+eventually removed, we want something for which both INSERT and
+DELETE-MIN are cheap operations.</p>
+<h3 id="heapStructure"><span class="header-section-number">5.8.3</span> Structure of a heap</h3>
+<p>A heap is a binary tree in which each node has a smaller key than its children; this property is called the <strong>heap property</strong> or <strong>heap invariant</strong>.</p>
+<p>To insert a node in the heap, we add it as a new leaf, which may
+violate the heap property if the new node has a lower key than its
+parent. But we can restore the heap property (at least between this node
+ and its parent) by swapping either the new node or its sibling with the
+ parent, where in either case we move up the node with the smaller key.
+This may still leave a violation of the heap property one level up in
+the tree, but by continuing to swap small nodes with their parents we
+eventually reach the top and have a heap again. The time to complete
+this operation is proportional to the depth of the heap, which will
+typically be <span class="math inline"><em>O</em>(log<em>n</em>)</span> (we will see how to enforce this in a moment).</p>
+<p>To implement DELETE-MIN, we can easily find the value to return at
+the top of the heap. Unfortunately, removing it leaves a vacuum that
+must be filled in by some other element. The easiest way to do this is
+to grab a leaf (which probably has a very high key), and then float it
+down to where it belongs by swapping it with its smaller child at each
+iteration. After time proportional to the depth (again <span class="math inline"><em>O</em>(log<em>n</em>)</span> if we are doing things right), the heap invariant is restored.</p>
+<p>Similar local swapping can be used to restore the heap invariant if
+the priority of some element in the middle changes; we will not discuss
+this in detail.</p>
+<h3 id="Packed_heaps"><span class="header-section-number">5.8.4</span> Packed heaps</h3>
+<p>It is possible to build a heap using <code class="backtick">struct</code>s
+ and pointers, where each element points to its parent and children. In
+practice, heaps are instead stored in arrays, with an implicit pointer
+structure determined by array indices. For zero-based arrays as in C,
+the rule is that a node at position <code class="backtick">i</code> has children at positions <code class="backtick">2*i+1</code> and <code class="backtick">2*i+2</code>; in the other direction, a node at position <code class="backtick">i</code> has a parent at position <code class="backtick">(i-1)/2</code> (which rounds down in C). This is equivalent to storing a heap in an array by reading through the tree in <a href="#graphSearch">breadth-first search</a> order:</p>
+<pre><code> 0
+ / \
+ 1 2
+/ \ / \
+3 4 5 6</code></pre>
+<p>becomes</p>
+<pre><code>0 1 2 3 4 5 6</code></pre>
+<p>This approach works best if there are no gaps in the array. So to
+maximize efficiency we make this "no gaps" policy part of the invariant.
+ We can do so because we don't care which leaf gets added when we do an
+INSERT, so we can make it be the position at the end of the array.
+Similarly, in a DELETE-MIN operation we can promote the last element to
+the root before floating it down. Both these operations change the
+number of elements in the array, and INSERTs in particular might force
+us to reallocate eventually. So in the worst case INSERT can be an
+expensive operation, although as with growing hash tables, the amortized
+ cost may still be small.</p>
+<h3 id="Bottom-up_heapification"><span class="header-section-number">5.8.5</span> Bottom-up heapification</h3>
+<p>If we are presented with an unsorted array, we can turn it into a heap more quickly than the <span class="math inline"><em>O</em>(<em>n</em>log<em>n</em>)</span> time required to do <span class="math inline"><em>n</em></span> INSERTs. The trick is to build the heap from the bottom up (i.e. starting with position <span class="math inline"><em>n</em> − 1</span> and working back to position <span class="math inline">0</span>), so that when it comes time to fix the heap invariant at position <span class="math inline"><em>i</em></span> we have already fixed it at all later positions (this is a form of <a href="#dynamicProgramming">dynamic programming</a>). Unfortunately, it is not quite enough simply to swap <code class="backtick">a[i]</code> with its smaller child when we get there, because we could find that <code class="backtick">a[0]</code> (say) was the largest element in the heap. But the cost of floating <code class="backtick">a[i]</code>
+ down to its proper place will be proportional to its own height rather
+than the height of the entire heap. Since most of the elements of the
+heap are close to the bottom, the total cost will turn out to be <span class="math inline"><em>O</em>(<em>n</em>)</span>.</p>
+<h3 id="heapSort"><span class="header-section-number">5.8.6</span> Heapsort</h3>
+<p>Bottom-up heapification is used in the Heapsort algorithm, which first does bottom-up heapification in <span class="math inline"><em>O</em>(<em>n</em>)</span> time and then repeatedly calls DELETE-MAX to extract the largest remaining element. This is no faster than the <span class="math inline"><em>O</em>(<em>n</em>log<em>n</em>)</span> cost of <a href="#mergesort">mergesort</a> or <a href="#quicksort">quicksort</a>
+ in typical use, but requires very little auxiliary storage since we can
+ maintain the heap in the bottom part of the same array whose top part
+stores the max elements extracted so far.</p>
+<p>Here is a simple implementation of heapsort that demonstrates how
+both bottom-up heapification and the DELETE-MAX procedure work by
+floating elements down to their proper places:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="co">/* max heap implementation */</span>
+
+<span class="co">/* compute child 0 or 1 */</span>
+<span class="ot">#define Child(x, dir) (2*(x)+1+(dir))</span>
+
+<span class="co">/* float value at position pos down */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+floatDown(<span class="dt">int</span> n, <span class="dt">int</span> *a, <span class="dt">int</span> pos)
+{
+ <span class="dt">int</span> x;
+
+ <span class="co">/* save original value once */</span>
+ x = a[pos];
+
+ <span class="kw">for</span>(;;) {
+ <span class="kw">if</span>(Child(pos, <span class="dv">1</span>) &lt; n &amp;&amp; a[Child(pos, <span class="dv">1</span>)] &gt; a[Child(pos, <span class="dv">0</span>)]) {
+ <span class="co">/* maybe swap with Child(pos, 1) */</span>
+ <span class="kw">if</span>(a[Child(pos, <span class="dv">1</span>)] &gt; x) {
+ a[pos] = a[Child(pos, <span class="dv">1</span>)];
+ pos = Child(pos, <span class="dv">1</span>);
+ } <span class="kw">else</span> {
+ <span class="co">/* x is bigger than both kids */</span>
+ <span class="kw">break</span>;
+ }
+ } <span class="kw">else</span> <span class="kw">if</span>(Child(pos, <span class="dv">0</span>) &lt; n &amp;&amp; a[Child(pos, <span class="dv">0</span>)] &gt; x) {
+ <span class="co">/* swap with Child(pos, 0) */</span>
+ a[pos] = a[Child(pos, <span class="dv">0</span>)];
+ pos = Child(pos, <span class="dv">0</span>);
+ } <span class="kw">else</span> {
+ <span class="co">/* done */</span>
+ <span class="kw">break</span>;
+ }
+ }
+
+ a[pos] = x;
+}
+
+<span class="co">/* construct a heap bottom-up */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+heapify(<span class="dt">int</span> n, <span class="dt">int</span> *a)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">for</span>(i = n<span class="dv">-1</span>; i &gt;= <span class="dv">0</span>; i--) {
+ floatDown(n, a, i);
+ }
+}
+
+<span class="co">/* sort an array */</span>
+<span class="dt">void</span>
+heapSort(<span class="dt">int</span> n, <span class="dt">int</span> *a)
+{
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> tmp;
+
+ heapify(n, a);
+
+ <span class="kw">for</span>(i = n<span class="dv">-1</span>; i &gt; <span class="dv">0</span>; i--) {
+ <span class="co">/* swap max to a[i] */</span>
+ tmp = a[<span class="dv">0</span>];
+ a[<span class="dv">0</span>] = a[i];
+ a[i] = tmp;
+
+ <span class="co">/* float new a[0] down */</span>
+ floatDown(i, a, <span class="dv">0</span>);
+ }
+}
+
+<span class="ot">#define N (100)</span>
+<span class="ot">#define MULTIPLIER (17)</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> a[N];
+ <span class="dt">int</span> i;
+
+ <span class="kw">if</span>(argc != <span class="dv">1</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; N; i++) { a[i] = (i*MULTIPLIER) % N; }
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; N; i++) { printf(<span class="st">"%d "</span>, a[i]); }
+ putchar(<span class="ch">'\n'</span>);
+
+ heapSort(N, a);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; N; i++) { printf(<span class="st">"%d "</span>, a[i]); }
+ putchar(<span class="ch">'\n'</span>);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/sorting/heapsort.c" class="uri">examples/sorting/heapsort.c</a>
+</div>
+<h3 id="heapMoreInformation"><span class="header-section-number">5.8.7</span> More information</h3>
+<ul>
+<li><a href="http://en.wikipedia.org/wiki/Priority_queue" title="WikiPedia">Priority_queue</a></li>
+<li><a href="http://en.wikipedia.org/wiki/Binary_heap" title="WikiPedia">Binary_heap</a></li>
+<li><a href="http://mathworld.wolfram.com/Heap.html" class="uri">http://mathworld.wolfram.com/Heap.html</a></li>
+</ul>
+<h2 id="binarySearchTrees"><span class="header-section-number">5.9</span> Binary search trees</h2>
+<p>A <strong>binary search tree</strong> is a <a href="#binaryTrees">binary tree</a> in which each node has a <strong>key</strong>,
+ and a node's key must be greater than all keys in the subtree of its
+left-hand child and less than all keys in the subtree of its right-hand
+child. This allows a node to be searched for using essentially the same
+binary search algorithm used on sorted arrays.</p>
+<h3 id="Searching_for_a_node"><span class="header-section-number">5.9.1</span> Searching for a node</h3>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* returns node with given target key */</span>
+<span class="co">/* or null if no such node exists */</span>
+<span class="kw">struct</span> node *
+treeSearch(<span class="kw">struct</span> node *root, <span class="dt">int</span> target)
+{
+ <span class="kw">if</span>(root-&gt;key == target) {
+ <span class="kw">return</span> root;
+ } <span class="kw">else</span> <span class="kw">if</span>(root-&gt;key &gt; target) {
+ <span class="kw">return</span> treeSearch(root-&gt;left, target);
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> treeSearch(root-&gt;right, target);
+ }
+}</code></pre></div>
+<p>This procedure can be rewritten iteratively, which avoids stack overflow and is likely to be faster:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">struct</span> node *
+treeSearch(<span class="kw">struct</span> node *root, <span class="dt">int</span> target)
+{
+ <span class="kw">while</span>(root != <span class="dv">0</span> &amp;&amp; root-&gt;key != target) {
+ <span class="kw">if</span>(root-&gt;key &gt; target) {
+ root = root-&gt;left;
+ } <span class="kw">else</span> {
+ root = root-&gt;right;
+ }
+ }
+
+ <span class="kw">return</span> root;
+}</code></pre></div>
+<p>These procedures can be modified in the obvious way to deal with keys that aren't <code class="backtick">int</code>s, as long as they can be compared (e.g., by using <code class="backtick">strcmp</code> on strings).</p>
+<h3 id="Inserting_a_new_node"><span class="header-section-number">5.9.2</span> Inserting a new node</h3>
+<p>As in a <a href="#hashTables">hash table</a>, the insertion procedure
+ mirrors the search procedure. We must be a little careful to avoid
+actually walking all the way down to a null pointer, since a null
+pointer now indicates a missing space for a leaf that we can fill with
+our new node. So the code is a little more complex.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span>
+treeInsert(<span class="kw">struct</span> tree *root, <span class="dt">int</span> key)
+{
+ <span class="kw">struct</span> tree *newNode;
+
+ newNode = malloc(<span class="kw">sizeof</span>(*newNode));
+ assert(newNode);
+
+ newNode-&gt;key = key;
+ newNode-&gt;left = <span class="dv">0</span>;
+ newNode-&gt;right = <span class="dv">0</span>;
+
+ <span class="kw">for</span>(;;) {
+ <span class="kw">if</span>(root-&gt;key &gt; key) {
+ <span class="co">/* try left child */</span>
+ <span class="kw">if</span>(root-&gt;left) {
+ root = root-&gt;left;
+ } <span class="kw">else</span> {
+ <span class="co">/* put it in */</span>
+ root-&gt;left = newNode;
+ <span class="kw">return</span>;
+ }
+ } <span class="kw">else</span> {
+ <span class="co">/* right child case is symmetric */</span>
+ <span class="kw">if</span>(root-&gt;right) {
+ root = root-&gt;right;
+ } <span class="kw">else</span> {
+ <span class="co">/* put it in */</span>
+ root-&gt;right = newNode;
+ <span class="kw">return</span>;
+ }
+ }
+ }
+}</code></pre></div>
+<p>Note that this code happily inserts duplicate keys. It also makes no
+attempt to keep the tree balanced. This may lead to very long paths if
+new keys are inserted in strictly increasing or strictly decreasing
+order.</p>
+<h3 id="deleting-a-node"><span class="header-section-number">5.9.3</span> Deleting a node</h3>
+<p>Deletion is more complicated. If a node has no children, we can just
+remove it, and the rest of the tree stays the same. A node with one
+child can be spliced out, connecting its parent directly to its child.
+But with two children, we can't do this.</p>
+<p>The trick is to find the leftmost node in our target's right subtree
+(or vice versa). This node exists assuming the target has two children.
+As in a hash table, we can then swap our target node with this more
+convenient node. Because it is the leftmost node, it has no left child,
+so we can delete it using the no-children or one-child case.</p>
+<h3 id="Costs"><span class="header-section-number">5.9.4</span> Costs</h3>
+<p>Searching for or inserting a node in a binary search tree with <span class="math inline"><em>n</em></span> nodes takes time proportional to the depth of the node. In <a href="#balancedTrees">balanced trees</a>, where the nodes in each subtree are divided roughly evenly between the two child subtrees, this will be <span class="math inline"><em>O</em>(log<em>n</em>)</span>, but for a badly unbalanced tree, this might be as much as <span class="math inline"><em>O</em>(<em>n</em>)</span>. So making a binary search tree work efficiently requires keeping it balanced.</p>
+<h2 id="augmentedTrees"><span class="header-section-number">5.10</span> Augmented trees</h2>
+<p>An <strong>augmented</strong> data structure stores additional
+information in each of its nodes that caches values that might otherwise
+ be expensive to compute. For trees, this might include information like
+ the size of a subtree (which can be useful for <strong>ranking</strong>
+ values, where we want to determine how many elements of the tree are
+smaller), the height of a subtree, or other summary information like the
+ sum of all the keys in a subtree.</p>
+<p>Augmented data structures, in a sense, violate the
+no-separate-but-equal rule that says we shouldn't store the same
+information in different places. The reason we try to avoid this is that
+ it's trouble if the two copies diverge, and by not having two copies in
+ the first place there is no possibility that they contradict each
+other. But in this case the reduced cost justifies breaking this rule.</p>
+<p>The idea is that when we insert a new element into an augmented tree,
+ it only changes the height/size/sum/etc. values for nodes on the path
+from the root to the new value. Since each of these aggregate values can
+ be computed for a node in <span class="math inline"><em>O</em>(1)</span>
+ time from the values in its children, we can update all the aggregate
+values on our way back up the stack after doing the insertion at a cost
+of <span class="math inline"><em>O</em>(1)</span> per node. This will give a total cost of <span class="math inline"><em>O</em>(log<em>n</em>)</span> assuming our tree is reasonably balanced.</p>
+<h3 id="applications"><span class="header-section-number">5.10.1</span> Applications</h3>
+<p>Storing the height field can be useful for balancing, as in <a href="#avlTrees">AVL trees</a>.</p>
+<p>Storing the size allows ranking (computing the number of elements
+less than a given target value) and unranking (find an element with a
+particular rank). Sample code for doing this is given in the <a href="#avlTreeImplementation">AVL tree sample implementation</a> below.</p>
+<p>Storing other aggregates like the sum of keys or values allows <strong>range queries</strong>,
+ where we ask, for example, for some aggregate statistic (like the sum
+or average) of all the elements between some given minimum and maximum.</p>
+<p>Assuming we keep the tree balanced and correctly maintain the
+aggregate data for each subtree, all of these operations can be done in <span class="math inline"><em>O</em>(log<em>n</em>)</span> time.</p>
+<h2 id="balancedTrees"><span class="header-section-number">5.11</span> Balanced trees</h2>
+<p><a href="#binarySearchTrees">Binary search trees</a> are a fine idea, but they only work if they are <strong>balanced</strong>—if
+ moving from a tree to its left or right subtree reduces the size by a
+constant fraction. Balanced binary trees add some extra mechanism to the
+ basic binary search tree to ensure balance. Finding efficient ways to
+balance a tree has been studied for decades, and several good mechanisms
+ are known. We'll try to hit the high points of all of them.</p>
+<h3 id="treeRotations"><span class="header-section-number">5.11.1</span> Tree rotations</h3>
+<p>The problem is that as we insert new nodes, some paths through the
+tree may become very long. So we need to be able to shrink the long
+paths by moving nodes elsewhere in the tree.</p>
+<p>But how do we do this? The idea is to notice that there may be many
+binary search trees that contain the same data, and that we can
+transform one into another by a local modification called a <em>rotation</em>:</p>
+<pre><code> y x
+ / \ &lt;==&gt; / \
+ x C A y
+ / \ / \
+A B B C
+
+Single rotation on x-y edge</code></pre>
+<p>If <span class="math inline"><em>A</em> &lt; <em>x</em> &lt; <em>B</em> &lt; <em>y</em> &lt; <em>C</em></span>, then both versions of this tree have the binary search tree property. By doing the rotation in one direction, we move <span class="math inline"><em>A</em></span> up and <span class="math inline"><em>C</em></span> down; in the other direction, we move <span class="math inline"><em>A</em></span> down and <span class="math inline"><em>C</em></span> up. So rotations can be used to transfer depth from the leftmost grandchild of a node to the rightmost and vice versa.</p>
+<p>But what if it's the middle grandchild <span class="math inline"><em>B</em></span> that's the problem? A single rotation as above doesn't move <span class="math inline"><em>B</em></span> up or down. To move <span class="math inline"><em>B</em></span>, we have to reposition it so that it's on the end of something. We do this by splitting <span class="math inline"><em>B</em></span> into two subtrees <span class="math inline"><em>B</em><sub>1</sub></span> and <span class="math inline"><em>B</em><sub>2</sub></span>, and doing two rotations that split the two subtrees while moving both up. For this we need to do two rotations:</p>
+<pre><code> z z y
+ / \ ===&gt; / \ ===&gt; / \
+ x C y C x z
+ / \ / \ /| |\
+A y x B2 A B1 B2 C
+ / \ / \
+ B1 B2 A B1
+
+Double rotation: rotate xy then zy</code></pre>
+<h3 id="AVLtrees"><span class="header-section-number">5.11.2</span> AVL trees</h3>
+<p>Rotations in principle let us rebalance a tree, but we still need to
+decide when to do them. If we try to keep the tree in perfect balance
+(all paths nearly the same length), we'll spend so much time rotating
+that we won't be able to do anything else.</p>
+<p>AVL trees solve this problem by enforcing the invariant that the
+heights of the two subtrees sitting under each node differ by at most
+one. This does not guarantee perfect balance, but it does get close. Let
+ <span class="math inline"><em>S</em>(<em>k</em>)</span> be the size of the smallest AVL tree with height <span class="math inline"><em>k</em></span>. This tree will have at least one subtree of height <span class="math inline"><em>k</em> − 1</span>, but its other subtree can be of height <span class="math inline"><em>k</em> − 2</span> (and should be, to keep it as small as possible). We thus have the recurrence <span class="math inline"><em>S</em>(<em>k</em>)=1 + <em>S</em>(<em>k</em> − 1)+<em>S</em>(<em>k</em> − 2)</span>, which is very close to the Fibonacci recurrence.</p>
+<p>It is possible to solve this exactly using generating functions. But we can get close by guessing that <span class="math inline"><em>S</em>(<em>k</em>)≥<em>a</em><sup><em>k</em></sup></span> for some constant <span class="math inline"><em>a</em></span>. This clearly works for <span class="math inline"><em>S</em>(0)=<em>a</em><sup>0</sup> = 1</span>. For larger <span class="math inline"><em>k</em></span>, compute</p>
+<ul>
+<li><span class="math inline"><em>S</em>(<em>k</em>)=1 + <em>S</em>(<em>k</em> − 1)+<em>S</em>(<em>k</em> − 2)≥1 + <em>a</em><sup><em>k</em> − 1</sup> + <em>a</em><sup><em>k</em> − 2</sup> = 1 + <em>a</em><sup><em>k</em></sup>(1/<em>a</em> + 1/<em>a</em><sup>2</sup>)&gt;<em>a</em><sup><em>k</em></sup>(1/<em>a</em> + 1/<em>a</em><sup>2</sup>)</span>.</li>
+</ul>
+<p>This last quantity is at least <span class="math inline"><em>a</em><sup><em>k</em></sup></span> provided <span class="math inline">(1/<em>a</em> + 1/<em>a</em><sup>2</sup>)</span> is at least 1. We can solve exactly for the largest <span class="math inline"><em>a</em></span> that makes this work, but a very quick calculation shows that <span class="math inline"><em>a</em> = 3/2</span> works: <span class="math inline">2/3 + 4/9 = 10/9 &gt; 1</span>. It follows that any AVL tree with height <span class="math inline"><em>k</em></span> has at least <span class="math inline">(3/2)<sup><em>k</em></sup></span> nodes, or conversely that any AVL tree with <span class="math inline">(3/2)<sup><em>k</em></sup></span> nodes has height at most <span class="math inline"><em>k</em></span>. So the height of an arbitrary AVL tree with <span class="math inline"><em>n</em></span> nodes is no greater than <span class="math inline">log<sub>3/2</sub><em>n</em> = <em>O</em>(log<em>n</em>)</span>.</p>
+<p>How do we maintain this invariant? The first thing to do is add extra
+ information to the tree, so that we can tell when the invariant has
+been violated. AVL trees store in each node the difference between the
+heights of its left and right subtrees, which will be one of <span class="math inline">−1</span>, <span class="math inline">0</span>, or <span class="math inline">1</span>. In an ideal world this would require <span class="math inline">log<sub>2</sub>3 ≈ 1.58</span>
+ bits per node, but since fractional bits are difficult to represent on
+modern computers a typical implementation uses two bits. Inserting a new
+ node into an AVL tree involves</p>
+<ol style="list-style-type: decimal">
+<li>Doing a standard binary search tree insertion.</li>
+<li>Updating the balance fields for every node on the insertion path.</li>
+<li>Performing a single or double rotation to restore balance if needed.</li>
+</ol>
+<p>Implementing this correctly is tricky. Intuitively, we can imagine a
+version of an AVL tree in which we stored the height of each node (using
+ <span class="math inline"><em>O</em>(loglog<em>n</em>)</span> bits). When we insert a new node, only the heights of its ancestors change—so step 2 requires updating <span class="math inline"><em>O</em>(log<em>n</em>)</span>
+ height fields. Similarly, it is only these ancestors that can be too
+tall. It turns out that fixing the closest ancestor fixes all the ones
+above it (because it shortens their longest paths by one as well). So
+just one single or double rotation restores balance.</p>
+<p>Deletions are also possible, but are uglier: a deletion in an AVL tree may require as many as <span class="math inline"><em>O</em>(log<em>n</em>)</span> rotations. The basic idea is to use the standard <a href="#binarySearchTrees">binary search tree</a>
+ deletion trick of either splicing out a node if it has no right child,
+or replacing it with the minimum value in its right subtree (the node
+for which is spliced out); we then have to check to see if we need to
+rebalance at every node above whatever node we removed.</p>
+<p>Which rotations we need to do to rebalance depends on how some pair
+of siblings are unbalanced. Below, we show the possible cases.</p>
+<p>Zig-zig case: This can occur after inserting in A or deleting in C. Here we rotate A up:</p>
+<pre><code> y x
+ / \ ===&gt; / \
+ x C A y
+ / \ | / \
+A B # B C
+|
+#</code></pre>
+<p>Zig-zag case: This can occur after inserting in B or deleting in C. This requires a double rotation.</p>
+<pre><code> z z y
+ / \ ===&gt; / \ ===&gt; / \
+ x C y C x z
+ / \ / \ /| |\
+A y x B2 A B1 B2 C
+ / \ / \
+ B1 B2 A B1</code></pre>
+<p>Zig-zag case, again: This last case comes up after deletion if both
+nephews of the short node are too tall. The same double rotation we used
+ in the previous case works here, too. Note that one of the subtrees is
+still one taller than the others, but that's OK.</p>
+<pre><code> z z y
+ / \ ===&gt; / \ ===&gt; / \
+ x C y C x z
+ / \ / \ /| |\
+A y x B2 A B1 B2 C
+| / \ / \ |
+# B1 B2 A B1 #
+ |
+ #</code></pre>
+<h4 id="avlTreeImplementation"><span class="header-section-number">5.11.2.1</span> Sample implementation</h4>
+<p>If we are not fanatical about space optimization, we can just keep
+track of the heights of all nodes explicitly, instead of managing the <span class="math inline">−1, 0, 1</span> balance values. Below, we give a not-very-optimized example implementation that uses this approach to store a set of <code>int</code>s.
+ This is pretty much our standard unbalanced BST (although we have to
+make sure that the insert and delete routines are recursive, so that we
+can fix things up on the way back out), with a layer on top, implemented
+ in the <code>treeAggregateFix</code> function, that tracks the height and size
+of each subtree (although we don't use size), and another layer on top
+of that, implemented in the <code>treeRebalance</code> function, that fixes any violations of the AVL balance rule.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/*</span>
+<span class="co"> * Basic binary search tree data structure without balancing info.</span>
+<span class="co"> *</span>
+<span class="co"> * Convention: </span>
+<span class="co"> *</span>
+<span class="co"> * Operations that update a tree are passed a struct tree **,</span>
+<span class="co"> * so they can replace the argument with the return value.</span>
+<span class="co"> *</span>
+<span class="co"> * Operations that do not update the tree get a const struct tree *.</span>
+<span class="co"> */</span>
+
+<span class="ot">#define LEFT (0)</span>
+<span class="ot">#define RIGHT (1)</span>
+<span class="ot">#define TREE_NUM_CHILDREN (2)</span>
+
+<span class="kw">struct</span> tree {
+ <span class="co">/* we'll make this an array so that we can make some operations symmetric */</span>
+ <span class="kw">struct</span> tree *child[TREE_NUM_CHILDREN];
+ <span class="dt">int</span> key;
+ <span class="dt">int</span> height; <span class="co">/* height of this node */</span>
+ size_t size; <span class="co">/* size of subtree rooted at this node */</span>
+};
+
+<span class="ot">#define TREE_EMPTY (0)</span>
+<span class="ot">#define TREE_EMPTY_HEIGHT (-1)</span>
+
+<span class="co">/* free all elements of a tree, replacing it with TREE_EMPTY */</span>
+<span class="dt">void</span> treeDestroy(<span class="kw">struct</span> tree **root);
+
+<span class="co">/* insert an element into a tree pointed to by root */</span>
+<span class="dt">void</span> treeInsert(<span class="kw">struct</span> tree **root, <span class="dt">int</span> newElement);
+
+<span class="co">/* return 1 if target is in tree, 0 otherwise */</span>
+<span class="co">/* we allow root to be modified to allow for self-balancing trees */</span>
+<span class="dt">int</span> treeContains(<span class="dt">const</span> <span class="kw">struct</span> tree *root, <span class="dt">int</span> target);
+
+<span class="co">/* delete minimum element from the tree and return its key */</span>
+<span class="co">/* do not call this on an empty tree */</span>
+<span class="dt">int</span> treeDeleteMin(<span class="kw">struct</span> tree **root);
+
+<span class="co">/* delete target from the tree */</span>
+<span class="co">/* has no effect if target is not in tree */</span>
+<span class="dt">void</span> treeDelete(<span class="kw">struct</span> tree **root, <span class="dt">int</span> target);
+
+<span class="co">/* return height of tree */</span>
+<span class="dt">int</span> treeHeight(<span class="dt">const</span> <span class="kw">struct</span> tree *root);
+
+<span class="co">/* return size of tree */</span>
+size_t treeSize(<span class="dt">const</span> <span class="kw">struct</span> tree *root);
+
+<span class="co">/* pretty-print the contents of a tree */</span>
+<span class="dt">void</span> treePrint(<span class="dt">const</span> <span class="kw">struct</span> tree *root);
+
+<span class="co">/* return the number of elements in tree less than target */</span>
+size_t treeRank(<span class="dt">const</span> <span class="kw">struct</span> tree *root, <span class="dt">int</span> target);
+
+<span class="co">/* return an element with the given rank */</span>
+<span class="co">/* rank must be less than treeSize(root) */</span>
+<span class="dt">int</span> treeUnrank(<span class="dt">const</span> <span class="kw">struct</span> tree *root, size_t rank);
+
+<span class="co">/* check that aggregate data is correct throughout the tree */</span>
+<span class="dt">void</span> treeSanityCheck(<span class="dt">const</span> <span class="kw">struct</span> tree *root);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/AVL/tree.h" class="uri">examples/trees/AVL/tree.h</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;stdint.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="ot">#include "tree.h"</span>
+
+<span class="dt">int</span>
+treeHeight(<span class="dt">const</span> <span class="kw">struct</span> tree *root)
+{
+ <span class="kw">if</span>(root == <span class="dv">0</span>) {
+ <span class="kw">return</span> TREE_EMPTY_HEIGHT;
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> root-&gt;height;
+ }
+}
+
+<span class="co">/* recompute height from height of kids */</span>
+<span class="dt">static</span> <span class="dt">int</span>
+treeComputeHeight(<span class="dt">const</span> <span class="kw">struct</span> tree *root)
+{
+ <span class="dt">int</span> childHeight;
+ <span class="dt">int</span> maxChildHeight;
+ <span class="dt">int</span> i;
+
+ <span class="kw">if</span>(root == <span class="dv">0</span>) {
+ <span class="kw">return</span> TREE_EMPTY_HEIGHT;
+ } <span class="kw">else</span> {
+ maxChildHeight = TREE_EMPTY_HEIGHT;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; TREE_NUM_CHILDREN; i++) {
+ childHeight = treeHeight(root-&gt;child[i]);
+ <span class="kw">if</span>(childHeight &gt; maxChildHeight) {
+ maxChildHeight = childHeight;
+ }
+ }
+
+ <span class="kw">return</span> maxChildHeight + <span class="dv">1</span>;
+ }
+}
+
+size_t
+treeSize(<span class="dt">const</span> <span class="kw">struct</span> tree *root)
+{
+ <span class="kw">if</span>(root == <span class="dv">0</span>) {
+ <span class="kw">return</span> <span class="dv">0</span>;
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> root-&gt;size;
+ }
+}
+
+<span class="co">/* recompute size from size of kids */</span>
+<span class="dt">static</span> <span class="dt">int</span>
+treeComputeSize(<span class="dt">const</span> <span class="kw">struct</span> tree *root)
+{
+ <span class="dt">int</span> size;
+ <span class="dt">int</span> i;
+
+ <span class="kw">if</span>(root == <span class="dv">0</span>) {
+ <span class="kw">return</span> <span class="dv">0</span>;
+ } <span class="kw">else</span> {
+ size = <span class="dv">1</span>;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; TREE_NUM_CHILDREN; i++) {
+ size += treeSize(root-&gt;child[i]);
+ }
+
+ <span class="kw">return</span> size;
+ }
+}
+
+<span class="co">/* fix aggregate data in root */</span>
+<span class="co">/* assumes children are correct */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+treeAggregateFix(<span class="kw">struct</span> tree *root)
+{
+ <span class="kw">if</span>(root) {
+ root-&gt;height = treeComputeHeight(root);
+ root-&gt;size = treeComputeSize(root);
+ }
+}
+
+<span class="co">/* rotate child in given direction to root */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+treeRotate(<span class="kw">struct</span> tree **root, <span class="dt">int</span> direction)
+{
+ <span class="kw">struct</span> tree *x;
+ <span class="kw">struct</span> tree *y;
+ <span class="kw">struct</span> tree *b;
+
+ <span class="co">/*</span>
+<span class="co"> * y x </span>
+<span class="co"> * / \ / \</span>
+<span class="co"> * x C &lt;=&gt; A y</span>
+<span class="co"> * / \ / \</span>
+<span class="co"> * A B B C</span>
+<span class="co"> */</span>
+
+ y = *root; assert(y);
+ x = y-&gt;child[direction]; assert(x);
+ b = x-&gt;child[!direction];
+
+ <span class="co">/* do the rotation */</span>
+ *root = x;
+ x-&gt;child[!direction] = y;
+ y-&gt;child[direction] = b;
+
+ <span class="co">/* fix aggregate data for y then x */</span>
+ treeAggregateFix(y);
+ treeAggregateFix(x);
+}
+
+<span class="co">/* restore AVL property at *root after an insertion or deletion */</span>
+<span class="co">/* assumes subtrees already have AVL property */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+treeRebalance(<span class="kw">struct</span> tree **root)
+{
+ <span class="dt">int</span> tallerChild;
+
+ <span class="kw">if</span>(*root) {
+ <span class="kw">for</span>(tallerChild = <span class="dv">0</span>; tallerChild &lt; TREE_NUM_CHILDREN; tallerChild++) {
+ <span class="kw">if</span>(treeHeight((*root)-&gt;child[tallerChild]) &gt;= treeHeight((*root)-&gt;child[!tallerChild]) + <span class="dv">2</span>) {
+
+ <span class="co">/* which grandchild is the problem? */</span>
+ <span class="kw">if</span>(treeHeight((*root)-&gt;child[tallerChild]-&gt;child[!tallerChild])
+ &gt; treeHeight((*root)-&gt;child[tallerChild]-&gt;child[tallerChild])) {
+ <span class="co">/* opposite-direction grandchild is too tall */</span>
+ <span class="co">/* rotation at root will just change its parent but not change height */</span>
+ <span class="co">/* so we rotate it up first */</span>
+ treeRotate(&amp;(*root)-&gt;child[tallerChild], !tallerChild);
+ }
+
+ <span class="co">/* now rotate up the taller child */</span>
+ treeRotate(root, tallerChild);
+
+ <span class="co">/* don't bother with other child */</span>
+ <span class="kw">break</span>;
+ }
+ }
+
+ <span class="co">/* test that we actually fixed it */</span>
+ assert(abs(treeHeight((*root)-&gt;child[LEFT]) - treeHeight((*root)-&gt;child[RIGHT])) &lt;= <span class="dv">1</span>);
+
+<span class="ot">#ifdef PARANOID_REBALANCE</span>
+ treeSanityCheck(*root);
+<span class="ot">#endif</span>
+ }
+}
+
+
+<span class="co">/* free all elements of a tree, replacing it with TREE_EMPTY */</span>
+<span class="dt">void</span>
+treeDestroy(<span class="kw">struct</span> tree **root)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">if</span>(*root) {
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; TREE_NUM_CHILDREN; i++) {
+ treeDestroy(&amp;(*root)-&gt;child[i]);
+ }
+ free(*root);
+ *root = TREE_EMPTY;
+ }
+}
+
+
+
+<span class="co">/* insert an element into a tree pointed to by root */</span>
+<span class="dt">void</span>
+treeInsert(<span class="kw">struct</span> tree **root, <span class="dt">int</span> newElement)
+{
+ <span class="kw">struct</span> tree *e;
+
+ <span class="kw">if</span>(*root == <span class="dv">0</span>) {
+ <span class="co">/* not already there, put it in */</span>
+
+ e = malloc(<span class="kw">sizeof</span>(*e));
+ assert(e);
+
+ e-&gt;key = newElement;
+ e-&gt;child[LEFT] = e-&gt;child[RIGHT] = <span class="dv">0</span>;
+
+ *root = e;
+ } <span class="kw">else</span> <span class="kw">if</span>((*root)-&gt;key == newElement) {
+ <span class="co">/* already there, do nothing */</span>
+ <span class="kw">return</span>;
+ } <span class="kw">else</span> {
+ <span class="co">/* do this recursively so we can fix data on the way back out */</span>
+ treeInsert(&amp;(*root)-&gt;child[(*root)-&gt;key &lt; newElement], newElement);
+ }
+
+ <span class="co">/* fix the aggregate data */</span>
+ treeAggregateFix(*root);
+ treeRebalance(root);
+}
+
+<span class="co">/* return 1 if target is in tree, 0 otherwise */</span>
+<span class="dt">int</span>
+treeContains(<span class="dt">const</span> <span class="kw">struct</span> tree *t, <span class="dt">int</span> target)
+{
+ <span class="kw">while</span>(t &amp;&amp; t-&gt;key != target) {
+ t = t-&gt;child[t-&gt;key &lt; target];
+ }
+
+ <span class="kw">return</span> t != <span class="dv">0</span>;
+}
+
+<span class="co">/* delete minimum element from the tree and return its key */</span>
+<span class="co">/* do not call this on an empty tree */</span>
+<span class="dt">int</span>
+treeDeleteMin(<span class="kw">struct</span> tree **root)
+{
+ <span class="kw">struct</span> tree *toFree;
+ <span class="dt">int</span> retval;
+
+ assert(*root); <span class="co">/* can't delete min from empty tree */</span>
+
+ <span class="kw">if</span>((*root)-&gt;child[LEFT]) {
+ <span class="co">/* recurse on left subtree */</span>
+ retval = treeDeleteMin(&amp;(*root)-&gt;child[LEFT]);
+ } <span class="kw">else</span> {
+ <span class="co">/* delete the root */</span>
+ toFree = *root;
+ retval = toFree-&gt;key;
+ *root = toFree-&gt;child[RIGHT];
+ free(toFree);
+ }
+
+ <span class="co">/* fix the aggregate data */</span>
+ treeAggregateFix(*root);
+ treeRebalance(root);
+
+ <span class="kw">return</span> retval;
+}
+
+<span class="co">/* delete target from the tree */</span>
+<span class="co">/* has no effect if target is not in tree */</span>
+<span class="dt">void</span>
+treeDelete(<span class="kw">struct</span> tree **root, <span class="dt">int</span> target)
+{
+ <span class="kw">struct</span> tree *toFree;
+
+ <span class="co">/* do nothing if target not in tree */</span>
+ <span class="kw">if</span>(*root) {
+ <span class="kw">if</span>((*root)-&gt;key == target) {
+ <span class="kw">if</span>((*root)-&gt;child[RIGHT]) {
+ <span class="co">/* replace root with min value in right subtree */</span>
+ (*root)-&gt;key = treeDeleteMin(&amp;(*root)-&gt;child[RIGHT]);
+ } <span class="kw">else</span> {
+ <span class="co">/* patch out root */</span>
+ toFree = *root;
+ *root = toFree-&gt;child[LEFT];
+ free(toFree);
+ }
+ } <span class="kw">else</span> {
+ treeDelete(&amp;(*root)-&gt;child[(*root)-&gt;key &lt; target], target);
+ }
+
+ <span class="co">/* fix the aggregate data */</span>
+ treeAggregateFix(*root);
+ treeRebalance(root);
+ }
+}
+
+
+<span class="co">/* how far to indent each level of the tree */</span>
+<span class="ot">#define INDENTATION_LEVEL (2)</span>
+
+<span class="co">/* print contents of a tree, indented by depth */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+treePrintIndented(<span class="dt">const</span> <span class="kw">struct</span> tree *root, <span class="dt">int</span> depth)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">if</span>(root != <span class="dv">0</span>) {
+ treePrintIndented(root-&gt;child[LEFT], depth<span class="dv">+1</span>);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; INDENTATION_LEVEL*depth; i++) {
+ putchar(' ');
+ }
+ printf(<span class="st">"%d Height: %d Size: %zu (%p)</span><span class="ch">\n</span><span class="st">"</span>, root-&gt;key, root-&gt;height, root-&gt;size, (<span class="dt">void</span> *) root);
+
+ treePrintIndented(root-&gt;child[RIGHT], depth<span class="dv">+1</span>);
+ }
+}
+
+<span class="co">/* print the contents of a tree */</span>
+<span class="dt">void</span>
+treePrint(<span class="dt">const</span> <span class="kw">struct</span> tree *root)
+{
+ treePrintIndented(root, <span class="dv">0</span>);
+}
+
+size_t
+treeRank(<span class="dt">const</span> <span class="kw">struct</span> tree *t, <span class="dt">int</span> target)
+{
+ size_t rank = <span class="dv">0</span>;
+
+ <span class="kw">while</span>(t &amp;&amp; t-&gt;key != target) {
+ <span class="kw">if</span>(t-&gt;key &lt; target) {
+ <span class="co">/* go right */</span>
+ <span class="co">/* root and left subtree are all less than target */</span>
+ rank += (<span class="dv">1</span> + treeSize(t-&gt;child[LEFT]));
+ t = t-&gt;child[RIGHT];
+ } <span class="kw">else</span> {
+ <span class="co">/* go left */</span>
+ t = t-&gt;child[LEFT];
+ }
+ }
+
+ <span class="co">/* we must also count left subtree */</span>
+ <span class="kw">return</span> rank + treeSize(t-&gt;child[LEFT]);
+}
+
+<span class="dt">int</span>
+treeUnrank(<span class="dt">const</span> <span class="kw">struct</span> tree *t, size_t rank)
+{
+ size_t leftSize;
+
+ <span class="co">/* basic idea: if rank &lt; treeSize(child[LEFT]), recurse in left child */</span>
+ <span class="co">/* if it's equal, return the root */</span>
+ <span class="co">/* else recurse in right child with rank = rank - treeSize(child[LEFT]) - 1 */</span>
+ <span class="kw">while</span>(rank != (leftSize = treeSize(t-&gt;child[LEFT]))) {
+ <span class="kw">if</span>(rank &lt; leftSize) {
+ t = t-&gt;child[LEFT];
+ } <span class="kw">else</span> {
+ t = t-&gt;child[RIGHT];
+ rank -= (leftSize + <span class="dv">1</span>);
+ }
+ }
+
+ <span class="kw">return</span> t-&gt;key;
+}
+
+<span class="co">/* check that aggregate data is correct throughout the tree */</span>
+<span class="dt">void</span>
+treeSanityCheck(<span class="dt">const</span> <span class="kw">struct</span> tree *root)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">if</span>(root) {
+ assert(root-&gt;height == treeComputeHeight(root));
+ assert(root-&gt;size == treeComputeSize(root));
+
+ assert(abs(treeHeight(root-&gt;child[LEFT]) - treeHeight(root-&gt;child[RIGHT])) &lt; <span class="dv">2</span>);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; TREE_NUM_CHILDREN; i++) {
+ treeSanityCheck(root-&gt;child[i]);
+ }
+ }
+}
+
+<span class="ot">#ifdef TEST_MAIN</span>
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> key;
+ <span class="dt">int</span> i;
+ <span class="dt">const</span> <span class="dt">int</span> n = <span class="dv">10</span>;
+ <span class="dt">const</span> <span class="dt">int</span> randRange = <span class="dv">1000</span>;
+ <span class="dt">const</span> <span class="dt">int</span> randTrials = <span class="dv">10000</span>;
+ <span class="kw">struct</span> tree *root = TREE_EMPTY;
+
+ <span class="kw">if</span>(argc != <span class="dv">1</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ <span class="co">/* original test */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ assert(!treeContains(root, i));
+ treeInsert(&amp;root, i);
+ assert(treeContains(root, i));
+ treeSanityCheck(root);
+<span class="ot">#ifdef PRINT_AFTER_OPERATIONS</span>
+ treePrint(root);
+ puts(<span class="st">"---"</span>);
+<span class="ot">#endif</span>
+ }
+
+ <span class="co">/* check ranks */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ assert(treeRank(root, i) == i);
+ assert(treeUnrank(root, i) == i);
+ }
+
+ treeSanityCheck(root);
+
+ <span class="co">/* now delete everything */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ assert(treeContains(root, i));
+ treeDelete(&amp;root, i);
+ assert(!treeContains(root, i));
+ treeSanityCheck(root);
+<span class="ot">#ifdef PRINT_AFTER_OPERATIONS</span>
+ treePrint(root);
+ puts(<span class="st">"---"</span>);
+<span class="ot">#endif</span>
+ }
+
+ treeSanityCheck(root);
+ treeDestroy(&amp;root);
+
+ <span class="co">/* random test */</span>
+ srand(<span class="dv">1</span>);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; randTrials; i++) {
+ treeInsert(&amp;root, rand() % randRange);
+ treeDelete(&amp;root, rand() % randRange);
+ }
+
+ treeSanityCheck(root);
+ treeDestroy(&amp;root);
+
+<span class="ot">#ifdef TEST_USE_STDIN</span>
+ <span class="kw">while</span>(scanf(<span class="st">"%d"</span>, &amp;key) == <span class="dv">1</span>) {
+ <span class="co">/* insert if positive, delete if negative */</span>
+ <span class="kw">if</span>(key &gt; <span class="dv">0</span>) {
+ treeInsert(&amp;root, key);
+ assert(treeContains(root, key));
+ } <span class="kw">else</span> <span class="kw">if</span>(key &lt; <span class="dv">0</span>) {
+ treeDelete(&amp;root, -key);
+ assert(!treeContains(root, key));
+ }
+ <span class="co">/* else ignore 0 */</span>
+
+<span class="ot">#ifdef PRINT_AFTER_OPERATIONS</span>
+ treePrint(root);
+ puts(<span class="st">"---"</span>);
+<span class="ot">#endif</span>
+ }
+
+ treeSanityCheck(root);
+
+ treeDestroy(&amp;root);
+<span class="ot">#endif </span><span class="co">/* TEST_USE_STDIN */</span>
+ <span class="kw">return</span> <span class="dv">0</span>;
+}
+<span class="ot">#endif </span><span class="co">/* TEST_MAIN */</span></code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/AVL/tree.c" class="uri">examples/trees/AVL/tree.c</a>
+</div>
+<p>This <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/AVL/Makefile">Makefile</a> will compile and run some demo code in <code>tree.c</code> if run with <code>make test</code>.</p>
+<p>(An older implementation can be found in the directory <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/oldAvlTree" class="uri">examples/trees/oldAvlTree</a>.)</p>
+<h3 id="A2.2BIBM-3_trees"><span class="header-section-number">5.11.3</span> 2–3 trees</h3>
+<p>An early branch in the evolution of balanced trees was the 2–3 tree.
+Here all paths have the same length, but internal nodes have either 2 or
+ 3 children. So a 2–3 tree with height <span class="math inline"><em>k</em></span> has between <span class="math inline">2<sup><em>k</em></sup></span> and <span class="math inline">3<sup><em>k</em></sup></span> leaves and a comparable number of internal nodes. The maximum path length in a tree with <span class="math inline"><em>n</em></span> nodes is at most <span class="math inline">⌈lg<em>n</em>⌉</span>, as in a perfectly balanced binary tree.</p>
+<p>An internal node in a 2–3 tree holds one key if it has two children
+(including two nil pointers) and two if it has three children. A search
+that reaches a three-child node must compare the target with both keys
+to decide which of the three subtrees to recurse into. As in binary
+trees, these comparisons take constant time, so we can search a 2–3 tree
+ in <span class="math inline"><em>O</em>(log<em>n</em>)</span> time.</p>
+<p>Insertion is done by expanding leaf nodes. This may cause a leaf to
+split when it acquires a third key. When a leaf splits, it becomes two
+one-key nodes and the middle key moves up into its parent. This may
+cause further splits up the ancestor chain; the tree grows in height by
+adding a new root when the old root splits. In practice only a small
+number of splits are needed for most insertions, but even in the worst
+case this entire process takes <span class="math inline"><em>O</em>(log<em>n</em>)</span> time.</p>
+<p>It follows that 2–3 trees have the same performance as AVL trees.
+Conceptually, they are simpler, but having to write separate cases for
+2-child and 3-child nodes doubles the size of most code that works on
+2–3 trees. The real significance of 2–3 trees is as a precursor to two
+other kinds of trees, the <em>red-black tree</em> and the <em>B-tree</em>.</p>
+<h3 id="redBlackTrees"><span class="header-section-number">5.11.4</span> Red-black trees</h3>
+<p>A red-black tree is a 2–3–4 tree (i.e. all nodes have 2, 3, or 4
+children and 1, 2, or 3 internal keys) where each node is represented by
+ a little binary tree with a black root and zero, one, or two red
+extender nodes as follows:</p>
+<div class="figure">
+<img src="" title="red-black nodes" alt="redblacknodes.png">
+<p class="caption">redblacknodes.png</p>
+</div>
+<p>The invariant for a red-black tree is that</p>
+<ol style="list-style-type: decimal">
+<li>No two red nodes are adjacent.</li>
+<li>Every path contains the same number of black nodes.</li>
+</ol>
+<p>For technical reasons, we include the null pointers at the bottom of
+the tree as black nodes; this has no effect on the invariant, but
+simplifies the description of the rebalancing procedure.</p>
+<p>From the invariant it follows that every path has between <span class="math inline"><em>k</em></span> and <span class="math inline">2<em>k</em></span> nodes, where <span class="math inline"><em>k</em></span> is the <em>black-height</em>, the common number of black nodes on each path. From this we can prove that the height of the tree is <span class="math inline"><em>O</em>(log<em>n</em>)</span>.</p>
+<p>Searching in a red-black tree is identical to searching in any other
+binary search tree; we simply ignore the color bit on each node. So
+search takes <span class="math inline"><em>O</em>(log<em>n</em>)</span>
+time. For insertions, we use the standard binary search tree insertion
+algorithm, and insert the new node as a red node. This may violate the
+first part of the invariant (it doesn't violate the second because it
+doesn't change the number of black nodes on any path). In this case we
+need to fix up the constraint by recoloring nodes and possibly
+performing a single or double rotation.</p>
+<div class="figure">
+<img src="" title="Rebalancing a red-black tree" alt="redblackrebalance.png">
+<p class="caption">redblackrebalance.png</p>
+</div>
+<p>Which operations we need to do depends on the color of the new node's
+uncle. If the uncle is red, we can recolor the node's parent, uncle, and
+ grandparent and get rid of the double-red edge between the new node and
+ its parent without changing the number of black nodes on any path. In
+this case, the grandparent becomes red, which may create a new
+double-red edge which must be fixed recursively. Thus up to <span class="math inline"><em>O</em>(log<em>n</em>)</span> such recolorings may occur at a total cost of <span class="math inline"><em>O</em>(log<em>n</em>)</span>.</p>
+<p>If the uncle is black (which includes the case where the uncle is a
+null pointer), a rotation (possibly a double rotation) and recoloring is
+ necessary. In this case (depicted at the bottom of the picture above),
+the new grandparent is always black, so there are no more double-red
+edges. So at most two rotations occur after any insertion.</p>
+<p>Deletion is more complicated but can also be done in <span class="math inline"><em>O</em>(log<em>n</em>)</span> recolorings and <span class="math inline"><em>O</em>(1)</span>
+ (in this case up to 3) rotations. Because deletion is simpler in
+red-black trees than in AVL trees, and because operations on red-black
+trees tend to have slightly smaller constants than corresponding
+operations on AVL trees, red-black trees are more often used than AVL
+trees in practice.</p>
+<h3 id="B-trees"><span class="header-section-number">5.11.5</span> B-trees</h3>
+<p>Neither is used as much as a B-tree, a specialized data structure
+optimized for storage systems where the cost of reading or writing a
+large block (of typically 4096 or 8192 bytes) is no greater than the
+cost of reading or writing a single bit. Such systems include typical
+disk drives, where the disk drive has to spend so long finding data on
+disk that it tries to amortize the huge (tens of millions of CPU clock
+cycles) seek cost over many returned bytes.</p>
+<p>A B-tree is a generalization of a 2–3 tree where each node has between <span class="math inline"><em>M</em>/2</span> and <span class="math inline"><em>M</em> − 1</span> children, where <span class="math inline"><em>M</em></span> is some large constant chosen so that a node (including up to <span class="math inline"><em>M</em> − 1</span> pointers and up to <span class="math inline"><em>M</em> − 2</span> keys) will just fit inside a single block. When a node would otherwise end up with <span class="math inline"><em>M</em></span> children, it splits into two nodes with <span class="math inline"><em>M</em>/2</span>
+ children each, and moves its middle key up into its parent. As in 2–3
+trees this may eventually require the root to split and a new root to be
+ created; in practice, <span class="math inline"><em>M</em></span> is often large enough that a small fixed height is enough to span as much data as the storage system is capable of holding.</p>
+<p>Searches in B-trees require looking through <span class="math inline">log<sub><em>M</em></sub><em>n</em></span> nodes, at a cost of <span class="math inline"><em>O</em>(<em>M</em>)</span> time per node. If <span class="math inline"><em>M</em></span> is a constant the total time is asymptotically <span class="math inline"><em>O</em>(log<em>n</em>)</span>. But the reason for using B-trees is that the <span class="math inline"><em>O</em>(<em>M</em>)</span>
+ cost of reading a block is trivial compared to the much larger constant
+time to find the block on the disk; and so it is better to minimize the
+number of disk accesses (by making <span class="math inline"><em>M</em></span> large) than reduce the CPU time.</p>
+<h3 id="splayTrees"><span class="header-section-number">5.11.6</span> Splay trees</h3>
+<p>Yet another approach to balancing is to do it dynamically. Splay trees, described by Sleator and Tarjan in the paper "<a href="https://www.cs.cmu.edu/%7Esleator/papers/self-adjusting.pdf">Self-adjusting binary search trees</a>"
+ (JACM 32(3):652–686, July 1985) are binary search trees in which every
+search operation rotates the target to the root. If this is done
+correctly, the <strong>amortized cost</strong> of each tree operation is <span class="math inline"><em>O</em>(log<em>n</em>)</span>, although particular rare operations might take as much as <span class="math inline"><em>O</em>(<em>n</em>)</span>
+ time. Splay trees require no extra space because they store no
+balancing information; however, the constant factors on searches can be
+larger because every search requires restructuring the tree. For some
+applications this additional cost is balanced by the splay tree's
+ability to adapt to data access patterns; if some elements of the tree
+are hit more often than others, these elements will tend to migrate to
+the top, and the cost of a typical search will drop to <span class="math inline"><em>O</em>(log<em>m</em>)</span>, where <span class="math inline"><em>m</em></span> is the size of the "working set" of frequently-accessed elements.</p>
+<h4 id="how-splaying-works"><span class="header-section-number">5.11.6.1</span> How splaying works</h4>
+<p>The basic idea of a splay operation is that we move some particular
+node to the root of the tree, using a sequence of rotations that tends
+to fix the balance of the tree if the node starts out very deep. So
+while we might occasionally drive the tree into a state that is highly
+unbalanced, as soon as we try to exploit this by searching for a deep
+node, we'll start balancing the tree so that we can't collect too much
+additional cost. In fact, in order to set up the bad state in the first
+place we will have to do a lot of cheap splaying operations: the missing
+ cost of these cheap splays ends up paying for the cost of the later
+expensive search.</p>
+<p>Splaying a node to the root involves performing rotations two layers
+at a time. There are two main cases, depending on whether the node's
+parent and grandparent are in the same direction (zig-zig) or in
+opposite directions (zig-zag), plus a third case when the node is only
+one step away from the root. At each step, we pick one of these cases
+and apply it, until the target node reaches the root of the tree.</p>
+<p>This is probably best understood by looking at a figure from the original paper:</p>
+<p><img src="" alt="Figure 3 from (Sleator and Tarjan, 1985)"><br>
+The bottom two cases are the ones we will do most of the time.</p>
+<p>Just looking at the picture, it doesn't seem like zig-zig will
+improve balance much. But if we have a long path made up of zig-zig
+cases, each operation will push at least one node off of this path,
+cutting the length of the path in half. So the rebalancing happens as
+much because we are pushing nodes off of the long path as because the
+specific rotation operations improve things locally.</p>
+<h4 id="splayTreeAnalysis"><span class="header-section-number">5.11.6.2</span> Analysis</h4>
+<p>Sleator and Tarjan show that any sequence of <span class="math inline"><em>m</em></span> splay operations on an <span class="math inline"><em>n</em></span>-node splay tree has total cost at most <span class="math inline"><em>O</em>((<em>m</em> + <em>n</em>)log<em>n</em> + <em>m</em>)</span>. For large <span class="math inline"><em>m</em></span> (at least linear in <span class="math inline"><em>n</em></span>), the <span class="math inline"><em>O</em>(<em>m</em>log<em>n</em>)</span> term dominates, giving an amortized cost per operation of <span class="math inline"><em>O</em>(log<em>n</em>)</span>,
+ the same as we get from any balanced binary tree. This immediately
+gives a bound on search costs, because the cost of plunging down the
+tree to find the node we are looking for is proportional to the cost of
+splaying it up to the root.</p>
+<p>Splay trees have a useful "caching" property in that they pull
+frequently-accessed nodes to the top and push
+less-frequently-accessed nodes down. The authors show that if only <span class="math inline"><em>k</em></span> of the <span class="math inline"><em>n</em></span> nodes are accessed, the long-run amortized cost per search drops to <span class="math inline"><em>O</em>(log<em>k</em>)</span>.
+ For more general access sequences, it is conjectured that the cost to
+perform a sufficiently long sequence of searches using a splay tree is
+in fact optimal up to a constant factor (the "dynamic optimality
+conjecture"), but no one has yet been able to prove this conjecture (or
+provide a counterexample).<a href="#fn21" class="footnoteRef" id="fnref21"><sup>21</sup></a></p>
+<h4 id="other-operations"><span class="header-section-number">5.11.6.3</span> Other operations</h4>
+<p>A search operation consists of a standard binary tree search followed
+ by splaying the target node to the root (if present) or the last
+non-null node we reached to the root instead (if not).</p>
+<p>Insertion and deletion are built on top of procedures to split and join trees.</p>
+<p>A split divides a single splay tree into two splay trees, consisting of all elements less than or equal to some value <span class="math inline"><em>x</em></span> and all elements greater than <span class="math inline"><em>x</em></span>. This is done by searching for <span class="math inline"><em>x</em></span>, which brings either <span class="math inline"><em>x</em></span> or the first element less than or greater than <span class="math inline"><em>x</em></span>
+ to the root, then breaking the link between the root and its left or
+right child depending on whether the root should go in the right or left
+ tree.</p>
+<p>A join merges two splay trees <span class="math inline"><em>L</em></span> and <span class="math inline"><em>R</em></span>, where every element in <span class="math inline"><em>L</em></span> is less than every element in <span class="math inline"><em>R</em></span>. This involves splaying the largest element in <span class="math inline"><em>L</em></span> to the root, and then making the root of <span class="math inline"><em>R</em></span> the right child of this element.</p>
+<p>To do an insert of <span class="math inline"><em>x</em></span>, we do a split around <span class="math inline"><em>x</em></span>, then make the roots of the two trees the children of a new element holding <span class="math inline"><em>x</em></span> (unless <span class="math inline"><em>x</em></span> is already present in the tree, in which case we stop before breaking the trees apart).</p>
+<p>To do a delete of an element <span class="math inline"><em>x</em></span>, we splay <span class="math inline"><em>x</em></span> to the root, remove it, then join the two orphaned subtrees.</p>
+<p>For each operation, we are doing a constant number of splays (amortized cost <span class="math inline"><em>O</em>(log<em>n</em>)</span> each), plus <span class="math inline"><em>O</em>(1)</span>
+ additional work. A bit of work is needed to ensure that the joins and
+splits don't break the amortized cost analysis, but this is done in the
+paper, so we will sweep it under the carpet with the rest of the
+analysis.</p>
+<h4 id="top-down-splaying"><span class="header-section-number">5.11.6.4</span> Top-down splaying</h4>
+<p>There are a few remaining details that we need to deal with before
+trying to implement a splay tree. Because the splay tree could become
+very deep, we probably don't want to implement a splay recursively in a
+language like C, because we'll blow out our stack. We also have the
+problem, if we are trying to rotate our target up from the bottom, of
+figuring out what its ancestors are. We could solve both of these
+problems by including parent pointers in our tree, but this would add a
+lot of complexity and negate the space improvement over AVL trees of not
+ having to store heights.</p>
+<p>The solution given in the Sleator-Tarjan paper is to replace the
+bottom-up splay procedure with a top-down splay procedure that
+accomplishes the same task. The idea is that rotating a node up from the
+ bottom effectively splits the tree above it into two new left and right
+ subtrees by pushing ancestors sideways according to the zig-zig and
+zig-zag patterns. But we can recognize these zig-zig and zig-zag patterns
+ from the top as well, and so we can construct these same left and right
+ subtrees from the top down instead of the bottom up. When we do this,
+instead of adding new nodes to the tops of the trees, we will be adding
+new nodes to the bottoms, as the right child of the rightmost node in
+the left tree or the left child of the leftmost node in the right tree.</p>
+<p>Here's the picture, from the original paper:</p>
+<p><img src="" alt="Figure 12 from (Sleator and Tarjan, 1985)"><br>
+To implement this, we need to keep track of the roots of the three
+trees, as well as the locations in the left and right trees where we
+will be adding new vertices. The roots we can just keep pointers to. For
+ the lower corners of the trees, it makes sense to store instead a
+pointer to the pointer location, so that we can modify the pointer in
+the tree (and then move the pointer to point to the pointer in the new
+corner). Initially, these corner pointers will just point to the left
+and right tree roots, which will start out empty.</p>
+<p>The last step (shown as Figure 12 from the paper) pastes the tree
+back together by inserting the left and right trees between the new root
+ and its children.</p>
+<h4 id="splayTreeImplementation"><span class="header-section-number">5.11.6.5</span> An implementation</h4>
+<p>Here is an implementation of a splay tree, with an interface similar to the previous <a href="#avlTreeImplementation">AVL tree implementation</a>.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/*</span>
+<span class="co"> * Basic binary search tree data structure without balancing info.</span>
+<span class="co"> *</span>
+<span class="co"> * Convention: </span>
+<span class="co"> *</span>
+<span class="co"> * Operations that update a tree are passed a struct tree **,</span>
+<span class="co"> * so they can replace the argument with the return value.</span>
+<span class="co"> *</span>
+<span class="co"> * Operations that do not update the tree get a const struct tree *.</span>
+<span class="co"> */</span>
+
+<span class="ot">#define LEFT (0)</span>
+<span class="ot">#define RIGHT (1)</span>
+<span class="ot">#define TREE_NUM_CHILDREN (2)</span>
+
+<span class="kw">struct</span> tree {
+ <span class="co">/* we'll make this an array so that we can make some operations symmetric */</span>
+ <span class="kw">struct</span> tree *child[TREE_NUM_CHILDREN];
+ <span class="dt">int</span> key;
+};
+
+<span class="ot">#define TREE_EMPTY (0)</span>
+
+<span class="co">/* free all elements of a tree, replacing it with TREE_EMPTY */</span>
+<span class="dt">void</span> treeDestroy(<span class="kw">struct</span> tree **root);
+
+<span class="co">/* insert an element into a tree pointed to by root */</span>
+<span class="dt">void</span> treeInsert(<span class="kw">struct</span> tree **root, <span class="dt">int</span> newElement);
+
+<span class="co">/* return 1 if target is in tree, 0 otherwise */</span>
+<span class="co">/* we allow root to be modified to allow for self-balancing trees */</span>
+<span class="dt">int</span> treeContains(<span class="kw">struct</span> tree **root, <span class="dt">int</span> target);
+
+<span class="co">/* delete target from the tree */</span>
+<span class="co">/* has no effect if target is not in tree */</span>
+<span class="dt">void</span> treeDelete(<span class="kw">struct</span> tree **root, <span class="dt">int</span> target);
+
+<span class="co">/* pretty-print the contents of a tree */</span>
+<span class="dt">void</span> treePrint(<span class="dt">const</span> <span class="kw">struct</span> tree *root);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/splay/tree.h" class="uri">examples/trees/splay/tree.h</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;limits.h&gt;</span>
+
+<span class="ot">#include "tree.h"</span>
+
+<span class="co">/* free all elements of a tree, replacing it with TREE_EMPTY */</span>
+<span class="dt">void</span>
+treeDestroy(<span class="kw">struct</span> tree **root)
+{
+ <span class="co">/* we want to avoid doing this recursively, because the tree might be deep */</span>
+ <span class="co">/* so we will repeatedly delete the root until the tree is empty */</span>
+ <span class="kw">while</span>(*root) {
+ treeDelete(root, (*root)-&gt;key);
+ }
+}
+
+<span class="co">/* rotate child in given direction to root */</span>
+<span class="dt">void</span>
+treeRotate(<span class="kw">struct</span> tree **root, <span class="dt">int</span> direction)
+{
+ <span class="kw">struct</span> tree *x;
+ <span class="kw">struct</span> tree *y;
+ <span class="kw">struct</span> tree *b;
+
+ <span class="co">/*</span>
+<span class="co"> * y x </span>
+<span class="co"> * / \ / \</span>
+<span class="co"> * x C &lt;=&gt; A y</span>
+<span class="co"> * / \ / \</span>
+<span class="co"> * A B B C</span>
+<span class="co"> */</span>
+
+ y = *root; assert(y);
+ x = y-&gt;child[direction]; assert(x);
+ b = x-&gt;child[!direction];
+
+ <span class="co">/* do the rotation */</span>
+ *root = x;
+ x-&gt;child[!direction] = y;
+ y-&gt;child[direction] = b;
+}
+
+<span class="co">/* link operations for top-down splay */</span>
+<span class="co">/* this pastes a node in as !d-most node in subtree on side d */</span>
+<span class="dt">static</span> <span class="kw">inline</span> <span class="dt">void</span>
+treeLink(<span class="kw">struct</span> tree ***hook, <span class="dt">int</span> d, <span class="kw">struct</span> tree *node)
+{
+ *hook[d] = node;
+ <span class="co">/* strictly speaking we don't need to do this, but it allows printing the partial trees */</span>
+ node-&gt;child[!d] = <span class="dv">0</span>;
+ hook[d] = &amp;node-&gt;child[!d];
+}
+
+<span class="co">/* splay last element on path to target to root */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+treeSplay(<span class="kw">struct</span> tree **root, <span class="dt">int</span> target)
+{
+ <span class="kw">struct</span> tree *t;
+ <span class="kw">struct</span> tree *child;
+ <span class="kw">struct</span> tree *grandchild;
+ <span class="kw">struct</span> tree *top[TREE_NUM_CHILDREN]; <span class="co">/* accumulator trees that will become subtrees of new root */</span>
+ <span class="kw">struct</span> tree **hook[TREE_NUM_CHILDREN]; <span class="co">/* where to link new elements into accumulator trees */</span>
+ <span class="dt">int</span> d;
+ <span class="dt">int</span> dChild; <span class="co">/* direction of child */</span>
+ <span class="dt">int</span> dGrandchild; <span class="co">/* direction of grandchild */</span>
+
+ <span class="co">/* we don't need to keep following this pointer, we'll just fix it at the end */</span>
+ t = *root;
+
+ <span class="co">/* don't do anything to an empty tree */</span>
+ <span class="kw">if</span>(t == <span class="dv">0</span>) { <span class="kw">return</span>; }
+
+ <span class="co">/* ok, tree is not empty, start chopping it up */</span>
+ <span class="kw">for</span>(d = <span class="dv">0</span>; d &lt; TREE_NUM_CHILDREN; d++) {
+ top[d] = <span class="dv">0</span>;
+ hook[d] = &amp;top[d];
+ }
+
+ <span class="co">/* keep going until we hit the key or we would hit a null pointer in the child */</span>
+ <span class="kw">while</span>(t-&gt;key != target &amp;&amp; (child = t-&gt;child[dChild = t-&gt;key &lt; target]) != <span class="dv">0</span>) {
+ <span class="co">/* child is not null */</span>
+ grandchild = child-&gt;child[dGrandchild = child-&gt;key &lt; target];
+
+<span class="ot">#ifdef DEBUG_SPLAY</span>
+ treePrint(top[<span class="dv">0</span>]);
+ puts(<span class="st">"---"</span>);
+ treePrint(t);
+ puts(<span class="st">"---"</span>);
+ treePrint(top[<span class="dv">1</span>]);
+ puts(<span class="st">"==="</span>);
+<span class="ot">#endif</span>
+
+ <span class="kw">if</span>(grandchild == <span class="dv">0</span> || child-&gt;key == target) {
+ <span class="co">/* zig case; paste root into opposite-side hook */</span>
+ treeLink(hook, !dChild, t);
+ t = child;
+ <span class="co">/* we can break because we know we will hit child == 0 next */</span>
+ <span class="kw">break</span>;
+ } <span class="kw">else</span> <span class="kw">if</span>(dChild == dGrandchild) {
+ <span class="co">/* zig-zig case */</span>
+ <span class="co">/* rotate and then hook up child */</span>
+ <span class="co">/* grandChild becomes new root */</span>
+ treeRotate(&amp;t, dChild);
+ treeLink(hook, !dChild, child);
+ t = grandchild;
+ } <span class="kw">else</span> {
+ <span class="co">/* zig-zag case */</span>
+ <span class="co">/* root goes to !dChild, child goes to dChild, grandchild goes to root */</span>
+ treeLink(hook, !dChild, t);
+ treeLink(hook, dChild, child);
+ t = grandchild;
+ }
+ }
+
+ <span class="co">/* now reassemble the tree */</span>
+ <span class="co">/* t's children go in hooks, top nodes become t's new children */</span>
+ <span class="kw">for</span>(d = <span class="dv">0</span>; d &lt; TREE_NUM_CHILDREN; d++) {
+ *hook[d] = t-&gt;child[d];
+ t-&gt;child[d] = top[d];
+ }
+
+ <span class="co">/* and put t back in *root */</span>
+ *root = t;
+}
+
+<span class="co">/* return 1 if target is in tree, 0 otherwise */</span>
+<span class="dt">int</span>
+treeContains(<span class="kw">struct</span> tree **root, <span class="dt">int</span> target)
+{
+ treeSplay(root, target);
+ <span class="kw">return</span> *root != <span class="dv">0</span> &amp;&amp; (*root)-&gt;key == target;
+}
+
+
+<span class="co">/* insert an element into a tree pointed to by root */</span>
+<span class="dt">void</span>
+treeInsert(<span class="kw">struct</span> tree **root, <span class="dt">int</span> newElement)
+{
+ <span class="kw">struct</span> tree *e;
+ <span class="kw">struct</span> tree *t;
+ <span class="dt">int</span> d; <span class="co">/* which side of e to put old root on */</span>
+
+ treeSplay(root, newElement);
+
+ t = *root;
+
+ <span class="co">/* skip if already present */</span>
+ <span class="kw">if</span>(t &amp;&amp; t-&gt;key == newElement) { <span class="kw">return</span>; }
+
+ <span class="co">/* otherwise split the tree */</span>
+ e = malloc(<span class="kw">sizeof</span>(*e));
+ assert(e);
+
+ e-&gt;key = newElement;
+
+ <span class="kw">if</span>(t == <span class="dv">0</span>) {
+ e-&gt;child[LEFT] = e-&gt;child[RIGHT] = <span class="dv">0</span>;
+ } <span class="kw">else</span> {
+ <span class="co">/* split tree and put e on top */</span>
+ <span class="co">/* we know t is closest to e, so we don't have to move anything else */</span>
+ d = (*root)-&gt;key &gt; newElement;
+ e-&gt;child[d] = t;
+ e-&gt;child[!d] = t-&gt;child[!d];
+ t-&gt;child[!d] = <span class="dv">0</span>;
+ }
+
+ <span class="co">/* either way we stuff e in *root */</span>
+ *root = e;
+}
+
+<span class="co">/* delete target from the tree */</span>
+<span class="co">/* has no effect if target is not in tree */</span>
+<span class="dt">void</span>
+treeDelete(<span class="kw">struct</span> tree **root, <span class="dt">int</span> target)
+{
+ <span class="kw">struct</span> tree *left;
+ <span class="kw">struct</span> tree *right;
+
+ treeSplay(root, target);
+
+ <span class="kw">if</span>(*root &amp;&amp; (*root)-&gt;key == target) {
+ <span class="co">/* save pointers to kids */</span>
+ left = (*root)-&gt;child[LEFT];
+ right = (*root)-&gt;child[RIGHT];
+
+ <span class="co">/* free the old root */</span>
+ free(*root);
+
+ <span class="co">/* if left is empty, just return right */</span>
+ <span class="kw">if</span>(left == <span class="dv">0</span>) {
+ *root = right;
+ } <span class="kw">else</span> {
+ <span class="co">/* first splay max element in left to top */</span>
+ treeSplay(&amp;left, INT_MAX);
+
+ <span class="co">/* now paste in right subtree */</span>
+ left-&gt;child[RIGHT] = right;
+
+ <span class="co">/* return left */</span>
+ *root = left;
+ }
+ }
+}
+
+<span class="co">/* how far to indent each level of the tree */</span>
+<span class="ot">#define INDENTATION_LEVEL (2)</span>
+
+<span class="co">/* print contents of a tree, indented by depth */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+treePrintIndented(<span class="dt">const</span> <span class="kw">struct</span> tree *root, <span class="dt">int</span> depth)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">if</span>(root != <span class="dv">0</span>) {
+ treePrintIndented(root-&gt;child[LEFT], depth<span class="dv">+1</span>);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; INDENTATION_LEVEL*depth; i++) {
+ putchar(' ');
+ }
+ printf(<span class="st">"%d (%p)</span><span class="ch">\n</span><span class="st">"</span>, root-&gt;key, (<span class="dt">void</span> *) root);
+
+ treePrintIndented(root-&gt;child[RIGHT], depth<span class="dv">+1</span>);
+ }
+}
+
+<span class="co">/* print the contents of a tree */</span>
+<span class="dt">void</span>
+treePrint(<span class="dt">const</span> <span class="kw">struct</span> tree *root)
+{
+ treePrintIndented(root, <span class="dv">0</span>);
+}
+
+
+<span class="ot">#ifdef TEST_MAIN</span>
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> i;
+ <span class="dt">const</span> <span class="dt">int</span> n = <span class="dv">10</span>;
+ <span class="kw">struct</span> tree *root = TREE_EMPTY;
+
+ <span class="kw">if</span>(argc != <span class="dv">1</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ assert(!treeContains(&amp;root, i));
+ treeInsert(&amp;root, i);
+ assert(treeContains(&amp;root, i));
+ treePrint(root);
+ puts(<span class="st">"==="</span>);
+ }
+
+ <span class="co">/* now delete everything */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ assert(treeContains(&amp;root, i));
+ treeDelete(&amp;root, i);
+ assert(!treeContains(&amp;root, i));
+ treePrint(root);
+ puts(<span class="st">"==="</span>);
+ }
+
+ treeDestroy(&amp;root);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}
+<span class="ot">#endif</span></code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/splay/tree.c" class="uri">examples/trees/splay/tree.c</a>
+</div>
+<p><a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/splay/Makefile">Makefile</a>. The file <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/splay/speedTest.c">speedTest.c</a>
+ can be used to do a simple test of the efficiency of inserting many
+random elements. On my machine, the splay tree version is about 10%
+slower than the AVL tree for this test on a million elements. This
+probably indicates a bigger slowdown for <code>treeInsert</code> itself, because some of the time will be spent in <code>rand</code> and <code>treeDestroy</code>, but I was too lazy to actually test this further.</p>
+<h4 id="splayTreesMoreInformation"><span class="header-section-number">5.11.6.6</span> More information</h4>
+<p>For more details on splay trees, see <a href="http://dl.acm.org/citation.cfm?id=3835">the original paper</a>, or any number of demos, animations, and other descriptions that can be found via <a href="http://www.google.com/search?q=splay+trees">Google</a>.</p>
+<h3 id="scapegoatTrees"><span class="header-section-number">5.11.7</span> Scapegoat trees</h3>
+<p><strong>Scapegoat trees</strong> are another amortized balanced tree
+data structure. The idea of a scapegoat tree is that if we ever find
+ourselves doing an insert at the end of a path that is too long, we can
+find some subtree rooted at a node along this path that is particularly
+imbalanced and rebalance it all at once at a cost of <span class="math inline"><em>O</em>(<em>k</em>)</span> where <span class="math inline"><em>k</em></span> is the size of the subtree. These were shown by Galperin and Rivest (SODA 1993) to give <span class="math inline"><em>O</em>(log<em>n</em>)</span> amortized cost for inserts, while guaranteeing <span class="math inline"><em>O</em>(log<em>n</em>)</span> depth, so that searches run in <span class="math inline"><em>O</em>(log<em>n</em>)</span>
+ worst-case time; they also came up with the name "scapegoat tree",
+although it turns out the same data structure had previously been
+published by Andersson in 1989. Unlike splay trees, scapegoat trees do
+not require modifying the tree during a search, and unlike AVL trees,
+scapegoat trees do not require tracking any information in nodes
+(although they do require tracking the total size of the tree and, to
+allow for rebalancing after many deletes, the maximum size of the tree
+since the last time the entire tree was rebalanced).</p>
+<p>Unfortunately, scapegoat trees are not very fast, so one is probably better off with an AVL tree.</p>
+<h3 id="skip-lists"><span class="header-section-number">5.11.8</span> Skip lists</h3>
+<p><a href="#skipLists">Skip lists</a> are yet another balanced tree
+data structure, where the tree is disguised as a tower of linked lists.
+Since they use randomization for balance, we describe them with other <a href="#randomizedDataStructures">randomized data structures</a>.</p>
+<h3 id="treeImplementations"><span class="header-section-number">5.11.9</span> Implementations</h3>
+<p>AVL trees and red-black trees have been implemented for every
+reasonable programming language you've ever heard of. For C
+implementations, a good place to start is at <a href="http://adtinfo.org/" class="uri">http://adtinfo.org/</a>.</p>
+<h2 id="graphs"><span class="header-section-number">5.12</span> Graphs</h2>
+<p>These are notes on implementing <strong>graphs</strong> and graph algorithms in C.</p>
+<h3 id="graphDefinitions"><span class="header-section-number">5.12.1</span> Basic definitions</h3>
+<p>A <strong>graph</strong> consists of a set of <strong>nodes</strong> or <strong>vertices</strong> together with a set of <strong>edges</strong> or <strong>arcs</strong> where each edge joins two vertices. Unless otherwise specified, a graph is <strong>undirected</strong>: each edge is an unordered pair <span class="math inline">{<em>u</em>, <em>v</em>}</span>
+ of vertices, and we don't regard either of the two vertices as having a
+ distinct role from the other. However, it is more common in computing
+to consider <strong>directed graphs</strong> or <strong>digraphs</strong> in which edges are <em>ordered</em> pairs <span class="math inline">(<em>u</em>, <em>v</em>)</span>; here the vertex <span class="math inline"><em>u</em></span> is the <strong>source</strong> of the edge and vertex <span class="math inline"><em>v</em></span> is the <strong>sink</strong> or <strong>target</strong>
+ of the edge. Directed edges are usually drawn as arrows and undirected
+edges as curves or line segments. It is always possible to represent an
+undirected graph as a directed graph where each undirected edge <span class="math inline">{<em>u</em>, <em>v</em>}</span> becomes two oppositely directed edges <span class="math inline">(<em>u</em>, <em>v</em>)</span> and <span class="math inline">(<em>v</em>, <em>u</em>)</span>.</p>
+<p>Here is an example of a small graph, drawn using <a href="http://cs.yale.edu/homes/aspnes/classes/223/images/graphs/graph.dot">this file</a> using the <code>circo</code> program from the <a href="http://graphviz.org/">GraphViz</a> library:</p>
+<div class="figure">
+<img src="" alt="A graph">
+<p class="caption">A graph</p>
+</div>
+<p>Here is a similar directed graph, drawn using <a href="http://cs.yale.edu/homes/aspnes/classes/223/images/graphs/digraph.dot">this file</a>:</p>
+<div class="figure">
+<img src="" alt="A directed graph">
+<p class="caption">A directed graph</p>
+</div>
+<p>Given an edge <span class="math inline">(<em>u</em>, <em>v</em>)</span>, the vertices <span class="math inline"><em>u</em></span> and <span class="math inline"><em>v</em></span> are said to be <strong>incident</strong> to the edge and <strong>adjacent</strong> to each other. The number of vertices adjacent to a given vertex <span class="math inline"><em>u</em></span> is the <strong>degree</strong> of <span class="math inline"><em>u</em></span>; this can be divided into the <strong>out-degree</strong> (number of vertices <span class="math inline"><em>v</em></span> such that <span class="math inline">(<em>u</em>, <em>v</em>)</span> is an edge) and the <strong>in-degree</strong> (number of vertices <span class="math inline"><em>v</em></span> such that <span class="math inline">(<em>v</em>, <em>u</em>)</span> is an edge). A vertex <span class="math inline"><em>v</em></span> adjacent to <span class="math inline"><em>u</em></span> is called a <strong>neighbor</strong> of <span class="math inline"><em>u</em></span>, and (in a directed graph) is a <strong>predecessor</strong> of <span class="math inline"><em>u</em></span> if <span class="math inline">(<em>v</em>, <em>u</em>)</span> is an edge and a <strong>successor</strong> of <span class="math inline"><em>u</em></span> if <span class="math inline">(<em>u</em>, <em>v</em>)</span> is an edge. We will allow a node to be its own predecessor and successor.</p>
+<h3 id="Why_graphs_are_useful"><span class="header-section-number">5.12.2</span> Why graphs are useful</h3>
+<p>Graphs can be used to model any situation where we have things that
+are related to each other in pairs; for example, all of the following
+can be represented by graphs:</p>
+<dl>
+<dt>Family trees</dt>
+<dd>Nodes are members, with an edge from each parent to each of their children.
+</dd>
+<dt>Transportation networks</dt>
+<dd>Nodes are airports, intersections, ports, etc. Edges are airline flights, one-way roads, shipping routes, etc.
+</dd>
+<dt>Assignments</dt>
+<dd>Suppose we are assigning classes to classrooms. Let each node be
+either a class or a classroom, and put an edge from a class to a
+classroom if the class is assigned to that room. This is an example of a
+ <strong>bipartite graph</strong>, where the nodes can be divided into two sets <span class="math inline"><em>S</em></span> and <span class="math inline"><em>T</em></span> and all edges go from <span class="math inline"><em>S</em></span> to <span class="math inline"><em>T</em></span>.
+</dd>
+</dl>
+<h3 id="Operations_on_graphs"><span class="header-section-number">5.12.3</span> Operations on graphs</h3>
+<p>What would we like to do to graphs? Generally, we first have to build
+ a graph by starting with a set of nodes and adding in any edges we
+need, and then we want to extract information from it, such as "Is this
+graph connected?", "What is the shortest path in this graph from <span class="math inline"><em>s</em></span> to <span class="math inline"><em>t</em></span>?",
+ or "How many edges can I remove from this graph before some nodes
+become unreachable from other nodes?" There are standard algorithms for
+answering all of these questions; the information these algorithms need
+is typically (a) given a vertex <span class="math inline"><em>u</em></span>, what successors does it have; and sometimes (b) given vertices <span class="math inline"><em>u</em></span> and <span class="math inline"><em>v</em></span>, does the edge <span class="math inline">(<em>u</em>, <em>v</em>)</span> exist in the graph?</p>
+<h3 id="Representations_of_graphs"><span class="header-section-number">5.12.4</span> Representations of graphs</h3>
+<p>A good graph representation will allow us to answer one or both of
+these questions quickly. There are generally two standard
+representations of graphs that are used in graph algorithms, depending
+on which question is more important.</p>
+<p>For both representations, we simplify the representation task by insisting that vertices be labeled <span class="math inline">0, 1, 2, …, <em>n</em> − 1</span>, where <span class="math inline"><em>n</em></span>
+ is the number of vertices in the graph. If we have a graph with
+different vertex labels (say, airport codes), we can enforce an integer
+labeling by a preprocessing step where we assign integer labels, and
+then translate the integer labels back into more useful user labels
+afterwards. The preprocessing step can usually be done using a <a href="#hashTables">hash table</a> in <span class="math inline"><em>O</em>(<em>n</em>)</span>
+ time, which is likely to be smaller than the cost of whatever algorithm
+ we are running on our graph, and the savings in code complexity and
+running time from working with just integer labels will pay this cost
+back many times over.</p>
+<h4 id="Adjacency_matrices"><span class="header-section-number">5.12.4.1</span> Adjacency matrices</h4>
+<p>An <strong>adjacency matrix</strong> is just a matrix <code class="backtick">a</code> where <code class="backtick">a[i][j]</code> is <code class="backtick">1</code> if (i,j) is an edge in the graph and <code class="backtick">0</code> otherwise. It's easy to build an adjacency matrix, and adding or testing for the existence of an edge takes <span class="math inline"><em>O</em>(1)</span> time. The downsides of adjacency matrices are that finding all the outgoing edges from a vertex takes <span class="math inline"><em>O</em>(<em>n</em>)</span> time even if there aren't very many, and the <span class="math inline"><em>O</em>(<em>n</em><sup>2</sup>)</span> space cost is high for "sparse graphs," those with many fewer than <span class="math inline"><em>n</em><sup>2</sup></span> edges.</p>
+<h4 id="Adjacency_lists"><span class="header-section-number">5.12.4.2</span> Adjacency lists</h4>
+<p>An <strong>adjacency list</strong> representation of a graph creates a list of successors for each node <span class="math inline"><em>u</em></span>.
+ These lists may be represented as linked lists (the typical assumption
+in algorithms textbooks), or in languages like C may be represented by
+variable-length arrays. The cost for adding an edge is still <span class="math inline"><em>O</em>(1),</span> but testing for the existence of an edge <span class="math inline">(<em>u</em>, <em>v</em>)</span> rises to <span class="math inline"><em>O</em>(<em>d</em><sup>+</sup>(<em>u</em>))</span>, where <span class="math inline"><em>d</em><sup>+</sup>(<em>u</em>)</span> is the out-degree of <span class="math inline"><em>u</em></span> (i.e., the length of the list of <span class="math inline"><em>u</em></span>'s successors). The cost of enumerating the successors of <span class="math inline"><em>u</em></span> is also <span class="math inline"><em>O</em>(<em>d</em><sup>+</sup>(<em>u</em>))</span>, which is clearly the best possible since it takes that long just to write them all down. Finding predecessors of a node <span class="math inline"><em>u</em></span> is extremely expensive, requiring looking through every list of every node in time <span class="math inline"><em>O</em>(<em>n</em> + <em>m</em>)</span>, where <span class="math inline"><em>m</em></span>
+ is the total number of edges, although if this is something we actually
+ need to do often we can store a second copy of the graph with the edges
+ reversed.</p>
+<p>Adjacency lists are thus most useful when we mostly want to enumerate
+ outgoing edges of each node. This is common in search tasks, where we
+want to find a path from one node to another or compute the distances
+between pairs of nodes. If other operations are important, we can
+optimize them by augmenting the adjacency list representation; for
+example, using sorted arrays for the adjacency lists reduces the cost of
+ edge existence testing to <span class="math inline"><em>O</em>(log(<em>d</em><sup>+</sup>(<em>u</em>)))</span>, and adding a second copy of the graph with reversed edges lets us find all predecessors of <span class="math inline"><em>u</em></span> in <span class="math inline"><em>O</em>(<em>d</em><sup>−</sup>(<em>u</em>))</span> time, where <span class="math inline"><em>d</em><sup>−</sup>(<em>u</em>)</span> is <span class="math inline"><em>u</em></span>'s in-degree.</p>
+<p>Adjacency lists also require much less space than adjacency matrices for sparse graphs: <span class="math inline"><em>O</em>(<em>n</em> + <em>m</em>)</span> vs <span class="math inline"><em>O</em>(<em>n</em><sup>2</sup>)</span> for adjacency matrices. For this reason adjacency lists are more commonly used than adjacency matrices.</p>
+<h5 id="An_implementation"><span class="header-section-number">5.12.4.2.1</span> An implementation</h5>
+<p>Here is an implementation of a basic graph type using adjacency lists.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* basic directed graph type */</span>
+
+<span class="kw">typedef</span> <span class="kw">struct</span> graph *Graph;
+
+<span class="co">/* create a new graph with n vertices labeled 0..n-1 and no edges */</span>
+Graph graphCreate(<span class="dt">int</span> n);
+
+<span class="co">/* free all space used by graph */</span>
+<span class="dt">void</span> graphDestroy(Graph);
+
+<span class="co">/* add an edge to an existing graph */</span>
+<span class="co">/* doing this more than once may have unpredictable results */</span>
+<span class="dt">void</span> graphAddEdge(Graph, <span class="dt">int</span> source, <span class="dt">int</span> sink);
+
+<span class="co">/* return the number of vertices/edges in the graph */</span>
+<span class="dt">int</span> graphVertexCount(Graph);
+<span class="dt">int</span> graphEdgeCount(Graph);
+
+<span class="co">/* return the out-degree of a vertex */</span>
+<span class="dt">int</span> graphOutDegree(Graph, <span class="dt">int</span> source);
+
+<span class="co">/* return 1 if edge (source, sink) exists, 0 otherwise */</span>
+<span class="dt">int</span> graphHasEdge(Graph, <span class="dt">int</span> source, <span class="dt">int</span> sink);
+
+<span class="co">/* invoke f on all edges (u,v) with source u */</span>
+<span class="co">/* supplying data as final parameter to f */</span>
+<span class="co">/* no particular order is guaranteed */</span>
+<span class="dt">void</span> graphForeach(Graph g, <span class="dt">int</span> source,
+ <span class="dt">void</span> (*f)(Graph g, <span class="dt">int</span> source, <span class="dt">int</span> sink, <span class="dt">void</span> *data),
+ <span class="dt">void</span> *data);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/graphs/graph.h" class="uri">examples/graphs/graph.h</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="ot">#include "graph.h"</span>
+
+<span class="co">/* basic directed graph type */</span>
+<span class="co">/* the implementation uses adjacency lists</span>
+<span class="co"> * represented as variable-length arrays */</span>
+
+<span class="co">/* these arrays may or may not be sorted: if one gets long enough</span>
+<span class="co"> * and you call graphHasEdge on its source, it will be */</span>
+
+<span class="kw">struct</span> graph {
+ <span class="dt">int</span> n; <span class="co">/* number of vertices */</span>
+ <span class="dt">int</span> m; <span class="co">/* number of edges */</span>
+ <span class="kw">struct</span> successors {
+ <span class="dt">int</span> d; <span class="co">/* number of successors */</span>
+ <span class="dt">int</span> len; <span class="co">/* number of slots in array */</span>
+ <span class="dt">int</span> isSorted; <span class="co">/* true if list is already sorted */</span>
+ <span class="dt">int</span> list[]; <span class="co">/* actual list of successors starts here */</span>
+ } *alist[];
+};
+
+<span class="co">/* create a new graph with n vertices labeled 0..n-1 and no edges */</span>
+Graph
+graphCreate(<span class="dt">int</span> n)
+{
+ Graph g;
+ <span class="dt">int</span> i;
+
+ g = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> graph) + <span class="kw">sizeof</span>(<span class="kw">struct</span> successors *) * n);
+ assert(g);
+
+ g-&gt;n = n;
+ g-&gt;m = <span class="dv">0</span>;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ g-&gt;alist[i] = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> successors));
+ assert(g-&gt;alist[i]);
+
+ g-&gt;alist[i]-&gt;d = <span class="dv">0</span>;
+ g-&gt;alist[i]-&gt;len = <span class="dv">0</span>;
+ g-&gt;alist[i]-&gt;isSorted= <span class="dv">1</span>;
+ }
+
+ <span class="kw">return</span> g;
+}
+
+<span class="co">/* free all space used by graph */</span>
+<span class="dt">void</span>
+graphDestroy(Graph g)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; g-&gt;n; i++) free(g-&gt;alist[i]);
+ free(g);
+}
+
+<span class="co">/* add an edge to an existing graph */</span>
+<span class="dt">void</span>
+graphAddEdge(Graph g, <span class="dt">int</span> u, <span class="dt">int</span> v)
+{
+ assert(u &gt;= <span class="dv">0</span>);
+ assert(u &lt; g-&gt;n);
+ assert(v &gt;= <span class="dv">0</span>);
+ assert(v &lt; g-&gt;n);
+
+ <span class="co">/* do we need to grow the list? */</span>
+ <span class="kw">while</span>(g-&gt;alist[u]-&gt;d &gt;= g-&gt;alist[u]-&gt;len) {
+ g-&gt;alist[u]-&gt;len = g-&gt;alist[u]-&gt;len * <span class="dv">2</span> + <span class="dv">1</span>; <span class="co">/* +1 because it might have been 0 */</span>
+ g-&gt;alist[u] =
+ realloc(g-&gt;alist[u],
+ <span class="kw">sizeof</span>(<span class="kw">struct</span> successors) + <span class="kw">sizeof</span>(<span class="dt">int</span>) * g-&gt;alist[u]-&gt;len);
+ }
+
+ <span class="co">/* now add the new sink */</span>
+ g-&gt;alist[u]-&gt;list[g-&gt;alist[u]-&gt;d++] = v;
+ g-&gt;alist[u]-&gt;isSorted = <span class="dv">0</span>;
+
+ <span class="co">/* bump edge count */</span>
+ g-&gt;m++;
+}
+
+<span class="co">/* return the number of vertices in the graph */</span>
+<span class="dt">int</span>
+graphVertexCount(Graph g)
+{
+ <span class="kw">return</span> g-&gt;n;
+}
+
+<span class="co">/* return the number of edges in the graph */</span>
+<span class="dt">int</span>
+graphEdgeCount(Graph g)
+{
+ <span class="kw">return</span> g-&gt;m;
+}
+
+<span class="co">/* return the out-degree of a vertex */</span>
+<span class="dt">int</span>
+graphOutDegree(Graph g, <span class="dt">int</span> source)
+{
+ assert(source &gt;= <span class="dv">0</span>);
+ assert(source &lt; g-&gt;n);
+
+ <span class="kw">return</span> g-&gt;alist[source]-&gt;d;
+}
+
+<span class="co">/* when we are willing to call bsearch */</span>
+<span class="ot">#define BSEARCH_THRESHOLD (10)</span>
+
+<span class="dt">static</span> <span class="dt">int</span>
+intcmp(<span class="dt">const</span> <span class="dt">void</span> *a, <span class="dt">const</span> <span class="dt">void</span> *b)
+{
+ <span class="kw">return</span> *((<span class="dt">const</span> <span class="dt">int</span> *) a) - *((<span class="dt">const</span> <span class="dt">int</span> *) b);
+}
+
+<span class="co">/* return 1 if edge (source, sink) exists, 0 otherwise */</span>
+<span class="dt">int</span>
+graphHasEdge(Graph g, <span class="dt">int</span> source, <span class="dt">int</span> sink)
+{
+ <span class="dt">int</span> i;
+
+ assert(source &gt;= <span class="dv">0</span>);
+ assert(source &lt; g-&gt;n);
+ assert(sink &gt;= <span class="dv">0</span>);
+ assert(sink &lt; g-&gt;n);
+
+ <span class="kw">if</span>(graphOutDegree(g, source) &gt;= BSEARCH_THRESHOLD) {
+ <span class="co">/* make sure it is sorted */</span>
+ <span class="kw">if</span>(! g-&gt;alist[source]-&gt;isSorted) {
+ qsort(g-&gt;alist[source]-&gt;list,
+ g-&gt;alist[source]-&gt;d,
+ <span class="kw">sizeof</span>(<span class="dt">int</span>),
+ intcmp);
+ }
+
+ <span class="co">/* call bsearch to do binary search for us */</span>
+ <span class="kw">return</span>
+ bsearch(&amp;sink,
+ g-&gt;alist[source]-&gt;list,
+ g-&gt;alist[source]-&gt;d,
+ <span class="kw">sizeof</span>(<span class="dt">int</span>),
+ intcmp)
+ != <span class="dv">0</span>;
+ } <span class="kw">else</span> {
+ <span class="co">/* just do a simple linear search */</span>
+ <span class="co">/* we could call lfind for this, but why bother? */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; g-&gt;alist[source]-&gt;d; i++) {
+ <span class="kw">if</span>(g-&gt;alist[source]-&gt;list[i] == sink) <span class="kw">return</span> <span class="dv">1</span>;
+ }
+ <span class="co">/* else */</span>
+ <span class="kw">return</span> <span class="dv">0</span>;
+ }
+}
+
+<span class="co">/* invoke f on all edges (u,v) with source u */</span>
+<span class="co">/* supplying data as final parameter to f */</span>
+<span class="dt">void</span>
+graphForeach(Graph g, <span class="dt">int</span> source,
+ <span class="dt">void</span> (*f)(Graph g, <span class="dt">int</span> source, <span class="dt">int</span> sink, <span class="dt">void</span> *data),
+ <span class="dt">void</span> *data)
+{
+ <span class="dt">int</span> i;
+
+ assert(source &gt;= <span class="dv">0</span>);
+ assert(source &lt; g-&gt;n);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; g-&gt;alist[source]-&gt;d; i++) {
+ f(g, source, g-&gt;alist[source]-&gt;list[i], data);
+ }
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/graphs/graph.c" class="uri">examples/graphs/graph.c</a>
+</div>
+<p>And here is some test code: <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/graphs/graphTest.c">graphTest.c</a>.</p>
+<h4 id="Implicit_representations"><span class="header-section-number">5.12.4.3</span> Implicit representations</h4>
+<p>For some graphs, it may not make sense to represent them explicitly. An example might be the word-search graph from <a href="http://www.cs.yale.edu/homes/aspnes/pinewiki/CS223%282f%292005%282f%29Assignments%282f%29HW10.html">CS223/2005/Assignments/HW10</a>,
+ which consists of all words in a dictionary with an edge between any
+two words that differ only by one letter. In such a case, rather than
+building an explicit data structure containing all the edges, we might
+generate edges as needed when computing the neighbors of a particular
+vertex. This gives us an implicit or procedural representation of a
+graph.</p>
+<p>Implicit representations require the ability to return a vector or
+list of values from the neighborhood-computing function. There are
+various ways to do this, of which the most sophisticated might be to use
+an <a href="#iterators">iterator</a>.</p>
+<h3 id="graphSearch"><span class="header-section-number">5.12.5</span> Searching for paths in a graph</h3>
+<p>A <strong>path</strong> is a sequence of vertices <span class="math inline"><em>v</em><sub>1</sub>, <em>v</em><sub>2</sub>, …<em>v</em><sub><em>k</em></sub></span> where each pair <span class="math inline">(<em>v</em><sub><em>i</em></sub>, <em>v</em><sub><em>i</em> + 1</sub>)</span> is an edge. Often we want to find a path from a source vertex <span class="math inline"><em>s</em></span> to a target vertex <span class="math inline"><em>t</em></span>, or more generally to detect which vertices are reachable from a given source vertex <span class="math inline"><em>s</em></span>.
+ We can solve these problems by using any of several standard graph
+search algorithms, of which the simplest and most commonly used are <strong>depth-first search</strong> and <strong>breadth-first search</strong>.</p>
+<p>Both of these search algorithms are a special case of a more general
+algorithm for growing a directed tree in a graph rooted at a given node <span class="math inline"><em>s</em></span>. Here we are using <em>tree</em> as a graph theorist would, to mean any set of <span class="math inline"><em>k</em></span> nodes joined by <span class="math inline"><em>k</em> − 1</span>
+ edges. This is similar to trees used in data structures except that
+there are no limits on the number of children a node can have and no
+ordering constraints within the tree.</p>
+<p>The general tree-growing algorithm might be described as follows:</p>
+<ol style="list-style-type: decimal">
+<li>Start with a tree consisting of just <span class="math inline"><em>s</em></span>.</li>
+<li>If there is at least one edge that leaves the tree (i.e. goes from a
+ node in the current tree to a node outside the current tree), pick the
+"best" such edge and add it and its sink to the tree.</li>
+<li>Repeat step 2 until no edges leave the tree.</li>
+</ol>
+<p>Practically, steps 2 and 3 are implemented by having some sort of
+data structure that acts as a bucket for unprocessed edges. When a new
+node is added to the tree, all of its outgoing edges are thrown into the
+ bucket. The "best" outgoing edge is obtained by applying some sort of
+pop, dequeue, or delete-min operation to the bucket, depending on which
+it provides; if this edge turns out to be an internal edge of the tree
+(maybe we added its sink after putting it in the bucket), we throw it
+away. Otherwise we mark the edge and its sink as belonging to the tree
+and repeat.</p>
+<p>The output of the generic tree-growing algorithm typically consists of (a) marks on all the nodes that are reachable from <span class="math inline"><em>s</em></span>, and (b) for each such node <span class="math inline"><em>v</em></span>, a parent pointer back to the source of the edge that brought <span class="math inline"><em>v</em></span>
+ into the tree. Often these two values can be combined by using a null
+parent pointer to represent the absence of a mark (this usually requires
+ making the root point to itself so that we know it's in the tree).
+Another value that may be useful is a table showing the order in which
+nodes were added to the tree.</p>
+<p>What kind of tree we get depends on what we use for the
+bucket—specifically, on what edge is returned when we ask for the "best"
+ edge. Two easy cases are:</p>
+<ol style="list-style-type: decimal">
+<li>The bucket is a stack. When we ask for an outgoing edge, we get the
+last edge inserted. This has the effect of running along as far as
+possible through the graph before backtracking, since we always keep
+going from the last node if possible. The resulting algorithm is called <strong>depth-first search</strong> and yields a <strong>depth-first search tree</strong>.
+ If we don't care about the lengths of the paths we consider,
+depth-first search is a perfectly good algorithm for testing
+connectivity. It can also be implemented without any auxiliary data
+structures as a recursive procedure, as long as we don't go so deep as
+to blow out the system stack.</li>
+<li>The bucket is a queue. Now when we ask for an outgoing edge, we get
+the first edge inserted. This favors edges that are close to the root:
+we don't start considering edges from nodes adjacent to the root until we
+have already added all the root's successors to the tree, and similarly
+we don't start considering edges at distance k until we have already
+added all the closer nodes to the tree. This gives <strong>breadth-first search</strong>, which constructs a <strong>shortest-path tree</strong> in which every path from the root to a node in the tree has the minimum length.</li>
+</ol>
+<p>Structurally, these algorithms are almost completely identical;
+indeed, if we organize the stack/queue so that it can pop from both
+ends, we can switch between depth-first search and breadth-first search
+just by choosing which end to pop from.</p>
+<p>Below, we give a <a href="#combinedDFSBFS">combined implementation</a> of both
+depth-first search and breadth-first search that does precisely this,
+although this is mostly for show. Typical implementations of
+breadth-first search include a further optimization, where we test an
+edge to see if we should add it to the tree (and possibly add it) before
+ inserting into the queue. This gives the same result as the DFS-like
+implementation but only requires <span class="math inline"><em>O</em>(<em>n</em>)</span> space for the queue instead of <span class="math inline"><em>O</em>(<em>m</em>)</span>,
+ with a smaller constant as well since we don't need to bother storing
+source edges in the queue. An example of this approach is given
+<a href="#graphSearchImplementation">below</a>.</p>
+<p>The running time of any of these algorithms is <em>very</em> fast: we pay <span class="math inline"><em>O</em>(1)</span> per vertex in setup costs and <span class="math inline"><em>O</em>(1)</span> per edge during the search (assuming the input is in adjacency-list form), giving a linear <span class="math inline"><em>O</em>(<em>n</em> + <em>m</em>)</span> total cost. Often it is more expensive to set up the graph in the first place than to run a search on it.</p>
+<h4 id="graphSearchImplementation"><span class="header-section-number">5.12.5.1</span> Implementation of depth-first and breadth-first search</h4>
+<p>Here is a simple implementation of depth-first search, using a
+recursive algorithm, and breadth-first search, using an iterative
+algorithm that maintains a queue of vertices. In both cases the
+algorithm is applied to a sample graph whose vertices are the integers <span class="math inline">0</span> through <span class="math inline"><em>n</em> − 1</span> for some <span class="math inline"><em>n</em></span>, and in which vertex <span class="math inline"><em>x</em></span> has edges to vertices <span class="math inline"><em>x</em>/2</span>, <span class="math inline">3 ⋅ <em>x</em></span>, and <span class="math inline"><em>x</em> + 1</span>, whenever these values are also integers in the range <span class="math inline">0</span> through <span class="math inline"><em>n</em> − 1</span>. For large graphs it may be safer to run an <a href="#combinedDFSBFS">iterative version of DFS</a> that uses an explicit stack instead of a possibly very deep recursion.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;stdint.h&gt;</span>
+
+<span class="kw">typedef</span> <span class="dt">int</span> Vertex;
+
+<span class="ot">#define VERTEX_NULL (-1)</span>
+
+<span class="kw">struct</span> node {
+ Vertex *neighbors; <span class="co">/* array of outgoing edges, terminated by VERTEX_NULL */</span>
+ Vertex parent; <span class="co">/* for search */</span>
+};
+
+<span class="kw">struct</span> graph {
+ size_t n; <span class="co">/* number of vertices */</span>
+ <span class="kw">struct</span> node *v; <span class="co">/* list of vertices */</span>
+};
+
+<span class="dt">void</span>
+graphDestroy(<span class="kw">struct</span> graph *g)
+{
+ Vertex v;
+
+ <span class="kw">for</span>(v = <span class="dv">0</span>; v &lt; g-&gt;n; v++) {
+ free(g-&gt;v[v].neighbors);
+ }
+
+ free(g);
+}
+
+<span class="co">/* this graph has edges from x to x+1, x to 3*x, and x to x/2 (when x is even) */</span>
+<span class="kw">struct</span> graph *
+makeSampleGraph(size_t n)
+{
+ <span class="kw">struct</span> graph *g;
+ Vertex v;
+ <span class="dt">const</span> <span class="dt">int</span> allocNeighbors = <span class="dv">4</span>;
+ <span class="dt">int</span> i;
+
+ g = malloc(<span class="kw">sizeof</span>(*g));
+ assert(g);
+
+ g-&gt;n = n;
+ g-&gt;v = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> node) * n);
+ assert(g-&gt;v);
+
+ <span class="kw">for</span>(v = <span class="dv">0</span>; v &lt; n; v++) {
+ g-&gt;v[v].parent = VERTEX_NULL;
+
+ <span class="co">/* fill in neighbors */</span>
+ g-&gt;v[v].neighbors = malloc(<span class="kw">sizeof</span>(Vertex) * allocNeighbors);
+ i = <span class="dv">0</span>;
+ <span class="kw">if</span>(v % <span class="dv">2</span> == <span class="dv">0</span>) { g-&gt;v[v].neighbors[i++] = v/<span class="dv">2</span>; }
+ <span class="kw">if</span>(<span class="dv">3</span>*v &lt; n) { g-&gt;v[v].neighbors[i++] = <span class="dv">3</span>*v; }
+ <span class="kw">if</span>(v<span class="dv">+1</span> &lt; n) { g-&gt;v[v].neighbors[i++] = v<span class="dv">+1</span>; }
+ g-&gt;v[v].neighbors[i++] = VERTEX_NULL;
+ }
+
+ <span class="kw">return</span> g;
+}
+
+<span class="co">/* output graph in dot format */</span>
+<span class="dt">void</span>
+printGraph(<span class="dt">const</span> <span class="kw">struct</span> graph *g)
+{
+ Vertex u;
+ size_t i;
+
+ puts(<span class="st">"digraph G {"</span>);
+
+ <span class="kw">for</span>(u = <span class="dv">0</span>; u &lt; g-&gt;n; u++) {
+ <span class="kw">for</span>(i = <span class="dv">0</span>; g-&gt;v[u].neighbors[i] != VERTEX_NULL; i++) {
+ printf(<span class="st">"%d -&gt; %d;</span><span class="ch">\n</span><span class="st">"</span>, u, g-&gt;v[u].neighbors[i]);
+ }
+ }
+
+ puts(<span class="st">"}"</span>);
+}
+
+<span class="co">/* reconstruct path back to root from u */</span>
+<span class="dt">void</span>
+printPath(<span class="dt">const</span> <span class="kw">struct</span> graph *g, Vertex u)
+{
+ <span class="kw">do</span> {
+ printf(<span class="st">" %d"</span>, u);
+ u = g-&gt;v[u].parent;
+ } <span class="kw">while</span>(g-&gt;v[u].parent != u);
+}
+
+<span class="co">/* print the tree in dot format */</span>
+<span class="dt">void</span>
+printTree(<span class="dt">const</span> <span class="kw">struct</span> graph *g)
+{
+ Vertex u;
+
+ puts(<span class="st">"digraph G {"</span>);
+
+ <span class="kw">for</span>(u = <span class="dv">0</span>; u &lt; g-&gt;n; u++) {
+ <span class="kw">if</span>(g-&gt;v[u].parent != VERTEX_NULL) {
+ printf(<span class="st">"%d -&gt; %d;</span><span class="ch">\n</span><span class="st">"</span>, u, g-&gt;v[u].parent);
+ }
+ }
+
+ puts(<span class="st">"}"</span>);
+}
+
+<span class="co">/* compute DFS tree starting at root */</span>
+<span class="co">/* this uses a recursive algorithm and will not work on large graphs! */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+dfsHelper(<span class="kw">struct</span> graph *g, Vertex parent, Vertex child)
+{
+ <span class="dt">int</span> i;
+ Vertex neighbor;
+
+ <span class="kw">if</span>(g-&gt;v[child].parent == VERTEX_NULL) {
+ g-&gt;v[child].parent = parent;
+ <span class="kw">for</span>(i = <span class="dv">0</span>; (neighbor = g-&gt;v[child].neighbors[i]) != VERTEX_NULL; i++) {
+ dfsHelper(g, child, neighbor);
+ }
+ }
+}
+
+<span class="dt">void</span>
+dfs(<span class="kw">struct</span> graph *g, Vertex root)
+{
+ dfsHelper(g, root, root);
+}
+
+<span class="co">/* compute BFS tree starting at root */</span>
+<span class="dt">void</span>
+bfs(<span class="kw">struct</span> graph *g, Vertex root)
+{
+ Vertex *q;
+ <span class="dt">int</span> head; <span class="co">/* deq from here */</span>
+ <span class="dt">int</span> tail; <span class="co">/* enq from here */</span>
+ Vertex current;
+ Vertex nbr;
+ <span class="dt">int</span> i;
+
+ q = malloc(<span class="kw">sizeof</span>(Vertex) * g-&gt;n);
+ assert(q);
+
+ head = tail = <span class="dv">0</span>;
+
+ <span class="co">/* push root onto q */</span>
+ g-&gt;v[root].parent = root;
+ q[tail++] = root;
+
+ <span class="kw">while</span>(head &lt; tail) {
+ current = q[head++];
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; (nbr = g-&gt;v[current].neighbors[i]) != VERTEX_NULL; i++) {
+ <span class="kw">if</span>(g-&gt;v[nbr].parent == VERTEX_NULL) {
+ <span class="co">/* haven't seen this guy */</span>
+ <span class="co">/* push it */</span>
+ g-&gt;v[nbr].parent = current;
+ q[tail++] = nbr;
+ }
+ }
+ }
+
+ free(q);
+}
+
+
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> n;
+ <span class="kw">struct</span> graph *g;
+
+ <span class="kw">if</span>(argc != <span class="dv">3</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s action n</span><span class="ch">\n</span><span class="st">where action =</span><span class="ch">\n</span><span class="st"> g - print graph</span><span class="ch">\n</span><span class="st"> d - print dfs tree</span><span class="ch">\n</span><span class="st"> b - print bfs tree</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ n = atoi(argv[<span class="dv">2</span>]);
+
+ g = makeSampleGraph(n);
+
+ <span class="kw">switch</span>(argv[<span class="dv">1</span>][<span class="dv">0</span>]) {
+ <span class="kw">case</span> 'g':
+ printGraph(g);
+ <span class="kw">break</span>;
+ <span class="kw">case</span> 'd':
+ dfs(g, <span class="dv">0</span>);
+ printTree(g);
+ <span class="kw">break</span>;
+ <span class="kw">case</span> 'b':
+ bfs(g, <span class="dv">0</span>);
+ printTree(g);
+ <span class="kw">break</span>;
+ <span class="kw">default</span>:
+ fprintf(stderr, <span class="st">"%s: unknown action '%c'</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>], argv[<span class="dv">1</span>][<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ graphDestroy(g);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/graphSearch/search.c" class="uri">examples/graphSearch/search.c</a>
+</div>
+<p>The output of the program is either the graph, a DFS tree of the graph rooted at <span class="math inline">0</span>, or a BFS tree of the graph rooted at <span class="math inline">0</span>, in a format suitable for feeding to the <a href="http://graphviz.org/">GraphViz</a> program <code>dot</code>, which draws pictures of graphs.</p>
+<p>Here are the pictures for <span class="math inline"><em>n</em> = 20</span>.</p>
+<div class="figure">
+<img src="" alt="The full graph">
+<p class="caption">The full graph</p>
+</div>
+<div class="figure">
+<img src="" alt="DFS tree">
+<p class="caption">DFS tree</p>
+</div>
+<div class="figure">
+<img src="" alt="BFS tree">
+<p class="caption">BFS tree</p>
+</div>
+<h4 id="combinedDFSBFS"><span class="header-section-number">5.12.5.2</span> Combined implementation of depth-first and breadth-first search</h4>
+<p>These are some older implementations of BFS and DFS that demonstrate
+how both can be written using the same code just by changing the
+behavior of the core data structure. This also demonstrates how to
+construct DFS iteratively; for BFS, the <a href="#graphSearchImplementation">preceding implementation</a> is better in every respect.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* Typical usage:</span>
+<span class="co"> *</span>
+<span class="co"> * struct searchInfo *s;</span>
+<span class="co"> * int n;</span>
+<span class="co"> *</span>
+<span class="co"> * s = searchInfoCreate(g);</span>
+<span class="co"> *</span>
+<span class="co"> * n = graph_vertices(g);</span>
+<span class="co"> * for(i = 0; i &lt; n; i++) {</span>
+<span class="co"> * dfs(s, i);</span>
+<span class="co"> * }</span>
+<span class="co"> *</span>
+<span class="co"> * ... use results in s ...</span>
+<span class="co"> *</span>
+<span class="co"> * searchInfoDestroy(s);</span>
+<span class="co"> *</span>
+<span class="co"> */</span>
+
+<span class="co">/* summary information per node for dfs and bfs */</span>
+<span class="co">/* this is not intended to be opaque---user can read it */</span>
+<span class="co">/* (but should not write it!) */</span>
+
+<span class="ot">#define SEARCH_INFO_NULL (-1) </span><span class="co">/* for empty slots */</span>
+
+<span class="kw">struct</span> searchInfo {
+ Graph graph;
+ <span class="dt">int</span> reached; <span class="co">/* count of reached nodes */</span>
+ <span class="dt">int</span> *preorder; <span class="co">/* list of nodes in order first reached */</span>
+ <span class="dt">int</span> *time; <span class="co">/* time[i] == position of node i in preorder list */</span>
+ <span class="dt">int</span> *parent; <span class="co">/* parent in DFS or BFS forest */</span>
+ <span class="dt">int</span> *depth; <span class="co">/* distance from root */</span>
+};
+
+<span class="co">/* allocate and initialize search results structure */</span>
+<span class="co">/* you need to do this before passing it to dfs or bfs */</span>
+<span class="kw">struct</span> searchInfo *searchInfoCreate(Graph g);
+
+<span class="co">/* free searchInfo data---does NOT free graph pointer */</span>
+<span class="dt">void</span> searchInfoDestroy(<span class="kw">struct</span> searchInfo *);
+
+<span class="co">/* perform depth-first search starting at root, updating results */</span>
+<span class="dt">void</span> dfs(<span class="kw">struct</span> searchInfo *results, <span class="dt">int</span> root);
+
+<span class="co">/* perform breadth-first search starting at root, updating results */</span>
+<span class="dt">void</span> bfs(<span class="kw">struct</span> searchInfo *results, <span class="dt">int</span> root);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/graphs/genericSearch.h" class="uri">examples/graphs/genericSearch.h</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="ot">#include "graph.h"</span>
+<span class="ot">#include "genericSearch.h"</span>
+
+<span class="co">/* create an array of n ints initialized to SEARCH_INFO_NULL */</span>
+<span class="dt">static</span> <span class="dt">int</span> *
+createEmptyArray(<span class="dt">int</span> n)
+{
+ <span class="dt">int</span> *a;
+ <span class="dt">int</span> i;
+
+ a = malloc(<span class="kw">sizeof</span>(*a) * n);
+ assert(a);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ a[i] = SEARCH_INFO_NULL;
+ }
+
+ <span class="kw">return</span> a;
+}
+
+<span class="co">/* allocate and initialize search results structure */</span>
+<span class="co">/* you need to do this before passing it to dfs or bfs */</span>
+<span class="kw">struct</span> searchInfo *
+searchInfoCreate(Graph g)
+{
+ <span class="kw">struct</span> searchInfo *s;
+ <span class="dt">int</span> n;
+
+ s = malloc(<span class="kw">sizeof</span>(*s));
+ assert(s);
+
+ s-&gt;graph = g;
+ s-&gt;reached = <span class="dv">0</span>;
+
+ n = graphVertexCount(g);
+
+ s-&gt;preorder = createEmptyArray(n);
+ s-&gt;time = createEmptyArray(n);
+ s-&gt;parent = createEmptyArray(n);
+ s-&gt;depth = createEmptyArray(n);
+
+ <span class="kw">return</span> s;
+}
+
+<span class="co">/* free searchInfo data---does NOT free graph pointer */</span>
+<span class="dt">void</span>
+searchInfoDestroy(<span class="kw">struct</span> searchInfo *s)
+{
+ free(s-&gt;depth);
+ free(s-&gt;parent);
+ free(s-&gt;time);
+ free(s-&gt;preorder);
+ free(s);
+}
+
+<span class="co">/* used inside search routines */</span>
+<span class="kw">struct</span> edge {
+ <span class="dt">int</span> u; <span class="co">/* source */</span>
+ <span class="dt">int</span> v; <span class="co">/* sink */</span>
+};
+
+<span class="co">/* stack/queue */</span>
+<span class="kw">struct</span> queue {
+ <span class="kw">struct</span> edge *e;
+ <span class="dt">int</span> bottom;
+ <span class="dt">int</span> top;
+};
+
+<span class="dt">static</span> <span class="dt">void</span>
+pushEdge(Graph g, <span class="dt">int</span> u, <span class="dt">int</span> v, <span class="dt">void</span> *data)
+{
+ <span class="kw">struct</span> queue *q;
+
+ q = data;
+
+ assert(q-&gt;top &lt; graphEdgeCount(g) + <span class="dv">1</span>);
+
+ q-&gt;e[q-&gt;top].u = u;
+ q-&gt;e[q-&gt;top].v = v;
+ q-&gt;top++;
+}
+
+<span class="co">/* this rather horrible function implements dfs if useQueue == 0 */</span>
+<span class="co">/* and bfs if useQueue == 1 */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+genericSearch(<span class="kw">struct</span> searchInfo *r, <span class="dt">int</span> root, <span class="dt">int</span> useQueue)
+{
+ <span class="co">/* queue/stack */</span>
+ <span class="kw">struct</span> queue q;
+
+ <span class="co">/* edge we are working on */</span>
+ <span class="kw">struct</span> edge cur;
+
+ <span class="co">/* start with empty q */</span>
+ <span class="co">/* we need one space per edge */</span>
+ <span class="co">/* plus one for the fake (root, root) edge */</span>
+ q.e = malloc(<span class="kw">sizeof</span>(*q.e) * (graphEdgeCount(r-&gt;graph) + <span class="dv">1</span>));
+ assert(q.e);
+
+ q.bottom = q.top = <span class="dv">0</span>;
+
+ <span class="co">/* push the root */</span>
+ pushEdge(r-&gt;graph, root, root, &amp;q);
+
+ <span class="co">/* while q.e not empty */</span>
+ <span class="kw">while</span>(q.bottom &lt; q.top) {
+ <span class="kw">if</span>(useQueue) {
+ cur = q.e[q.bottom++];
+ } <span class="kw">else</span> {
+ cur = q.e[--q.top];
+ }
+
+ <span class="co">/* did we visit sink already? */</span>
+ <span class="kw">if</span>(r-&gt;parent[cur.v] != SEARCH_INFO_NULL) <span class="kw">continue</span>;
+
+ <span class="co">/* no */</span>
+ assert(r-&gt;reached &lt; graphVertexCount(r-&gt;graph));
+ r-&gt;parent[cur.v] = cur.u;
+ r-&gt;time[cur.v] = r-&gt;reached;
+ r-&gt;preorder[r-&gt;reached++] = cur.v;
+ <span class="kw">if</span>(cur.u == cur.v) {
+ <span class="co">/* we could avoid this if we were certain SEARCH_INFO_NULL */</span>
+ <span class="co">/* would never be anything but -1 */</span>
+ r-&gt;depth[cur.v] = <span class="dv">0</span>;
+ } <span class="kw">else</span> {
+ r-&gt;depth[cur.v] = r-&gt;depth[cur.u] + <span class="dv">1</span>;
+ }
+
+ <span class="co">/* push all outgoing edges */</span>
+ graphForeach(r-&gt;graph, cur.v, pushEdge, &amp;q);
+ }
+
+ free(q.e);
+}
+
+<span class="dt">void</span>
+dfs(<span class="kw">struct</span> searchInfo *results, <span class="dt">int</span> root)
+{
+ genericSearch(results, root, <span class="dv">0</span>);
+}
+
+<span class="dt">void</span>
+bfs(<span class="kw">struct</span> searchInfo *results, <span class="dt">int</span> root)
+{
+ genericSearch(results, root, <span class="dv">1</span>);
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/graphs/genericSearch.c" class="uri">examples/graphs/genericSearch.c</a>
+</div>
+<p>And here is some test code: <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/graphs/genericSearchTest.c">genericSearchTest.c</a>. You will need to compile <code class="backtick">genericSearchTest.c</code> together with both <code class="backtick">genericSearch.c</code> and <code class="backtick">graph.c</code> to get it to work. This <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/graphs/Makefile">Makefile</a> will do this for you.</p>
+<h4 id="Other_variations_on_the_basic_algorithm"><span class="header-section-number">5.12.5.3</span> Other variations on the basic algorithm</h4>
+<p>Stacks and queues are not the only options for the bucket in the generic search algorithm. Some other choices are:</p>
+<ul>
+<li>A <a href="#priorityQueues">priority queue</a> keyed by edge weights. If the edges have <strong>weights</strong>, the generic tree-builder can be used to find a tree containing <span class="math inline"><em>s</em></span> with minimum total edge weight.<a href="#fn22" class="footnoteRef" id="fnref22"><sup>22</sup></a> The basic idea is to always pull out the lightest edge. The resulting algorithm runs in <span class="math inline"><em>O</em>(<em>n</em> + <em>m</em>log<em>m</em>)</span> time (since each heap operation takes <span class="math inline"><em>O</em>(log<em>m</em>)</span> time), and is known as <strong>Prim's algorithm</strong>. See <a href="http://en.wikipedia.org/wiki/Prim%27s_algorithm" title="WikiPedia">Prim's algorithm</a> for more details.</li>
+<li>A priority queue keyed by path lengths. Here we assume that edges have <strong>lengths</strong>,
+ and we want to build a shortest-path tree where the length of the path
+is no longer just the number of edges it contains but the sum of their
+weights. The basic idea is to keep track of the distance from the root
+to each node in the tree, and assign each edge a key equal to the sum of
+ the distance to its source and its length. The resulting search
+algorithm, known as <strong>Dijkstra's algorithm</strong>, will give a shortest-path tree if all the edge weights are non-negative. See <a href="http://en.wikipedia.org/wiki/Dijkstra%27s_algorithm" title="WikiPedia">Dijkstra's algorithm</a>.</li>
+</ul>
+<h2 id="dynamicProgramming"><span class="header-section-number">5.13</span> Dynamic programming</h2>
+<p><strong>Dynamic programming</strong> is a general-purpose <a href="#algorithmDesignTechniques">algorithm design technique</a> that is most often used to solve <strong>combinatorial optimization</strong> problems, where we are looking for the best possible input to some function chosen from an exponentially large search space.</p>
+<p>There are two parts to dynamic programming. The first part is a programming technique: dynamic programming is essentially <a href="#algorithmDesignTechniquesClassification">divide and conquer</a>
+ run in reverse: we solve a big instance of a problem by breaking it up
+recursively into smaller instances; but instead of carrying out the
+computation recursively from the top down, we start from the bottom with
+ the smallest instances of the problem, solving each increasingly large
+instance in turn and storing the result in a table. The second part is a
+ design principle: in building up our table, we are careful always to
+preserve alternative solutions we may need later, by delaying commitment
+ to particular choices to the extent that we can.</p>
+<p>The bottom-up aspect of dynamic programming is most useful when a
+straightforward recursion would produce many duplicate subproblems. It
+is most efficient when we can enumerate a class of subproblems that
+doesn't include too many extraneous cases that we don't need for our
+original problem.</p>
+<p>To take a simple example, suppose that we want to compute the <span class="math inline"><em>n</em></span>-th Fibonacci number using the defining recurrence</p>
+<ul>
+<li><span class="math inline"><em>F</em>(<em>n</em>)=<em>F</em>(<em>n</em> − 1)+<em>F</em>(<em>n</em> − 2)</span></li>
+<li><span class="math inline"><em>F</em>(1)=<em>F</em>(0)=1</span>.</li>
+</ul>
+<p>A naive approach would simply code the recurrence up directly:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span>
+fib(<span class="dt">int</span> n)
+{
+ <span class="kw">if</span>(n &lt; <span class="dv">2</span>) {
+ <span class="kw">return</span> <span class="dv">1</span>;
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> fib(n<span class="dv">-1</span>) + fib(n<span class="dv">-2</span>);
+ }
+}</code></pre></div>
+<p>The running time of this procedure is easy to compute. The recurrence is</p>
+<ul>
+<li><span class="math inline"><em>T</em>(<em>n</em>)=<em>T</em>(<em>n</em> − 1)+<em>T</em>(<em>n</em> − 2)+<em>Θ</em>(1)</span>,</li>
+</ul>
+<p>whose solution is <span class="math inline"><em>Θ</em>(<em>a</em><sup><em>n</em></sup>)</span> where <span class="math inline"><em>a</em></span> is the golden ratio <span class="math inline">1.6180339887498948482…</span>. This is badly exponential.<a href="#fn23" class="footnoteRef" id="fnref23"><sup>23</sup></a></p>
+<h3 id="Memoization"><span class="header-section-number">5.13.1</span> Memoization</h3>
+<p>The problem is that we keep recomputing values of <code class="backtick">fib</code> that we've already computed. We can avoid this by <strong>memoization</strong>, where we wrap our recursive solution in a <strong>memoizer</strong> that stores previously-computed solutions in a <a href="#hashTables">hash table</a>.
+ Sensible programming languages will let you write a memoizer once and
+apply it to arbitrary recursive functions. In less sensible programming
+languages it is usually easier just to embed the memoization in the
+function definition itself, like this:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span>
+memoFib(<span class="dt">int</span> n)
+{
+ <span class="dt">int</span> ret;
+
+ <span class="kw">if</span>(hashContains(FibHash, n)) {
+ <span class="kw">return</span> hashGet(FibHash, n);
+ } <span class="kw">else</span> {
+ ret = memoFib(n<span class="dv">-1</span>) + memoFib(n<span class="dv">-2</span>);
+ hashPut(FibHash, n, ret);
+ <span class="kw">return</span> ret;
+ }
+}</code></pre></div>
+<p>The assumption here is that <code class="backtick">FibHash</code> is a global hash table that we have initialized to map <code class="backtick">0</code> and <code class="backtick">1</code> to <code class="backtick">1</code>. The total cost of running this procedure is <span class="math inline"><em>O</em>(<em>n</em>)</span>, because <code class="backtick">memoFib</code> is called at most twice for each value <span class="math inline"><em>k</em></span> in <span class="math inline">0…<em>n</em></span>.</p>
+<p>Memoization is a very useful technique in practice, but it is not
+popular with algorithm designers because computing the running time of a
+ complex memoized procedure is often much more difficult than computing
+the time to fill a nice clean table. The use of a hash table instead of
+an array may also add overhead (and code complexity) that comes out in
+the constant factors. But it is always the case that a memoized
+recursive procedure considers no more subproblems than a table-based
+solution, and it may consider many fewer if we are sloppy about what we
+put in our table (perhaps because we can't easily predict what
+subproblems will be useful).</p>
+<h3 id="Dynamic_programming"><span class="header-section-number">5.13.2</span> Dynamic programming</h3>
+<p>Dynamic programming comes to the rescue. Because we know what smaller
+ cases we have to reduce F(n) to, instead of computing F(n) top-down, we
+ compute it bottom-up, hitting all possible smaller cases and storing
+the results in an array:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span>
+fib2(<span class="dt">int</span> n)
+{
+ <span class="dt">int</span> *a;
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> ret;
+
+ <span class="kw">if</span>(n &lt; <span class="dv">2</span>) {
+ <span class="kw">return</span> <span class="dv">1</span>;
+ } <span class="kw">else</span> {
+ a = malloc(<span class="kw">sizeof</span>(*a) * (n<span class="dv">+1</span>));
+ assert(a);
+
+ a[<span class="dv">0</span>] = a[<span class="dv">1</span>] = <span class="dv">1</span>;
+
+ <span class="kw">for</span>(i = <span class="dv">2</span>; i &lt;= n; i++) {
+ a[i] = a[i<span class="dv">-1</span>] + a[i<span class="dv">-2</span>];
+ }
+ }
+
+ ret = a[n];
+ free(a);
+ <span class="kw">return</span> ret;
+}</code></pre></div>
+<p>Notice the recurrence is exactly the same in this version as in our
+original recursive version, except that instead of computing F(n-1) and
+F(n-2) recursively, we just pull them out of the array. This is typical
+of dynamic-programming solutions: often the most tedious editing step in
+ converting a recursive algorithm to dynamic programming is changing
+parentheses to square brackets. As with memoization, the effect of this
+conversion is dramatic; what used to be an exponential-time algorithm is
+ now linear-time.</p>
+<h4 id="More_examples"><span class="header-section-number">5.13.2.1</span> More examples</h4>
+<h5 id="Longest_increasing_subsequence"><span class="header-section-number">5.13.2.1.1</span> Longest increasing subsequence</h5>
+<p>Suppose that we want to compute the <strong>longest increasing subsequence</strong>
+ of an array. This is a sequence, not necessarily contiguous, of
+elements from the array such that each is strictly larger than the one
+before it. Since there are <span class="math inline">2<sup><em>n</em></sup></span> different subsequences of an <span class="math inline"><em>n</em></span>-element array, the brute-force approach of trying all of them might take a while.</p>
+<p>What makes this problem suitable for dynamic programming is that any
+prefix of a longest increasing subsequence is a longest increasing
+subsequence of the part of the array that ends where the prefix ends; if
+ it weren't, we could make the big sequence longer by choosing a longer
+prefix. So to find the longest increasing subsequence of the whole
+array, we build up a table of longest increasing subsequences for each
+initial prefix of the array. At each step, when finding the longest
+increasing subsequence of elements <span class="math inline">0…<em>i</em></span>,
+ we can just scan through all the possible values for the second-to-last
+ element and read the length of the best possible subsequence ending
+there out of the table. When the table is complete, we can scan for the
+best last element and then work backwards to reconstruct the actual
+subsequence.</p>
+<p>This last step requires some explanation. We don't really want to store in <code class="backtick">table[i]</code> the full longest increasing subsequence ending at position <code class="backtick">i</code>,
+ because it may be very big. Instead, we store the index of the
+second-to-last element of this sequence. Since that second-to-last
+element also has a table entry that stores the index of its predecessor,
+ by following the indices we can generate a subsequence of length <span class="math inline"><em>O</em>(<em>n</em>)</span>, even though we only stored <span class="math inline"><em>O</em>(1)</span> pieces of information in each table entry. This is similar to the parent pointer technique used in <a href="#graphSearch">graph search algorithms</a>.</p>
+<p>Here's what the code looks like:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* compute a longest strictly increasing subsequence of an array of ints */</span>
+<span class="co">/* input is array a with given length n */</span>
+<span class="co">/* returns length of LIS */</span>
+<span class="co">/* If the output pointer is non-null, writes LIS to output pointer. */</span>
+<span class="co">/* Caller should provide at least sizeof(int)*n space for output */</span>
+<span class="co">/* If there are multiple LIS's, which one is returned is arbitrary. */</span>
+<span class="dt">unsigned</span> <span class="dt">long</span>
+longest_increasing_subsequence(<span class="dt">const</span> <span class="dt">int</span> a[], <span class="dt">unsigned</span> <span class="dt">long</span> n, <span class="dt">int</span> *output);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/dynamicProgramming/lis/lis.h" class="uri">examples/dynamicProgramming/lis/lis.h</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="ot">#include "lis.h"</span>
+
+<span class="dt">unsigned</span> <span class="dt">long</span>
+longest_increasing_subsequence(<span class="dt">const</span> <span class="dt">int</span> a[], <span class="dt">unsigned</span> <span class="dt">long</span> n, <span class="dt">int</span> *output)
+{
+ <span class="kw">struct</span> lis_data {
+ <span class="dt">unsigned</span> <span class="dt">long</span> length; <span class="co">/* length of LIS ending at this point */</span>
+ <span class="dt">unsigned</span> <span class="dt">long</span> prev; <span class="co">/* previous entry in the LIS ending at this point */</span>
+ } *table;
+
+ <span class="dt">unsigned</span> <span class="dt">long</span> best; <span class="co">/* best entry in table */</span>
+ <span class="dt">unsigned</span> <span class="dt">long</span> scan; <span class="co">/* used to generate output */</span>
+
+ <span class="dt">unsigned</span> <span class="dt">long</span> i;
+ <span class="dt">unsigned</span> <span class="dt">long</span> j;
+ <span class="dt">unsigned</span> <span class="dt">long</span> best_length;
+
+ <span class="co">/* special case for empty table */</span>
+ <span class="kw">if</span>(n == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+
+ table = malloc(<span class="kw">sizeof</span>(*table) * n);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ <span class="co">/* default best is just this element by itself */</span>
+ table[i].length = <span class="dv">1</span>;
+ table[i].prev = n; <span class="co">/* default end-of-list value */</span>
+
+ <span class="co">/* but try all other possibilities */</span>
+ <span class="kw">for</span>(j = <span class="dv">0</span>; j &lt; i; j++) {
+ <span class="kw">if</span>(a[j] &lt; a[i] &amp;&amp; table[j].length + <span class="dv">1</span> &gt; table[i].length) {
+ <span class="co">/* we have a winner */</span>
+ table[i].length = table[j].length + <span class="dv">1</span>;
+ table[i].prev = j;
+ }
+ }
+ }
+
+ <span class="co">/* now find the best of the lot */</span>
+ best = <span class="dv">0</span>;
+
+ <span class="kw">for</span>(i = <span class="dv">1</span>; i &lt; n; i++) {
+ <span class="kw">if</span>(table[i].length &gt; table[best].length) {
+ best = i;
+ }
+ }
+
+ <span class="co">/* table[best].length is now our return value */</span>
+ <span class="co">/* save it so that we don't lose it when we free table */</span>
+ best_length = table[best].length;
+
+ <span class="co">/* do we really have to compute the output? */</span>
+ <span class="kw">if</span>(output) {
+ <span class="co">/* yes :-( */</span>
+ scan = best;
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; best_length; i++) {
+ assert(scan &gt;= <span class="dv">0</span>);
+ assert(scan &lt; n);
+
+ output[best_length - i - <span class="dv">1</span>] = a[scan];
+
+ scan = table[scan].prev;
+ }
+ }
+
+ free(table);
+
+ <span class="kw">return</span> best_length;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/dynamicProgramming/lis/lis.c" class="uri">examples/dynamicProgramming/lis/lis.c</a>
+</div>
+<p>A sample program that runs <code class="backtick">longest_increasing_subsequence</code> on a list of numbers passed in by <code class="backtick">stdin</code> is given in <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/dynamicProgramming/lis/test_lis.c">test_lis.c</a>. Here is a <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/dynamicProgramming/lis/Makefile">Makefile</a>.</p>
+<p>Implemented like this, the cost of finding an LIS is <span class="math inline"><em>O</em>(<em>n</em><sup>2</sup>)</span>,
+ because to compute each entry in the array, we have to search through
+all the previous entries to find the longest path that ends at a value
+less than the current one. This can be improved by using a more clever
+data structure. If we use a binary search tree that stores paths keyed by
+ the last value, and augment each node with a field that represents the
+maximum length of any path in the subtree under that node, then we can
+find the longest feasible path that we can append the current node to in
+ <span class="math inline"><em>O</em>(log<em>n</em>)</span> time instead of <span class="math inline"><em>O</em>(<em>n</em>)</span> time. This brings the total cost down to only <span class="math inline"><em>O</em>(<em>n</em>log<em>n</em>)</span>.</p>
+<h5 id="All-pairs_shortest_paths"><span class="header-section-number">5.13.2.1.2</span> All-pairs shortest paths</h5>
+<p>Suppose we want to compute the distance between any two points in a graph, where each edge <span class="math inline"><em>u</em><em>v</em></span> has a length <span class="math inline">ℓ<sub><em>u</em><em>v</em></sub></span> (<span class="math inline">+∞</span> for edges not in the graph) and the distance between two vertices <span class="math inline"><em>s</em></span> and <span class="math inline"><em>t</em></span> is the minimum over all <span class="math inline"><em>s</em></span>–<span class="math inline"><em>t</em></span> paths of the total length of the edges. There are various algorithms for doing this for a particular <span class="math inline"><em>s</em></span> and <span class="math inline"><em>t</em></span>, but there is also a very simple dynamic programming algorithm known as <strong>Floyd-Warshall</strong> that computes the distance between all <span class="math inline"><em>n</em><sup>2</sup></span> pairs of vertices in <span class="math inline"><em>Θ</em>(<em>n</em><sup>3</sup>)</span> time.</p>
+<p>The assumption is that the graph does not contain a <strong>negative cycle</strong>
+ (a cycle with total edge weight less than zero), so that for two
+connected nodes there is always a shortest path that uses each
+intermediate vertex at most once. If a graph does contain a negative
+cycle, the algorithm will detect it by reporting the distance from <span class="math inline"><em>i</em></span> to <span class="math inline"><em>i</em></span> less than zero for some <span class="math inline"><em>i</em></span>.</p>
+<p>Negative cycles don't generally exist in distance graphs (unless you
+have the ability to move faster than the speed of light), but they can
+come up in other contexts. One example would be in currency arbitrage,
+where each node is some currency, the weight of an edge <span class="math inline"><em>u</em><em>v</em></span> is the logarithm of the exchange rate from <span class="math inline"><em>u</em></span> to <span class="math inline"><em>v</em></span>, and the total weight of a path from <span class="math inline"><em>s</em></span> to <span class="math inline"><em>t</em></span> gives the logarithm of the number of units of <span class="math inline"><em>t</em></span> you can get for one unit of <span class="math inline"><em>s</em></span>,
+ since adding the logs along the path corresponds to multiplying all the
+ exchange rates. In this context a negative cycle gives you a way to
+turn a dollar into less than a dollar by running it through various
+other currencies, which is not useful, but a <em>positive cycle</em>
+lets you pay for the supercomputer you bought to find it before anybody
+else did. If we negate all the edge weights, we turn a positive cycle
+into a negative cycle, making a fast algorithm for finding this negative
+ cycle potentially valuable.</p>
+<p>However, if we don't have any negative cycles, the idea is that we
+can create restricted instances of the shortest-path problem by limiting
+ the maximum index of any node used on the path. Let <span class="math inline"><em>L</em>(<em>i</em>, <em>j</em>, <em>k</em>)</span> be the length of a shortest path from <span class="math inline"><em>i</em></span> to <span class="math inline"><em>j</em></span> that uses only the vertices <span class="math inline">0, …, <em>k</em> − 1</span> along the path (not counting the endpoints <span class="math inline"><em>i</em></span> and <span class="math inline"><em>j</em></span>, which can be anything). When <span class="math inline"><em>k</em> = 0</span>, this is just the length of the <span class="math inline"><em>i</em></span>–<span class="math inline"><em>j</em></span> edge, or <span class="math inline">+∞</span> if there is no such edge. So we can start by computing <span class="math inline"><em>L</em>(<em>i</em>, <em>j</em>, 0)</span> for all <span class="math inline"><em>i</em></span> and <span class="math inline"><em>j</em></span>. Now given <span class="math inline"><em>L</em>(<em>i</em>, <em>j</em>, <em>k</em>)</span> for all <span class="math inline"><em>i</em></span> and <span class="math inline"><em>j</em></span> and some <span class="math inline"><em>k</em></span>, we can compute <span class="math inline"><em>L</em>(<em>i</em>, <em>j</em>, <em>k</em> + 1)</span> by observing that any shortest <span class="math inline"><em>i</em></span>–<span class="math inline"><em>j</em></span> path that has intermediate vertices in <span class="math inline">0…<em>k</em></span> either consists of a path with intermediate vertices in <span class="math inline">0…<em>k</em> − 1</span>, or consists of a path from <span class="math inline"><em>i</em></span> to <span class="math inline"><em>k</em></span> followed by a path from <span class="math inline"><em>k</em></span> to <span class="math inline"><em>j</em></span>, where both of these paths have intermediate vertices in <span class="math inline">0…<em>k</em> − 1</span>. So we get</p>
+<ul>
+<li><span class="math inline"><em>L</em>(<em>i</em>, <em>j</em>, <em>k</em> + 1)=min(<em>L</em>(<em>i</em>, <em>j</em>, <em>k</em>),<em>L</em>(<em>i</em>, <em>k</em>, <em>k</em>)+<em>L</em>(<em>k</em>, <em>j</em>, <em>k</em>))</span>.</li>
+</ul>
+<p>Since this takes <span class="math inline"><em>O</em>(1)</span> time to compute if we have previously computed <span class="math inline"><em>L</em>(<em>i</em>, <em>j</em>, <em>k</em>)</span> for all <span class="math inline"><em>i</em></span> and <span class="math inline"><em>j</em></span>, we can fill in the entire table in <span class="math inline"><em>O</em>(<em>n</em><sup>3</sup>)</span> time.</p>
+<p>Implementation details:</p>
+<ul>
+<li>If we want to reconstruct the shortest path in addition to computing its length, we can store the first vertex for each <span class="math inline"><em>i</em></span>–<span class="math inline"><em>j</em></span> path. This will either be (a) the first vertex in the <span class="math inline"><em>i</em></span>–<span class="math inline"><em>j</em></span> path for the previous <span class="math inline"><em>k</em></span>, or (b) the first vertex in the <span class="math inline"><em>i</em></span>–<span class="math inline"><em>k</em></span> path.</li>
+<li>We don't actually need to use a full three-dimensional array. It's enough to store one value for each pair <span class="math inline"><em>i</em>, <em>j</em></span> and let <span class="math inline"><em>k</em></span> be implicit. At each step we let <span class="math inline"><em>L</em>[<em>i</em>][<em>j</em>]</span> be <span class="math inline">min(<em>L</em>[<em>i</em>][<em>j</em>],<em>L</em>[<em>i</em>][<em>k</em>]+<em>L</em>[<em>k</em>][<em>j</em>])</span>. The trick is that we don't care if <span class="math inline"><em>L</em>[<em>i</em>][<em>k</em>]</span> or <span class="math inline"><em>L</em>[<em>k</em>][<em>j</em>]</span> has already been updated, because that will only give us paths with a few extra <span class="math inline"><em>k</em></span> vertices, which won't be the shortest paths anyway assuming no negative cycles.</li>
+</ul>
+<h5 id="longestCommonSubsequence"><span class="header-section-number">5.13.2.1.3</span> Longest common subsequence</h5>
+<p>Given sequences of characters <span class="math inline"><em>v</em></span> and <span class="math inline"><em>w</em></span>, <span class="math inline"><em>v</em></span> is a <em>subsequence</em> of <span class="math inline"><em>w</em></span> if every character in <span class="math inline"><em>v</em></span> appears in <span class="math inline"><em>w</em></span> in the same order. For example, <code class="backtick">aaaaa</code>, <code class="backtick">brac</code>, and <code class="backtick">badar</code> are all subsequences of <code class="backtick">abracadabra</code>, but <code class="backtick">badcar</code> is not. A longest common subsequence (LCS for short) of two sequences <span class="math inline"><em>x</em></span> and <span class="math inline"><em>y</em></span> is the longest sequence that is a subsequence of both: two longest common subsequences of <code class="backtick">abracadabra</code> and <code class="backtick">badcar</code> are <code class="backtick">badar</code> and <code class="backtick">bacar</code>.</p>
+<p>As with longest increasing subsequence, one can find the LCS of two
+sequences by brute force, but it will take even longer. Not only are
+there <span class="math inline">2<sup><em>n</em></sup></span> subsequences of a sequence of length <span class="math inline"><em>n</em></span>,
+ but checking each subsequence of the first to see if it is also a
+subsequence of the second may take some time. It is better to solve the
+problem using dynamic programming. Having sequences gives an obvious
+linear structure to exploit: the basic strategy will be to compute LCSs
+for increasingly long prefixes of the inputs. But with two sequences we
+will have to consider prefixes of both, which will give us a
+two-dimensional table where rows correspond to prefixes of sequence <span class="math inline"><em>x</em></span> and columns correspond to prefixes of sequence <span class="math inline"><em>y</em></span>.</p>
+<p>The recursive decomposition that makes this technique work looks like this. Let <span class="math inline"><em>L</em>(<em>x</em>, <em>y</em>)</span> be the length of the longest common subsequence of <span class="math inline"><em>x</em></span> and <span class="math inline"><em>y</em></span>, where <span class="math inline"><em>x</em></span> and <span class="math inline"><em>y</em></span> are strings. Let <span class="math inline"><em>a</em></span> and <span class="math inline"><em>b</em></span> be single characters. Then <span class="math inline"><em>L</em>(<em>x</em><em>a</em>, <em>y</em><em>b</em>)</span> is the maximum of:</p>
+<ul>
+<li><span class="math inline"><em>L</em>(<em>x</em>, <em>y</em>)+1</span>, if <span class="math inline"><em>a</em> = <em>b</em></span>,</li>
+<li><span class="math inline"><em>L</em>(<em>x</em><em>a</em>, <em>y</em>)</span>, or</li>
+<li><span class="math inline"><em>L</em>(<em>x</em>, <em>y</em><em>b</em>)</span>.</li>
+</ul>
+<p>The idea is that we either have a new matching character we couldn't
+use before (the first case), or we have an LCS that doesn't use one of <span class="math inline"><em>a</em></span> or <span class="math inline"><em>b</em></span> (the remaining cases). In each case the recursive call to LCS involves a shorter prefix of <span class="math inline"><em>x</em><em>a</em></span> or <span class="math inline"><em>y</em><em>b</em></span>, with an ultimate base case <span class="math inline"><em>L</em>(<em>x</em>, <em>y</em>)=0</span> if at least one of <span class="math inline"><em>x</em></span> or <span class="math inline"><em>y</em></span>
+ is the empty string. So we can fill in these values in a table, as long
+ as we are careful to make sure that the shorter prefixes are always
+filled first. If we are smart about remembering which case applies at
+each step, we can even go back and extract an actual LCS, by stitching
+together the places where <span class="math inline"><em>a</em> = <em>b</em></span>. Here's a short C program that does this:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;string.h&gt;</span>
+<span class="ot">#include &lt;limits.h&gt;</span>
+
+<span class="co">/* compute longest common subsequence of argv[1] and argv[2] */</span>
+
+<span class="co">/* computes longest common subsequence of x and y, writes result to lcs */</span>
+<span class="co">/* lcs should be pre-allocated by caller to 1 + minimum length of x or y */</span>
+<span class="dt">void</span>
+longestCommonSubsequence(<span class="dt">const</span> <span class="dt">char</span> *x, <span class="dt">const</span> <span class="dt">char</span> *y, <span class="dt">char</span> *lcs)
+{
+ <span class="dt">int</span> xLen;
+ <span class="dt">int</span> yLen;
+ <span class="dt">int</span> i; <span class="co">/* position in x */</span>
+ <span class="dt">int</span> j; <span class="co">/* position in y */</span>
+
+ xLen = strlen(x);
+ yLen = strlen(y);
+
+ <span class="co">/* best choice at each position */</span>
+ <span class="co">/* length gives length of LCS for these prefixes */</span>
+ <span class="co">/* prev points to previous substring */</span>
+ <span class="co">/* newChar if non-null is new character */</span>
+ <span class="kw">struct</span> choice {
+ <span class="dt">int</span> length;
+ <span class="kw">struct</span> choice *prev;
+ <span class="dt">char</span> newChar;
+ } best[xLen][yLen];
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; xLen; i++) {
+ <span class="kw">for</span>(j = <span class="dv">0</span>; j &lt; yLen; j++) {
+ <span class="co">/* we can always do no common substring */</span>
+ best[i][j].length = <span class="dv">0</span>;
+ best[i][j].prev = <span class="dv">0</span>;
+ best[i][j].newChar = <span class="dv">0</span>;
+
+ <span class="co">/* if we have a match, try adding new character */</span>
+ <span class="co">/* this is always better than the nothing we started with */</span>
+ <span class="kw">if</span>(x[i] == y[j]) {
+ best[i][j].newChar = x[i];
+ <span class="kw">if</span>(i &gt; <span class="dv">0</span> &amp;&amp; j &gt; <span class="dv">0</span>) {
+ best[i][j].length = best[i<span class="dv">-1</span>][j<span class="dv">-1</span>].length + <span class="dv">1</span>;
+ best[i][j].prev = &amp;best[i<span class="dv">-1</span>][j<span class="dv">-1</span>];
+ } <span class="kw">else</span> {
+ best[i][j].length = <span class="dv">1</span>;
+ }
+ }
+
+ <span class="co">/* maybe we can do even better by ignoring a new character */</span>
+ <span class="kw">if</span>(i &gt; <span class="dv">0</span> &amp;&amp; best[i<span class="dv">-1</span>][j].length &gt; best[i][j].length) {
+ <span class="co">/* throw away a character from x */</span>
+ best[i][j].length = best[i<span class="dv">-1</span>][j].length;
+ best[i][j].prev = &amp;best[i<span class="dv">-1</span>][j];
+ best[i][j].newChar = <span class="dv">0</span>;
+ }
+
+ <span class="kw">if</span>(j &gt; <span class="dv">0</span> &amp;&amp; best[i][j<span class="dv">-1</span>].length &gt; best[i][j].length) {
+ <span class="co">/* throw away a character from y */</span>
+ best[i][j].length = best[i][j<span class="dv">-1</span>].length;
+ best[i][j].prev = &amp;best[i][j<span class="dv">-1</span>];
+ best[i][j].newChar = <span class="dv">0</span>;
+ }
+
+ }
+ }
+
+ <span class="co">/* reconstruct string working backwards from best[xLen-1][yLen-1] */</span>
+ <span class="dt">int</span> outPos; <span class="co">/* position in output string */</span>
+ <span class="kw">struct</span> choice *p; <span class="co">/* for chasing linked list */</span>
+
+ outPos = best[xLen<span class="dv">-1</span>][yLen<span class="dv">-1</span>].length;
+ lcs[outPos--] = '\<span class="dv">0</span>';
+
+ <span class="kw">for</span>(p = &amp;best[xLen<span class="dv">-1</span>][yLen<span class="dv">-1</span>]; p; p = p-&gt;prev) {
+ <span class="kw">if</span>(p-&gt;newChar) {
+ assert(outPos &gt;= <span class="dv">0</span>);
+ lcs[outPos--] = p-&gt;newChar;
+ }
+ }
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="kw">if</span>(argc != <span class="dv">3</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s string1 string2</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ <span class="dt">char</span> output[strlen(argv[<span class="dv">1</span>]) + <span class="dv">1</span>];
+
+ longestCommonSubsequence(argv[<span class="dv">1</span>], argv[<span class="dv">2</span>], output);
+
+ puts(output);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/dynamicProgramming/lcs/lcs.c" class="uri">examples/dynamicProgramming/lcs/lcs.c</a>
+</div>
+<p>The whole thing takes <span class="math inline"><em>O</em>(<em>n</em><em>m</em>)</span> time where <span class="math inline"><em>n</em></span> and <span class="math inline"><em>m</em></span> are the lengths of <span class="math inline"><em>x</em></span> and <span class="math inline"><em>y</em></span>.</p>
+<h2 id="randomization"><span class="header-section-number">5.14</span> Randomization</h2>
+<p>Randomization is a fundamental technique in algorithm design, that
+allows programs to run quickly when the average-case behavior of an
+algorithm is better than the worst-case behavior. It is also heavily
+used in games, both in entertainment and gambling. The latter
+application gives the only example I know of a <a href="http://www.zdnet.com/article/comdex-99-the-mysterious-death-of-larry-volk/">programmer killed for writing bad code</a>, which shows how serious good random-number generation is.</p>
+<h3 id="Generating_random_values_in_C"><span class="header-section-number">5.14.1</span> Generating random values in C</h3>
+<p>If you want random values in a C program, there are three typical
+ways of getting them, depending on how good (i.e. uniform, uncorrelated,
+ and unpredictable) you want them to be.</p>
+<h4 id="The_rand_function_from_the_standard_library"><span class="header-section-number">5.14.1.1</span> The <code>rand</code> function from the standard library</h4>
+<p>E.g.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, rand());
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/randomization/randOnce.c" class="uri">examples/randomization/randOnce.c</a>
+</div>
+<p>The <code class="backtick">rand</code> function, declared in <code class="backtick">stdlib.h</code>, returns a random-looking integer in the range 0 to <code class="backtick">RAND_MAX</code> (inclusive) every time you call it. On machines using the GNU C library <code class="backtick">RAND_MAX</code> is equal to <code class="backtick">INT_MAX</code> which is typically <span class="math inline">2<sup>31</sup> − 1</span>, but <code>RAND_MAX</code> may be as small as 32767. There are no particularly strong guarantees about the quality of random numbers that <code class="backtick">rand</code>
+ returns, but it should be good enough for casual use, and it has the
+advantage that as part of the C standard you can assume it is present
+almost everywhere.</p>
+<p>Note that <code class="backtick">rand</code> is a <strong>pseudorandom number generator</strong>:
+ the sequence of values it returns is predictable if you know its
+starting state (and is still predictable from past values in the
+sequence even if you don't know the starting state, if you are clever
+enough). It is also the case that the initial seed is fixed, so that the
+ program above will print the same value every time you run it.</p>
+<p>This is a feature: it permits debugging randomized programs. As John
+von Neumann, who proposed pseudorandom number generators in his 1946
+talk "Various Techniques Used in Connection With Random Digits,"
+explained:</p>
+<blockquote>
+<p>We see then that we could build a physical instrument to feed random
+digits directly into a high-speed computing machine and could have the
+control call for these numbers as needed. The real objection to this
+procedure is the practical need for checking computations. If we suspect
+ that a calculation is wrong, almost any reasonable check involves
+repeating something done before. At that point the introduction of new
+random numbers would be intolerable.</p>
+</blockquote>
+<h5 id="supplying-a-seed-with-srand"><span class="header-section-number">5.14.1.1.1</span> Supplying a seed with <code>srand</code></h5>
+<p>If you want to get different sequences, you need to <strong>seed</strong> the random number generator using <code class="backtick">srand</code>. A typical use might be:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;time.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ srand(time(<span class="dv">0</span>));
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, rand());
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/randomization/srandFromTime.c" class="uri">examples/randomization/srandFromTime.c</a>
+</div>
+<p>Here <code class="backtick">time(0)</code> returns the number of
+seconds since the epoch (00:00:00 UTC, January 1, 1970, for POSIX
+systems, not counting leap seconds). Note that this still might give
+repeated values if you run it twice in the same second, and it's
+extremely dangerous if you expect to distribute your code to a lot of
+people who want different results, since some two of your users <em>are</em> likely to run it in the same second. See the discussion of <code class="backtick">/dev/urandom</code> below for a better method.</p>
+<h4 id="Better_pseudorandom_number_generators"><span class="header-section-number">5.14.1.2</span> Better pseudorandom number generators</h4>
+<p>There has been quite a bit of research on pseudorandom number
+generators over the years, and much better pseudorandom number
+generators than <code class="backtick">rand</code> are available. The current champion for simulation work is <strong>Mersenne Twister</strong>, which runs about 4 times faster than <code class="backtick">rand</code> in its standard C implementation and passes a much wider battery of statistical tests. Its English-language home page is at <a href="http://www.math.sci.hiroshima-u.ac.jp/%7Em-mat/MT/emt.html" class="uri">http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html</a>. As with <code class="backtick">rand</code>, you still need to provide an initial seed value.</p>
+<p>There are also <strong>cryptographically secure pseudorandom number generators</strong>, of which the most famous is <a href="http://en.wikipedia.org/wiki/Blum_Blum_Shub" title="WikiPedia">Blum Blum Shub</a>.
+ These cannot be predicted based on their output if seeded with a true
+random value (under certain cryptographic assumptions: hardness of
+factoring for Blum Blum Shub). Unfortunately, cryptographic PRNGs are
+usually too slow for day-to-day use.</p>
+<h4 id="Random_numbers_without_the_pseudo"><span class="header-section-number">5.14.1.3</span> Random numbers without the pseudo</h4>
+<p>If you really need actual random numbers and are on a Linux or BSD-like operating system, you can use the special device files <code class="backtick">/dev/random</code> and <code class="backtick">/dev/urandom</code>.
+ These can be opened for reading like ordinary files, but the values
+read from them are a random sequence of bytes (including null
+characters). A typical use might be:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">unsigned</span> <span class="dt">int</span> randval;
+ FILE *f;
+
+ f = fopen(<span class="st">"/dev/random"</span>, <span class="st">"r"</span>);
+ fread(&amp;randval, <span class="kw">sizeof</span>(randval), <span class="dv">1</span>, f);
+ fclose(f);
+
+ printf(<span class="st">"%u</span><span class="ch">\n</span><span class="st">"</span>, randval);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/randomization/devRandom.c" class="uri">examples/randomization/devRandom.c</a>
+</div>
+<p>(A similar construction can also be used to obtain a better initial seed for <code class="backtick">srand</code> than <code class="backtick">time(0)</code>.)</p>
+<p>Both <code class="backtick">/dev/random</code> and <code class="backtick">/dev/urandom</code>
+ derive their random bits from physically random properties of the
+computer, like time between keystrokes or small variations in hard disk
+rotation speeds. The difference between the two is that <code class="backtick">/dev/urandom</code>
+ will always give you some random-looking bits, even if it has to
+generate extra ones using a cryptographic pseudo-random number
+generator, while <code class="backtick">/dev/random</code> will only
+give you bits that it is confident are in fact random. Since your
+computer only generates a small number of genuinely random bits per
+second, this may mean that <code class="backtick">/dev/random</code> will exhaust its pool if read too often. In this case, a read on <code class="backtick">/dev/random</code> will block (just like reading a terminal with no input on it) until the pool has filled up again.</p>
+<p>Neither <code class="backtick">/dev/random</code> nor <code class="backtick">/dev/urandom</code>
+ is known to be secure against a determined attacker, but they are about
+ the best you can do without resorting to specialized hardware.</p>
+<h4 id="RANDMAX"><span class="header-section-number">5.14.1.4</span> Range issues</h4>
+<p>The problem with <code class="backtick">rand</code> is that getting a uniform value between 0 and 2<sup>31</sup>-1 may not be what you want. It could be that <code class="backtick">RAND_MAX</code> is too small; in this case, you may have to call <code class="backtick">rand</code> more than once and paste together the results. But there can be problems with <code class="backtick">RAND_MAX</code> even if it is bigger than the values you want.</p>
+<p>For example, suppose you want to simulate a die roll for your video
+craps machine, but you don't want to get whacked by Johnny "The
+Debugger" when the Nevada State Gaming Commission notices that 6-6 is
+coming up slightly less often than it's supposed to. A natural thing to
+try would be to take the output of <code class="backtick">rand</code> mod 6:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span> d6(<span class="dt">void</span>) {
+ <span class="kw">return</span> rand() % <span class="dv">6</span> + <span class="dv">1</span>;
+}</code></pre></div>
+<p>The problem here is that there are <span class="math inline">2<sup>31</sup></span> outputs from rand, and 6 doesn't divide <span class="math inline">2<sup>31</sup></span>.
+ So 1 and 2 are slightly more likely to come up than 3, 4, 5, or 6. This
+ can be particularly noticeable if we want a uniform variable from a
+larger range, e.g. <span class="math inline">[0…⌊(2/3)⋅2<sup>31</sup>⌋]</span>.</p>
+<p>We can avoid this with a technique called <strong>rejection sampling</strong>, where we reject excess parts of the output range of <code class="backtick">rand</code>.
+ For rolling a die, the trick is to reject anything in the last extra
+bit of the range that is left over after the largest multiple of the die
+ size. Here's a routine that does this, returning a uniform value in the
+ range 0 to n-1 for any positive n, together with a program that
+demonstrates its use for rolling dice:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;time.h&gt;</span>
+
+<span class="co">/* return a uniform random value in the range 0..n-1 inclusive */</span>
+<span class="dt">int</span>
+randRange(<span class="dt">int</span> n)
+{
+ <span class="dt">int</span> limit;
+ <span class="dt">int</span> r;
+
+ limit = RAND_MAX - (RAND_MAX % n);
+
+ <span class="kw">while</span>((r = rand()) &gt;= limit);
+
+ <span class="kw">return</span> r % n;
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> i;
+
+ srand(time(<span class="dv">0</span>));
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; <span class="dv">40</span>; i++) {
+ printf(<span class="st">"%d "</span>, randRange(<span class="dv">6</span>)+<span class="dv">1</span>);
+ }
+
+ putchar(<span class="ch">'\n'</span>);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/randomization/randRange.c" class="uri">examples/randomization/randRange.c</a>
+</div>
+<p>More generally, rejection sampling can be used to get random values
+with particular properties, where it's hard to generate a value with
+that property directly. Here's a program that generates random primes:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;time.h&gt;</span>
+
+<span class="co">/* return 1 if n is prime */</span>
+<span class="dt">int</span>
+isprime(<span class="dt">int</span> n)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">if</span>(n % <span class="dv">2</span> == <span class="dv">0</span> || n == <span class="dv">1</span>) { <span class="kw">return</span> <span class="dv">0</span>; }
+
+ <span class="kw">for</span>(i = <span class="dv">3</span>; i*i &lt;= n; i += <span class="dv">2</span>) {
+ <span class="kw">if</span>(n % i == <span class="dv">0</span>) { <span class="kw">return</span> <span class="dv">0</span>; }
+ }
+
+ <span class="kw">return</span> <span class="dv">1</span>;
+}
+
+<span class="co">/* return a random prime, found by calling rand() until isprime accepts the value */</span>
+<span class="dt">int</span>
+randPrime(<span class="dt">void</span>)
+{
+ <span class="dt">int</span> r;
+
+ <span class="co">/* extra parens avoid warnings */</span>
+ <span class="kw">while</span>(!isprime((r = rand())));
+
+ <span class="kw">return</span> r;
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> i;
+
+ srand(time(<span class="dv">0</span>));
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; <span class="dv">10</span>; i++) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, randPrime());
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/randomization/randPrime.c" class="uri">examples/randomization/randPrime.c</a>
+</div>
+<p>One temptation to avoid is to re-use your random values. If, for
+example, you try to find a random prime by picking a random x and trying
+ x, x+1, x+2, etc., until you hit a prime, some primes are more likely
+to come up than others.</p>
+<h3 id="Randomized_algorithms"><span class="header-section-number">5.14.2</span> Randomized algorithms</h3>
+<p>Randomized algorithms typically make random choices to get good
+average worst-case performance in situations where a similar
+deterministic algorithm would fail badly for some inputs but perform
+well on most inputs. The idea is that the randomization scrambles the
+input space so that the adversary can't predict which possible input
+values will be bad for us. This still allows him to make trouble if he
+gets lucky, but most of the time our algorithm should run quickly.</p>
+<h4 id="Randomized_search"><span class="header-section-number">5.14.2.1</span> Randomized search</h4>
+<p>This is essentially rejection sampling in disguise. Suppose that you
+want to find one of many needles in a large haystack. One approach is to
+ methodically go through the straws/needles one at a time until you find
+ a needle. But you may find that your good friend the adversary has put
+all the needles at the end of your list. Picking candidates at random is
+likely to hit a needle faster if there are many of them.</p>
+<p>Here is a (silly) routine that quickly finds a number whose high-order bits match a particular pattern:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span>
+matchBits(<span class="dt">int</span> pattern)
+{
+ <span class="dt">int</span> r;
+
+ <span class="kw">while</span>(((r = rand()) &amp; <span class="bn">0x70000000</span>) != (pattern &amp; <span class="bn">0x70000000</span>));
+
+ <span class="kw">return</span> r;
+}</code></pre></div>
+<p>This will find a winning value in 8 tries on average. In contrast,
+this deterministic version will take a lot longer for nonzero patterns:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span>
+matchBitsDeterministic(<span class="dt">int</span> pattern)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; (i &amp; <span class="bn">0x70000000</span>) != (pattern &amp; <span class="bn">0x70000000</span>); i++);
+
+ <span class="kw">return</span> i;
+}</code></pre></div>
+<p>The downside of the randomized approach is that it's hard to tell
+when to quit if there are no matches; if we stop after some fixed number
+ of trials, we get a <a href="http://en.wikipedia.org/wiki/Monte_Carlo_algorithm" title="WikiPedia">Monte Carlo algorithm</a>
+ that may give the wrong answer with small probability. The usual
+solution is to either accept a small probability of failure, or
+interleave a deterministic backup algorithm that always works. The
+latter approach gives a <a href="http://en.wikipedia.org/wiki/Las_Vegas_algorithm" title="WikiPedia">Las Vegas algorithm</a> whose running time is variable but whose correctness is not.</p>
+<h4 id="quicksort"><span class="header-section-number">5.14.2.2</span> Quickselect and quicksort</h4>
+<p><strong>Quickselect</strong>, or <strong>Hoare's FIND</strong> (Hoare, C. A. R. Algorithm 65: FIND, CACM 4(7):321–322, July 1961), is an algorithm for quickly finding the <span class="math inline"><em>k</em></span>-th smallest element in an unsorted array of <span class="math inline"><em>n</em></span> elements. It runs in <span class="math inline"><em>O</em>(<em>n</em>)</span>
+ time on average, which is the best one can hope for (we have to look at
+ every element of the array to be sure we didn't miss a small one that
+changes our answer) and better than the <span class="math inline"><em>O</em>(<em>n</em>log<em>n</em>)</span> time we get if we sort the array first using a comparison-based sorting algorithm.</p>
+<p>The idea is to pick a random pivot and divide the input into two
+piles, each of which is likely to be roughly a constant fraction of the
+size of the original input.<a href="#fn24" class="footnoteRef" id="fnref24"><sup>24</sup></a>
+ It takes O(n) time to split the input up (we have to compare each
+element to the pivot once), and in the recursive calls this gives a
+geometric series. We can even do the splitting up in place if we are
+willing to reorder the elements of our original array.</p>
+<p>If we recurse into both piles instead of just one, we get <strong>quicksort</strong>
+ (Hoare, C. A. R. Algorithm 64: Quicksort. CACM 4(7):321, July 1961), a
+very fast and simple comparison-based sorting algorithm. Here is an
+implementation of both algorithms:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="co">/* reorder an array to put elements &lt;= pivot</span>
+<span class="co"> * before elements &gt; pivot.</span>
+<span class="co"> * Returns number of elements &lt;= pivot */</span>
+<span class="dt">static</span> <span class="dt">int</span>
+splitByPivot(<span class="dt">int</span> n, <span class="dt">int</span> *a, <span class="dt">int</span> pivot)
+{
+ <span class="dt">int</span> lo;
+ <span class="dt">int</span> hi;
+ <span class="dt">int</span> temp; <span class="co">/* for swapping */</span>
+
+ assert(n &gt;= <span class="dv">0</span>);
+
+ <span class="co">/* Dutch Flag algorithm */</span>
+ <span class="co">/* swap everything &lt;= pivot to bottom of array */</span>
+ <span class="co">/* invariant is i &lt; lo implies a[i] &lt;= pivot */</span>
+ <span class="co">/* and i &gt; hi implies a[i] &gt; pivot */</span>
+ lo = <span class="dv">0</span>;
+ hi = n<span class="dv">-1</span>;
+
+ <span class="kw">while</span>(lo &lt;= hi) {
+ <span class="kw">if</span>(a[lo] &lt;= pivot) {
+ lo++;
+ } <span class="kw">else</span> {
+ temp = a[hi];
+ a[hi--] = a[lo];
+ a[lo] = temp;
+ }
+ }
+
+ <span class="kw">return</span> lo;
+}
+
+<span class="co">/* find the k-th smallest element of an n-element array */</span>
+<span class="co">/* may reorder elements of the original array */</span>
+<span class="dt">int</span>
+quickselectDestructive(<span class="dt">int</span> k, <span class="dt">int</span> n, <span class="dt">int</span> *a)
+{
+ <span class="dt">int</span> pivot;
+ <span class="dt">int</span> lo;
+
+ assert(<span class="dv">0</span> &lt;= k);
+ assert(k &lt; n);
+
+ <span class="kw">if</span>(n == <span class="dv">1</span>) {
+ <span class="kw">return</span> a[<span class="dv">0</span>];
+ }
+
+ <span class="co">/* else */</span>
+ pivot = a[rand() % n]; <span class="co">/* we will tolerate non-uniformity */</span>
+
+ lo = splitByPivot(n, a, pivot);
+
+ <span class="co">/* lo is now number of values &lt;= pivot */</span>
+ <span class="kw">if</span>(k &lt; lo) {
+ <span class="kw">return</span> quickselectDestructive(k, lo, a);
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> quickselectDestructive(k - lo, n - lo, a + lo);
+ }
+}
+
+<span class="co">/* sort an array in place */</span>
+<span class="dt">void</span>
+quickSort(<span class="dt">int</span> n, <span class="dt">int</span> *a)
+{
+ <span class="dt">int</span> pivot;
+ <span class="dt">int</span> lo;
+
+ <span class="kw">if</span>(n &lt;= <span class="dv">1</span>) {
+ <span class="kw">return</span>;
+ }
+
+ <span class="co">/* else */</span>
+ pivot = a[rand() % n]; <span class="co">/* we will tolerate non-uniformity */</span>
+
+ lo = splitByPivot(n, a, pivot);
+
+ quickSort(lo, a);
+ quickSort(n - lo, a + lo);
+}
+
+
+<span class="co">/* shuffle an array */</span>
+<span class="dt">void</span>
+shuffle(<span class="dt">int</span> n, <span class="dt">int</span> *a)
+{
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> r;
+ <span class="dt">int</span> temp;
+
+ <span class="kw">for</span>(i = n - <span class="dv">1</span>; i &gt; <span class="dv">0</span>; i--) {
+ r = rand() % i;
+ temp = a[r];
+ a[r] = a[i];
+ a[i] = temp;
+ }
+}
+
+<span class="ot">#define N (1024)</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> a[N];
+ <span class="dt">int</span> i;
+
+ srand(<span class="dv">0</span>); <span class="co">/* use fixed value for debugging */</span>
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; N; i++) {
+ a[i] = i;
+ }
+
+ shuffle(N, a);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; N; i++) {
+ assert(quickselectDestructive(i, N, a) == i);
+ }
+
+ shuffle(N, a);
+
+ quickSort(N, a);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; N; i++) {
+ assert(a[i] == i);
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/randomization/quick.c" class="uri">examples/randomization/quick.c</a>
+</div>
+<h3 id="randomizedDataStructures"><span class="header-section-number">5.14.3</span> Randomized data structures</h3>
+<p>Suppose we insert <span class="math inline"><em>n</em></span>
+elements into an initially-empty binary search tree in random order with
+ no rebalancing. Then each element is equally likely to be the root, and
+ all the elements less than the root end up in the left subtree, while
+all the elements greater than the root end up in the right subtree,
+where they are further partitioned recursively. This is exactly what
+happens in quicksort, so the structure of the tree will exactly mirror
+the structure of an execution of quicksort. In particular, the average
+depth of a node will be <span class="math inline"><em>O</em>(log<em>n</em>)</span>, giving us the same expected search cost as in a balanced binary tree.</p>
+<p>The problem with this approach is that we don't have any guarantees
+that the input will be supplied in random order, and in the worst case
+we end up with a linked list. The solution is to put the randomization
+into the algorithm itself, making the structure of the tree depend on
+random choices made by the program itself.</p>
+<h4 id="skipLists"><span class="header-section-number">5.14.3.1</span> Skip lists</h4>
+<p>A <strong>skip list</strong> (<a href="ftp://ftp.cs.umd.edu/pub/skipLists/skiplists.pdf">Pugh, 1990</a>)
+ is a randomized tree-like data structure based on linked lists. It
+consists of a level 0 list that is an ordinary sorted linked list,
+together with higher-level lists that contain a random sampling of the
+elements at lower levels. When inserted into the level i list, an
+element flips a coin that tells it with probability p to insert itself
+in the level i+1 list as well.</p>
+<p>Searches in a skip list are done by starting in the highest-level
+list and searching forward for the last element whose key is smaller
+than the target; the search then continues in the same way on the next
+level down. The idea is that the higher-level lists act as express lanes
+ to get us to our target value faster. To bound the expected running
+time of a search, it helps to look at this process backwards; the
+reversed search path starts at level 0 and continues going backwards
+until it reaches the first element that is also in a higher level; it
+then jumps to the next level up and repeats the process. On average, we
+hit <span class="math inline">1 + 1/<em>p</em></span> nodes at each level before jumping back up; for constant <span class="math inline"><em>p</em></span> (e.g. <span class="math inline">1/2</span>), this gives us <span class="math inline"><em>O</em>(log<em>n</em>)</span> steps for the search.</p>
+<p>The space per element of a skip list also depends on <span class="math inline"><em>p</em></span>. Every element has at least one outgoing pointer (on level 0), and has <span class="math inline">1/(1 − <em>p</em>)</span> pointers in expectation. So the space cost can also be adjusted by adjusting <span class="math inline"><em>p</em></span>. For example, if space is at a premium, setting <span class="math inline"><em>p</em> = 1/10</span> produces <span class="math inline">10/9</span> pointers per node on average—not much more than in a linked list—but still gives <span class="math inline"><em>O</em>(log<em>n</em>)</span> search time.</p>
+<p>Below is an implementation of a skip list. To avoid having to
+allocate a separate array of pointers for each element, we put a
+length-1 array at the end of <code class="backtick">struct&nbsp;skiplist</code>
+ and rely on C's lack of bounds checking to make the array longer if
+necessary. A dummy head element stores pointers to all the initial
+elements in each level of the skip list; it is given the dummy key <code class="backtick">INT_MIN</code>
+ so that searches for values less than any in the list will report this
+value. Aside from these nasty tricks, the code for search and insertion
+is pretty straightforward. Code for deletion is a little more involved,
+because we have to make sure that we delete the leftmost copy of a key
+if there are duplicates (an alternative would be to modify <code class="backtick">skiplistInsert</code> to ignore duplicates).</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;limits.h&gt;</span>
+
+<span class="ot">#include "skiplist.h"</span>
+
+<span class="ot">#define MAX_HEIGHT (32)</span>
+
+<span class="kw">struct</span> skiplist {
+ <span class="dt">int</span> key;
+ <span class="dt">int</span> height; <span class="co">/* number of next pointers */</span>
+ <span class="kw">struct</span> skiplist *next[<span class="dv">1</span>]; <span class="co">/* first of many */</span>
+};
+
+<span class="co">/* choose a height according to a geometric distribution */</span>
+<span class="dt">static</span> <span class="dt">int</span>
+chooseHeight(<span class="dt">void</span>)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">for</span>(i = <span class="dv">1</span>; i &lt; MAX_HEIGHT &amp;&amp; rand() % <span class="dv">2</span> == <span class="dv">0</span>; i++);
+
+ <span class="kw">return</span> i;
+}
+
+<span class="co">/* create a skiplist node with the given key and height */</span>
+<span class="co">/* does not fill in next pointers */</span>
+<span class="dt">static</span> Skiplist
+skiplistCreateNode(<span class="dt">int</span> key, <span class="dt">int</span> height)
+{
+ Skiplist s;
+
+ assert(height &gt; <span class="dv">0</span>);
+ assert(height &lt;= MAX_HEIGHT);
+
+ s = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> skiplist) + <span class="kw">sizeof</span>(<span class="kw">struct</span> skiplist *) * (height - <span class="dv">1</span>));
+
+ assert(s);
+
+ s-&gt;key = key;
+ s-&gt;height = height;
+
+ <span class="kw">return</span> s;
+}
+
+<span class="co">/* create an empty skiplist */</span>
+Skiplist
+skiplistCreate(<span class="dt">void</span>)
+{
+ Skiplist s;
+ <span class="dt">int</span> i;
+
+ <span class="co">/* s is a dummy head element */</span>
+ s = skiplistCreateNode(INT_MIN, MAX_HEIGHT);
+
+ <span class="co">/* this tracks the maximum height of any node */</span>
+ s-&gt;height = <span class="dv">1</span>;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; MAX_HEIGHT; i++) {
+ s-&gt;next[i] = <span class="dv">0</span>;
+ }
+
+ <span class="kw">return</span> s;
+}
+
+<span class="co">/* free a skiplist */</span>
+<span class="dt">void</span>
+skiplistDestroy(Skiplist s)
+{
+ Skiplist next;
+
+ <span class="kw">while</span>(s) {
+ next = s-&gt;next[<span class="dv">0</span>];
+ free(s);
+ s = next;
+ }
+}
+
+<span class="co">/* return maximum key less than or equal to key */</span>
+<span class="co">/* or INT_MIN if there is none */</span>
+<span class="dt">int</span>
+skiplistSearch(Skiplist s, <span class="dt">int</span> key)
+{
+ <span class="dt">int</span> level;
+
+ <span class="kw">for</span>(level = s-&gt;height - <span class="dv">1</span>; level &gt;= <span class="dv">0</span>; level--) {
+ <span class="kw">while</span>(s-&gt;next[level] &amp;&amp; s-&gt;next[level]-&gt;key &lt;= key) {
+ s = s-&gt;next[level];
+ }
+ }
+
+ <span class="kw">return</span> s-&gt;key;
+}
+
+<span class="co">/* insert a new key into s */</span>
+<span class="dt">void</span>
+skiplistInsert(Skiplist s, <span class="dt">int</span> key)
+{
+ <span class="dt">int</span> level;
+ Skiplist elt;
+
+ elt = skiplistCreateNode(key, chooseHeight());
+
+ assert(elt);
+
+ <span class="kw">if</span>(elt-&gt;height &gt; s-&gt;height) {
+ s-&gt;height = elt-&gt;height;
+ }
+
+ <span class="co">/* search through levels taller than elt */</span>
+ <span class="kw">for</span>(level = s-&gt;height - <span class="dv">1</span>; level &gt;= elt-&gt;height; level--) {
+ <span class="kw">while</span>(s-&gt;next[level] &amp;&amp; s-&gt;next[level]-&gt;key &lt; key) {
+ s = s-&gt;next[level];
+ }
+ }
+
+ <span class="co">/* now level is elt-&gt;height - 1, we can start inserting */</span>
+ <span class="kw">for</span>(; level &gt;= <span class="dv">0</span>; level--) {
+ <span class="kw">while</span>(s-&gt;next[level] &amp;&amp; s-&gt;next[level]-&gt;key &lt; key) {
+ s = s-&gt;next[level];
+ }
+
+ <span class="co">/* s is last entry on this level &lt; new element */</span>
+ <span class="co">/* do list insert */</span>
+ elt-&gt;next[level] = s-&gt;next[level];
+ s-&gt;next[level] = elt;
+ }
+}
+
+<span class="co">/* delete a key from s */</span>
+<span class="dt">void</span>
+skiplistDelete(Skiplist s, <span class="dt">int</span> key)
+{
+ <span class="dt">int</span> level;
+ Skiplist target;
+
+ <span class="co">/* first we have to find leftmost instance of key */</span>
+ target = s;
+
+ <span class="kw">for</span>(level = s-&gt;height - <span class="dv">1</span>; level &gt;= <span class="dv">0</span>; level--) {
+ <span class="kw">while</span>(target-&gt;next[level] &amp;&amp; target-&gt;next[level]-&gt;key &lt; key) {
+ target = target-&gt;next[level];
+ }
+ }
+
+ <span class="co">/* take one extra step at bottom */</span>
+ target = target-&gt;next[<span class="dv">0</span>];
+
+ <span class="kw">if</span>(target == <span class="dv">0</span> || target-&gt;key != key) {
+ <span class="kw">return</span>;
+ }
+
+ <span class="co">/* now we found target, splice it out */</span>
+ <span class="kw">for</span>(level = s-&gt;height - <span class="dv">1</span>; level &gt;= <span class="dv">0</span>; level--) {
+ <span class="kw">while</span>(s-&gt;next[level] &amp;&amp; s-&gt;next[level]-&gt;key &lt; key) {
+ s = s-&gt;next[level];
+ }
+
+ <span class="kw">if</span>(s-&gt;next[level] == target) {
+ s-&gt;next[level] = target-&gt;next[level];
+ }
+ }
+
+ free(target);
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/skiplist/skiplist.c" class="uri">examples/trees/skiplist/skiplist.c</a>
+</div>
+<p>Here are the header file, Makefile, and test code: <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/skiplist/skiplist.h">skiplist.h</a>, <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/skiplist/Makefile">Makefile</a>, <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/skiplist/test_skiplist.c">test_skiplist.c</a>.</p>
+<h4 id="Universal_hash_families"><span class="header-section-number">5.14.3.2</span> Universal hash families</h4>
+<p>Randomization can also be useful in hash tables. Recall that in
+building a hash table, we are relying on the hash function to spread out
+ bad input distributions over the indices of our array. But for any
+fixed hash function, in the worst case we may get inputs where every key
+ hashes to the same location. <strong>Universal hashing</strong> (<a href="http://dx.doi.org/10.1016%2F0022-0000%2879%2990044-8">Carter and Wegman, 1979</a>)
+ solves this problem by choosing a hash function at random. We may still
+ get unlucky and have the hash function hash all our values to the same
+location, but now we are relying on the random number generator to be
+nice to us instead of the adversary. We can also rehash with a new
+random hash function if we find out that the one we are using is bad.</p>
+<p>The problem here is that we can't just choose a function uniformly at
+ random out of the set of all possible hash functions, because there are
+ too many of them, meaning that we would spend more space representing
+our hash function than we would on the table. The solution is to observe
+ that we don't need our hash function h to be truly random; it's enough
+if the probability of collision Pr[h(x) = h(y)] for any fixed keys <span class="math inline"><em>x</em> ≠ <em>y</em></span> is <span class="math inline">1/<em>m</em></span>, where <span class="math inline"><em>m</em></span>
+ is the size of the hash table. The reason is that the cost of searching
+ for x (with chaining) is linear in the number of keys already in the
+table that collide with it. The expected number of such collisions is
+the sum of Pr[h(x) = h(y)] over all keys y in the table, or n/m if we
+have n keys. So this pairwise collision probability bound is enough to
+get the desired n/m behavior out of our table. A family of hash functions
+ with this property is called <strong>universal</strong>.</p>
+<p>How do we get a universal hash family? For strings, we can use a
+table of random values, one for each position and possible character in
+the string. The hash of a string is then the exclusive or of the random
+values <code class="backtick">hashArray[i][s[i]]</code> corresponding to
+ the actual characters in the string. If our table has size a power of
+two, this has the universal property, because if two strings x and y
+differ in some position i, then there is only one possible value of <code class="backtick">hashArray[i][y[i]]</code> (mod m) that will make the hash functions equal.</p>
+<p>Typically to avoid having to build an arbitrarily huge table of
+random values, we only hash an initial prefix of the string. Here is a
+hash function based on this idea, which assumes that the <code class="backtick">d</code> data structure includes a <code class="backtick">hashArray</code> field that contains the random values for this particular hash table:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">static</span> <span class="dt">unsigned</span> <span class="dt">long</span>
+hash_function(Dict d, <span class="dt">const</span> <span class="dt">char</span> *s)
+{
+ <span class="dt">unsigned</span> <span class="dt">const</span> <span class="dt">char</span> *us;
+ <span class="dt">unsigned</span> <span class="dt">long</span> h;
+ <span class="dt">int</span> i;
+
+ h = <span class="dv">0</span>;
+
+ us = (<span class="dt">unsigned</span> <span class="dt">const</span> <span class="dt">char</span> *) s;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; HASH_PREFIX_LENGTH &amp;&amp; us[i] != '\<span class="dv">0</span>'; i++) {
+ h ^= d-&gt;hashArray[i][us[i]];
+ }
+
+ <span class="kw">return</span> h;
+}</code></pre></div>
+<p>A modified version of the <code class="backtick">Dict</code> hash table from the <a href="#hashTables">chapter on hash tables</a> that uses this hash function is given here: <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/hashTables/universal/dict.c">dict.c</a>, <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/hashTables/universal/dict.h">dict.h</a>, <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/hashTables/universal/test_dict.c">test_dict.c</a>, <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/hashTables/universal/Makefile">Makefile</a>.</p>
+<h2 id="stringProcessing"><span class="header-section-number">5.15</span> String processing</h2>
+<p>Most of the time, when we've talked about the asymptotic performance
+of data structures, we have implicitly assumed that the keys we are
+looking up are of constant size. This means that computing a hash
+function or comparing two keys (as in a binary search tree) takes <span class="math inline"><em>O</em>(1)</span> time. What if this is not the case?</p>
+<p>If we consider an <span class="math inline"><em>m</em></span>-character string, any reasonable hash function is going to take <span class="math inline"><em>O</em>(<em>m</em>)</span> time since it has to look at all of the characters. Similarly, comparing two <span class="math inline"><em>m</em></span>-character strings may also take <span class="math inline"><em>O</em>(<em>m</em>)</span> time. If we charge for this (as we should!) then the cost of hash table operations goes from <span class="math inline"><em>O</em>(1)</span> to <span class="math inline"><em>O</em>(<em>m</em>)</span> and the cost of binary search tree operations, even in a balanced tree, goes from <span class="math inline"><em>O</em>(log<em>n</em>)</span> to <span class="math inline"><em>O</em>(<em>m</em>log<em>n</em>)</span>. Even sorting becomes more expensive: a sorting algorithm that does <span class="math inline"><em>O</em>(<em>n</em>log<em>n</em>)</span> comparisons may now take <span class="math inline"><em>O</em>(<em>m</em><em>n</em>log<em>n</em>)</span> time. But maybe we can exploit the structure of strings to get better performance.</p>
+<h3 id="radixSearch"><span class="header-section-number">5.15.1</span> Radix search</h3>
+<p><strong>Radix search</strong> refers to a variety of data structures
+that support searching for strings considered as sequences of digits in
+some large base (or <strong>radix</strong>). These are generally faster than simple <a href="#binarySearchTrees">binary search trees</a>
+ because they usually only require examining one byte or less of the
+target string at each level of the tree, as compared to every byte in
+the target in a full string comparison. In many cases, the best radix
+search trees are even faster than <a href="#hashTables">hash tables</a>, because they only need to look at a small part of the target string to identify it.</p>
+<p>We'll describe several radix search trees, starting with the simplest and working up.</p>
+<h4 id="Tries"><span class="header-section-number">5.15.1.1</span> Tries</h4>
+<p>A <strong>trie</strong> is a binary tree (or more generally, a <em>k</em>-ary tree where <em>k</em> is the radix) where the root represents the empty bit sequence and the two children of a node representing sequence <span class="math inline"><em>x</em></span> represent the extended sequences <span class="math inline"><em>x</em>0</span> and <span class="math inline"><em>x</em>1</span> (or generally <span class="math inline"><em>x</em>0, <em>x</em>1, …, <em>x</em>(<em>k</em> − 1)</span>).
+ So a key is not stored at a particular node but is instead represented
+bit-by-bit (or digit-by-digit) along some path. Typically a trie assumes
+ that the set of keys is prefix-free, i.e. that no key is a prefix of
+another; in this case there is a one-to-one correspondence between keys
+and leaves of the trie. If this is not the case, we can mark internal
+nodes that also correspond to the ends of keys, getting a slightly
+different data structure known as a <strong>digital search tree</strong>. For null-terminated strings as in C, the null terminator ensures that any set of strings is prefix-free.</p>
+<p>Given this simple description, a trie storing a single long key would
+ have a very large number of nodes. A standard optimization is to chop
+off any path with no branches in it, so that each leaf corresponds to
+the shortest unique prefix of a key. This requires storing the key in
+the leaf so that we can distinguish different keys with the same prefix.</p>
+<p>The name <em>trie</em> comes from the phrase "information re<em>trie</em>val." Despite the etymology, <em>trie</em> is now almost always pronounced like <em>try</em> instead of <em>tree</em> to avoid confusion with other tree data structures.</p>
+<h5 id="Searching_a_trie"><span class="header-section-number">5.15.1.1.1</span> Searching a trie</h5>
+<p>Searching a trie is similar to searching a binary search tree, except
+ that instead of doing a comparison at each step we just look at the
+next bit in the target. The time to perform a search is proportional to
+the number of bits in the longest path in the tree matching a prefix of
+the target. This can be very fast for search misses if the target is
+wildly different from all the keys in the tree.</p>
+<h5 id="Inserting_a_new_element_into_a_trie"><span class="header-section-number">5.15.1.1.2</span> Inserting a new element into a trie</h5>
+<p>Insertion is more complicated for tries than for binary search trees.
+ The reason is that a new element may add more than one new node. There
+are essentially two cases:</p>
+<ol style="list-style-type: decimal">
+<li>(The simple case.) In searching for the new key, we reach a null
+pointer leaving a non-leaf node. In this case we can simply add a new
+leaf. The cost of this case is essentially the same as for search plus <span class="math inline"><em>O</em>(1)</span> for building the new leaf.</li>
+<li>(The other case.) In searching for the new key, we reach a leaf, but
+ the key stored there isn't the same as the new key. Now we have to
+generate a new path for as long as the old key and the new key have the
+same bits, branching out to two different leaves at the end. The cost of
+ this operation is within a constant factor of the cost for searching
+for the new leaf <em>after</em> it is inserted, since that's how long the newly-built search path will be.</li>
+</ol>
+<p>In either case, the cost is bounded by the length of the new key,
+which is about the best we can hope for in the worst case for any data
+structure.</p>
+<h5 id="trieImplementation"><span class="header-section-number">5.15.1.1.3</span> Implementation</h5>
+<p>A typical trie implementation in C might look like this. It uses a <code class="backtick">GET_BIT</code> macro similar to the one from the <a href="#bitManipulation">chapter on bit manipulation</a>, except that we reverse the bits within each byte to get the right sorting order for keys.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">typedef</span> <span class="kw">struct</span> trie_node *Trie;
+
+<span class="ot">#define EMPTY_TRIE (0)</span>
+
+<span class="co">/* returns 1 if trie contains target */</span>
+<span class="dt">int</span> trie_contains(Trie trie, <span class="dt">const</span> <span class="dt">char</span> *target);
+
+<span class="co">/* add a new key to a trie */</span>
+<span class="co">/* and return the new trie */</span>
+Trie trie_insert(Trie trie, <span class="dt">const</span> <span class="dt">char</span> *key);
+
+<span class="co">/* free a trie */</span>
+<span class="dt">void</span> trie_destroy(Trie);
+
+<span class="co">/* debugging utility: print all keys in trie */</span>
+<span class="dt">void</span> trie_print(Trie);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/trie/trie.h" class="uri">examples/trees/trie/trie.h</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;string.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="ot">#include "trie.h"</span>
+
+<span class="ot">#define BITS_PER_BYTE (8)</span>
+
+<span class="co">/* extract the n-th bit of x */</span>
+<span class="co">/* here we process bits within bytes in MSB-first order */</span>
+<span class="co">/* this sorts like strcmp */</span>
+<span class="ot">#define GET_BIT(x, n) ((((x)[(n) / BITS_PER_BYTE]) &amp; (0x1 &lt;&lt; (BITS_PER_BYTE - 1 - (n) % BITS_PER_BYTE))) != 0)</span>
+
+<span class="ot">#define TRIE_BASE (2)</span>
+
+<span class="kw">struct</span> trie_node {
+ <span class="dt">char</span> *key;
+ <span class="kw">struct</span> trie_node *kids[TRIE_BASE];
+};
+
+<span class="ot">#define IsLeaf(t) ((t)-&gt;kids[0] == 0 &amp;&amp; (t)-&gt;kids[1] == 0)</span>
+
+<span class="co">/* returns 1 if trie contains target */</span>
+<span class="dt">int</span>
+trie_contains(Trie trie, <span class="dt">const</span> <span class="dt">char</span> *target)
+{
+ <span class="dt">int</span> bit;
+
+ <span class="kw">for</span>(bit = <span class="dv">0</span>; trie &amp;&amp; !IsLeaf(trie); bit++) {
+ <span class="co">/* keep going */</span>
+ trie = trie-&gt;kids[GET_BIT(target, bit)];
+ }
+
+ <span class="kw">if</span>(trie == <span class="dv">0</span>) {
+ <span class="co">/* we lost */</span>
+ <span class="kw">return</span> <span class="dv">0</span>;
+ } <span class="kw">else</span> {
+ <span class="co">/* check that leaf really contains the target */</span>
+ <span class="kw">return</span> !strcmp(trie-&gt;key, target);
+ }
+}
+
+<span class="co">/* gcc -pedantic kills strdup! */</span>
+<span class="dt">static</span> <span class="dt">char</span> *
+my_strdup(<span class="dt">const</span> <span class="dt">char</span> *s)
+{
+ <span class="dt">char</span> *s2;
+
+ s2 = malloc(strlen(s) + <span class="dv">1</span>);
+ assert(s2);
+
+ strcpy(s2, s);
+ <span class="kw">return</span> s2;
+}
+
+
+<span class="co">/* helper functions for insert */</span>
+<span class="dt">static</span> Trie
+make_trie_node(<span class="dt">const</span> <span class="dt">char</span> *key)
+{
+ Trie t;
+ <span class="dt">int</span> i;
+
+ t = malloc(<span class="kw">sizeof</span>(*t));
+ assert(t);
+
+ <span class="kw">if</span>(key) {
+ t-&gt;key = my_strdup(key);
+ assert(t-&gt;key);
+ } <span class="kw">else</span> {
+ t-&gt;key = <span class="dv">0</span>;
+ }
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; TRIE_BASE; i++) t-&gt;kids[i] = <span class="dv">0</span>;
+
+ <span class="kw">return</span> t;
+}
+
+<span class="co">/* add a new key to a trie */</span>
+<span class="co">/* and return the new trie */</span>
+Trie
+trie_insert(Trie trie, <span class="dt">const</span> <span class="dt">char</span> *key)
+{
+ <span class="dt">int</span> bit;
+ <span class="dt">int</span> bitvalue;
+ Trie t;
+ Trie kid;
+ <span class="dt">const</span> <span class="dt">char</span> *oldkey;
+
+ <span class="kw">if</span>(trie == <span class="dv">0</span>) {
+ <span class="kw">return</span> make_trie_node(key);
+ }
+ <span class="co">/* else */</span>
+ <span class="co">/* first we'll search for key */</span>
+ <span class="kw">for</span>(t = trie, bit = <span class="dv">0</span>; !IsLeaf(t); bit++, t = kid) {
+ kid = t-&gt;kids[bitvalue = GET_BIT(key, bit)];
+ <span class="kw">if</span>(kid == <span class="dv">0</span>) {
+ <span class="co">/* woohoo! easy case */</span>
+ t-&gt;kids[bitvalue] = make_trie_node(key);
+ <span class="kw">return</span> trie;
+ }
+ }
+
+ <span class="co">/* did we get lucky? */</span>
+ <span class="kw">if</span>(!strcmp(t-&gt;key, key)) {
+ <span class="co">/* nothing to do */</span>
+ <span class="kw">return</span> trie;
+ }
+
+ <span class="co">/* else */</span>
+ <span class="co">/* hard case---have to extend the trie */</span>
+ oldkey = t-&gt;key;
+<span class="ot">#ifdef EXCESSIVE_TIDINESS</span>
+ t-&gt;key = <span class="dv">0</span>; <span class="co">/* not required but makes data structure look tidier */</span>
+<span class="ot">#endif</span>
+
+ <span class="co">/* walk the common prefix */</span>
+ <span class="kw">while</span>(GET_BIT(oldkey, bit) == (bitvalue = GET_BIT(key, bit))) {
+ kid = make_trie_node(<span class="dv">0</span>);
+ t-&gt;kids[bitvalue] = kid;
+ bit++;
+ t = kid;
+ }
+
+ <span class="co">/* then split */</span>
+ t-&gt;kids[bitvalue] = make_trie_node(key);
+ t-&gt;kids[!bitvalue] = make_trie_node(oldkey);
+
+ <span class="kw">return</span> trie;
+}
+
+<span class="co">/* kill it */</span>
+<span class="dt">void</span>
+trie_destroy(Trie trie)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">if</span>(trie) {
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; TRIE_BASE; i++) {
+ trie_destroy(trie-&gt;kids[i]);
+ }
+
+ <span class="kw">if</span>(IsLeaf(trie)) {
+ free(trie-&gt;key);
+ }
+
+ free(trie);
+ }
+}
+
+<span class="dt">static</span> <span class="dt">void</span>
+trie_print_internal(Trie t, <span class="dt">int</span> bit)
+{
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> kid;
+
+ <span class="kw">if</span>(t != <span class="dv">0</span>) {
+ <span class="kw">if</span>(IsLeaf(t)) {
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; bit; i++) putchar(' ');
+ puts(t-&gt;key);
+ } <span class="kw">else</span> {
+ <span class="kw">for</span>(kid = <span class="dv">0</span>; kid &lt; TRIE_BASE; kid++) {
+ trie_print_internal(t-&gt;kids[kid], bit<span class="dv">+1</span>);
+ }
+ }
+ }
+}
+
+<span class="dt">void</span>
+trie_print(Trie t)
+{
+ trie_print_internal(t, <span class="dv">0</span>);
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/trie/trie.c" class="uri">examples/trees/trie/trie.c</a>
+</div>
+<p>Here is a short test program that demonstrates how to use it:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="ot">#include "trie.h"</span>
+
+<span class="co">/* test for trie.c */</span>
+<span class="co">/* reads lines from stdin and echoes lines that haven't appeared before */</span>
+
+<span class="co">/* read a line of text from stdin</span>
+<span class="co"> * and return it (without terminating newline) as a freshly-malloc'd block.</span>
+<span class="co"> * Caller is responsible for freeing this block.</span>
+<span class="co"> * Returns 0 on error or EOF.</span>
+<span class="co"> */</span>
+<span class="dt">char</span> *
+getline(<span class="dt">void</span>)
+{
+ <span class="dt">char</span> *line; <span class="co">/* line buffer */</span>
+ <span class="dt">int</span> n; <span class="co">/* characters read */</span>
+ <span class="dt">int</span> size; <span class="co">/* size of line buffer */</span>
+ <span class="dt">int</span> c;
+
+ size = <span class="dv">1</span>;
+ line = malloc(size);
+ <span class="kw">if</span>(line == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+
+ n = <span class="dv">0</span>;
+
+ <span class="kw">while</span>((c = getchar()) != <span class="ch">'\n'</span> &amp;&amp; c != EOF) {
+ <span class="kw">while</span>(n &gt;= size - <span class="dv">1</span>) {
+ size *= <span class="dv">2</span>;
+ line = realloc(line, size);
+ <span class="kw">if</span>(line == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+ }
+ line[n++] = c;
+ }
+
+ <span class="kw">if</span>(c == EOF &amp;&amp; n == <span class="dv">0</span>) {
+ <span class="co">/* got nothing */</span>
+ free(line);
+ <span class="kw">return</span> <span class="dv">0</span>;
+ } <span class="kw">else</span> {
+ line[n++] = '\<span class="dv">0</span>';
+ <span class="kw">return</span> line;
+ }
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ Trie t;
+ <span class="dt">char</span> *line;
+
+ t = EMPTY_TRIE;
+
+ <span class="kw">while</span>((line = getline()) != <span class="dv">0</span>) {
+ <span class="kw">if</span>(!trie_contains(t, line)) {
+ puts(line);
+ }
+
+ <span class="co">/* try to insert it either way */</span>
+ <span class="co">/* this tests that insert doesn't blow up on duplicates */</span>
+ t = trie_insert(t, line);
+
+ free(line);
+ }
+
+ puts(<span class="st">"==="</span>);
+
+ trie_print(t);
+
+ trie_destroy(t);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/trie/test_trie.c" class="uri">examples/trees/trie/test_trie.c</a>
+</div>
+<h4 id="Patricia_trees"><span class="header-section-number">5.15.1.2</span> Patricia trees</h4>
+<p>Tries perform well when all keys are short (or are distinguished by
+short prefixes), but can grow very large if one inserts two keys that
+have a long common prefix. The reason is that a trie has to store an
+internal node for every bit of the common prefix until the two keys
+become distinguishable, leading to long chains of internal nodes each of
+ which has only one child. An optimization (described in <a href="http://dl.acm.org/citation.cfm?id=321481">this paper</a>) known as a <strong>Patricia tree</strong> eliminates these long chains by having each node store the number of the bit to branch on, like this:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">struct</span> patricia_node {
+ <span class="dt">char</span> *key;
+ <span class="dt">int</span> bit;
+ <span class="kw">struct</span> patricia_node *kids[<span class="dv">2</span>];
+};
+
+<span class="kw">typedef</span> <span class="kw">struct</span> patricia_node *Patricia;</code></pre></div>
+<p>Now when searching for a key, instead of using the number of nodes
+visited so far to figure out which bit to look at, we just read the bit
+position stored in the node. This means in particular that we can skip over any
+bits that we don't actually branch on. We do however have to be more
+careful to make sure we don't run off the end of our target key, since
+it is possible that when skipping over intermediate bits we might skip
+over some that distinguish our target from all keys in the table,
+including longer keys. For example, a Patricia tree storing the strings <code class="backtick">abc</code> and <code class="backtick">abd</code> will first test bit position 21 (counting from 0), since that's where <code class="backtick">abc</code> and <code class="backtick">abd</code> differ. This can be trouble if we are looking for <code class="backtick">x</code>.</p>
+<p>Here's the search code:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span>
+patricia_contains(Patricia t, <span class="dt">const</span> <span class="dt">char</span> *key)
+{
+ <span class="dt">int</span> key_bits;
+
+ key_bits = BITS_PER_BYTE * (strlen(key)+<span class="dv">1</span>); <span class="co">/* +1 for the NUL */</span>
+
+ <span class="kw">while</span>(t &amp;&amp; !IsLeaf(t)) {
+ <span class="kw">if</span>(t-&gt;bit &gt;= key_bits) {
+ <span class="co">/* can't be there */</span>
+ <span class="kw">return</span> <span class="dv">0</span>;
+ } <span class="kw">else</span> {
+ t = t-&gt;kids[GET_BIT(key, t-&gt;bit)];
+ }
+ }
+
+ <span class="kw">return</span> t &amp;&amp; !strcmp(t-&gt;key, key);
+}</code></pre></div>
+<p>The insertion code is similar in many respects to the insertion code
+for a trie. The differences are that we never construct a long chain of
+internal nodes when splitting a leaf (although we do have to scan
+through both the old and new keys to find the first bit position where
+they differ), but we may sometimes have to add a new internal node
+between two previously existing nodes if a new key branches off at a bit
+ position that was previously skipped over.</p>
+<p>In the worst case Patricia trees are much more efficient than tries,
+in both space (linear in the number of keys instead of linear in the
+total size of the keys) and time complexity, often needing to examine
+only a very small number of bits for misses (hits still require a full
+scan in <code class="backtick">strcmp</code> to verify the correct key).
+ The only downside of Patricia trees is that since they work on bits,
+they are not quite perfectly tuned to the byte or word-oriented
+structure of modern CPUs.</p>
+<h4 id="Ternary_search_trees"><span class="header-section-number">5.15.1.3</span> Ternary search trees</h4>
+<p><strong>Ternary search trees</strong> were described by Jon Bentley and Bob Sedgewick in an article in the April 1988 issue of <em>Dr. Dobb's Journal</em>, available <a href="http://www.drdobbs.com/database/ternary-search-trees/184410528">here</a>.</p>
+<p>The basic idea is that each node in the tree stores one character from the key and three child pointers <code class="backtick">lt</code>, <code class="backtick">eq</code>, and <code class="backtick">gt</code>. If the corresponding character in the target is equal to the character in the node, we move to the <em>next</em> character in the target and follow the <code class="backtick">eq</code> pointer out of the node. If the target is less, follow the <code class="backtick">lt</code> pointer but stay at the <em>same</em> character. If the target is greater, follow the <code class="backtick">gt</code>
+ pointer and again stay at the same character. When searching for a
+string, we walk down the tree until we either reach a node that matches
+the terminating NUL (a hit), or follow a null pointer (a miss).</p>
+<p>A TST acts a bit like a 256-way trie, except that instead of storing
+an array of 256 outgoing pointers, we build something similar to a small
+ binary search tree for the next character. Note that no explicit
+balancing is done within these binary search trees. From a theoretical
+perspective, the worst case is that we get a 256-node deep linked-list
+equivalent at each step, multiplying our search time by <span class="math inline">256 = <em>O</em>(1)</span>. In practice, only those characters that actually appear in some key at this stage will show up, so the <span class="math inline"><em>O</em>(1)</span> is likely to be a small <span class="math inline"><em>O</em>(1)</span>, especially if keys are presented in random order.</p>
+<p>TSTs are one of the fastest known data structures for implementing
+dictionaries using strings as keys, beating both hash tables and tries
+in most cases. They can be slower than Patricia trees if there are
+many keys with long matching prefixes; however, a Patricia-like
+optimization can be applied to give a <strong>compressed ternary search tree</strong> that works well even in this case. In practice, regular TSTs are usually good enough.</p>
+<p>Here is a simple implementation of an insert-only TST. The C code
+includes two versions of the insert helper routine; the first is the
+original recursive version and the second is an iterative version
+generated by eliminating the tail recursion from the first.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">typedef</span> <span class="kw">struct</span> tst_node *TST;
+
+<span class="ot">#define EMPTY_TST (0)</span>
+
+<span class="co">/* returns 1 if t contains target */</span>
+<span class="dt">int</span> tst_contains(TST t, <span class="dt">const</span> <span class="dt">char</span> *target);
+
+<span class="co">/* add a new key to a TST */</span>
+<span class="co">/* and return the new TST */</span>
+TST tst_insert(TST t, <span class="dt">const</span> <span class="dt">char</span> *key);
+
+<span class="co">/* free a TST */</span>
+<span class="dt">void</span> tst_destroy(TST);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/tst/tst.h" class="uri">examples/trees/tst/tst.h</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="ot">#include "tst.h"</span>
+
+<span class="kw">struct</span> tst_node {
+ <span class="dt">char</span> key; <span class="co">/* value to split on */</span>
+ <span class="kw">struct</span> tst_node *lt; <span class="co">/* go here if target[index] &lt; value */</span>
+ <span class="kw">struct</span> tst_node *eq; <span class="co">/* go here if target[index] == value */</span>
+ <span class="kw">struct</span> tst_node *gt; <span class="co">/* go here if target[index] &gt; value */</span>
+};
+
+<span class="co">/* returns 1 if t contains key */</span>
+<span class="dt">int</span>
+tst_contains(TST t, <span class="dt">const</span> <span class="dt">char</span> *key)
+{
+ assert(key);
+
+ <span class="kw">while</span>(t) {
+ <span class="kw">if</span>(*key &lt; t-&gt;key) {
+ t = t-&gt;lt;
+ } <span class="kw">else</span> <span class="kw">if</span>(*key &gt; t-&gt;key) {
+ t = t-&gt;gt;
+ } <span class="kw">else</span> <span class="kw">if</span>(*key == '\<span class="dv">0</span>') {
+ <span class="kw">return</span> <span class="dv">1</span>;
+ } <span class="kw">else</span> {
+ t = t-&gt;eq;
+ key++;
+ }
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}
+
+<span class="co">/* original recursive insert */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+tst_insert_recursive(TST *t, <span class="dt">const</span> <span class="dt">char</span> *key)
+{
+ <span class="kw">if</span>(*t == <span class="dv">0</span>) {
+ *t = malloc(<span class="kw">sizeof</span>(**t));
+ assert(*t);
+ (*t)-&gt;key = *key;
+ (*t)-&gt;lt = (*t)-&gt;eq = (*t)-&gt;gt = <span class="dv">0</span>;
+ }
+
+ <span class="co">/* now follow search */</span>
+ <span class="kw">if</span>(*key &lt; (*t)-&gt;key) {
+ tst_insert_recursive(&amp;(*t)-&gt;lt, key);
+ } <span class="kw">else</span> <span class="kw">if</span>(*key &gt; (*t)-&gt;key) {
+ tst_insert_recursive(&amp;(*t)-&gt;gt, key);
+ } <span class="kw">else</span> <span class="kw">if</span>(*key == '\<span class="dv">0</span>') {
+ <span class="co">/* do nothing, we are done */</span>
+ ;
+ } <span class="kw">else</span> {
+ tst_insert_recursive(&amp;(*t)-&gt;eq, key<span class="dv">+1</span>);
+ }
+}
+
+<span class="co">/* iterative version of above, since somebody asked */</span>
+<span class="co">/* This is pretty much standard tail-recursion elimination: */</span>
+<span class="co">/* The whole function gets wrapped in a loop, and recursive</span>
+<span class="co"> * calls get replaced by assignment */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+tst_insert_iterative(TST *t, <span class="dt">const</span> <span class="dt">char</span> *key)
+{
+ <span class="kw">for</span>(;;) {
+ <span class="kw">if</span>(*t == <span class="dv">0</span>) {
+ *t = malloc(<span class="kw">sizeof</span>(**t));
+ assert(*t);
+ (*t)-&gt;key = *key;
+ (*t)-&gt;lt = (*t)-&gt;eq = (*t)-&gt;gt = <span class="dv">0</span>;
+ }
+
+ <span class="co">/* now follow search */</span>
+ <span class="kw">if</span>(*key &lt; (*t)-&gt;key) {
+ t = &amp;(*t)-&gt;lt;
+ } <span class="kw">else</span> <span class="kw">if</span>(*key &gt; (*t)-&gt;key) {
+ t = &amp;(*t)-&gt;gt;
+ } <span class="kw">else</span> <span class="kw">if</span>(*key == '\<span class="dv">0</span>') {
+ <span class="co">/* do nothing, we are done */</span>
+ <span class="kw">return</span>;
+ } <span class="kw">else</span> {
+ t = &amp;(*t)-&gt;eq;
+ key++;
+ }
+ }
+}
+
+
+<span class="co">/* add a new key to a TST */</span>
+<span class="co">/* and return the new TST */</span>
+TST
+tst_insert(TST t, <span class="dt">const</span> <span class="dt">char</span> *key)
+{
+ assert(key);
+
+<span class="ot">#ifdef USE_RECURSIVE_INSERT</span>
+ tst_insert_recursive(&amp;t, key);
+<span class="ot">#else</span>
+ tst_insert_iterative(&amp;t, key);
+<span class="ot">#endif</span>
+ <span class="kw">return</span> t;
+}
+
+<span class="co">/* free a TST */</span>
+<span class="dt">void</span>
+tst_destroy(TST t)
+{
+ <span class="kw">if</span>(t) {
+ tst_destroy(t-&gt;lt);
+ tst_destroy(t-&gt;eq);
+ tst_destroy(t-&gt;gt);
+ free(t);
+ }
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/tst/tst.c" class="uri">examples/trees/tst/tst.c</a>
+</div>
+<p>And here is some test code, almost identical to the test code for tries: <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/trees/tst/test_tst.c">test_tst.c</a>.</p>
+<p>The <em>Dr. Dobb's</em> article contains additional code for doing deletions and partial matches, plus some optimizations for inserts.</p>
+<h4 id="treesMoreInformation"><span class="header-section-number">5.15.1.4</span> More information</h4>
+<ul>
+<li><a href="http://imej.wfu.edu/articles/2002/2/02/index.asp" class="uri">http://imej.wfu.edu/articles/2002/2/02/index.asp</a> has some good Java-based animations of radix tries, Patricia tries, and other tree-like data structures.</li>
+</ul>
+<h3 id="radixSort"><span class="header-section-number">5.15.2</span> Radix sort</h3>
+<p>The standard <code class="backtick">quicksort</code> routine is an example of a <strong>comparison-based sorting algorithm</strong>. This means that the only information the algorithm uses about the items it is sorting is the return value of the <code class="backtick">compare</code>
+ routine. This has a rather nice advantage of making the algorithm very
+general, but has the disadvantage that the algorithm can extract only
+one bit of information from every call to <code class="backtick">compare</code>. Since there are <span class="math inline"><em>n</em>!</span> possible ways to reorder the input sequence, this means we need at least <span class="math inline">log(<em>n</em>!) = Θ(<em>n</em>log<em>n</em>)</span> calls to <code class="backtick">compare</code> to finish the sort. If we are sorting something like strings, this can get particularly expensive, because calls to <code class="backtick">strcmp</code> may take time linear in the length of the strings being compared. In the worst case, sorting <span class="math inline"><em>n</em></span> strings of length <span class="math inline"><em>m</em></span> each could take <span class="math inline"><em>O</em>(<em>n</em><em>m</em>log<em>n</em>)</span> time.</p>
+<h4 id="Bucket_sort"><span class="header-section-number">5.15.2.1</span> Bucket sort</h4>
+<p>The core idea of radix sort is that if we want to sort values from a
+small range, we can do it by making one bucket for each possible value
+and throw any object with that value into the corresponding bucket. In
+the old days, when <a href="http://en.wikipedia.org/wiki/Solitaire_%28Windows%29" title="WikiPedia">Solitaire</a>
+ was a game played with physical pieces of cardboard, a player who
+suspected that one of these "cards" had fallen under the couch
+might sort the deck by dividing it up into Spades, Hearts, Diamonds, and
+ Clubs piles and then sorting each pile recursively. The same trick works
+ in a computer, but there the buckets are typically implemented as an
+array of some convenient data structure.</p>
+<p>If the number of possible values is too big, we may still be able to
+use bucket sort digit-by-digit. The resulting algorithms are known
+generally as <strong>radix sort</strong>. These are a class of
+algorithms designed for sorting strings in lexicographic order—the order
+ used by dictionaries where one string is greater than another if the
+first character on which they differ is greater. One particular variant,
+ <strong>most-significant-byte radix sort</strong> or MSB radix sort,
+has the beautiful property that its running time is not only linear in
+the size of the input in bytes, but is also linear in the smallest
+number of characters in the input that need to be examined to determine
+the correct order. This algorithm is so fast that it takes not much more
+ time to sort data than it does to read the data from memory and write
+it back. But it's a little trickier to explain than the original <strong>least-significant-byte radix sort</strong> or LSB radix sort.</p>
+<h4 id="Classic_LSB_radix_sort"><span class="header-section-number">5.15.2.2</span> Classic LSB radix sort</h4>
+<p>This is the variant used for punch cards, and works well for
+fixed-length strings. The idea is to sort on the least significant
+position first, then work backwards to the most significant position.
+This works as long as each sort is <em>stable</em>, meaning that it doesn't reorder values with equal keys. For example, suppose we are sorting the strings:</p>
+<pre><code>sat
+bat
+bad</code></pre>
+<p>The first pass sorts on the third column, giving:</p>
+<pre><code>bad
+sat
+bat</code></pre>
+<p>The second pass sorts on the second column, producing no change in
+the order (all the characters are the same). The last pass sorts on the
+first column. This moves the <code class="backtick">s</code> after the two <code class="backtick">b</code>s, but preserves the order of the two words starting with <code class="backtick">b</code>. The result is:</p>
+<pre><code>bad
+bat
+sat</code></pre>
+<p>There are three downsides to LSB radix sort:</p>
+<ol style="list-style-type: decimal">
+<li>All the strings have to be the same length (this is not necessarily a problem if they are really fixed-width data types like <code class="backtick">int</code>s).</li>
+<li>The algorithm used to sort each position must be stable, which may require more effort than most programmers would like to take.</li>
+<li>It may be that the late positions in the strings don't affect the order, but we have to sort on them anyway. If we are sorting <code class="backtick">aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa</code> and <code class="backtick">baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa</code>, we spend a lot of time matching up <code class="backtick">a</code>s against each other.</li>
+</ol>
+<h4 id="MSB_radix_sort"><span class="header-section-number">5.15.2.3</span> MSB radix sort</h4>
+<p>For these reasons, MSB radix sort is used more often. This is basically the radix sort version of <a href="#quicksort">quicksort</a>,
+ where instead of partitioning our input data into two piles based on
+whether each element is less than or greater than a random pivot, we
+partition the input into 256 piles, one for each initial byte. We can
+then sort each pile recursively using the same algorithm, taking
+advantage of the fact that we know that the first byte (or later, the
+first k bytes) are equal and so we only need to look at the next one.
+The recursion stops when we get down to 1 value, or in practice where we
+ get down to a small enough number of values that the cost of doing a
+different sorting algorithm becomes lower than the cost of creating and
+tearing down the data structures for managing the piles.</p>
+<h5 id="Issues_with_recursion_depth"><span class="header-section-number">5.15.2.3.1</span> Issues with recursion depth</h5>
+<p>The depth of recursion for MSB radix sort is equal to the length of
+the second-longest string in the worst case. Since strings can be pretty
+ long, this creates a danger of blowing out the stack. The solution (as
+in <a href="#quicksort">quicksort</a>) is to use tail recursion for the
+largest pile. Now any pile we recurse into with an actual procedure call
+ is at most half the size of the original pile, so we get stack depth at
+ most <span class="math inline"><em>O</em>(log<em>n</em>)</span>.</p>
+<h5 id="Implementing_the_buckets"><span class="header-section-number">5.15.2.3.2</span> Implementing the buckets</h5>
+<p>There is a trick we can do analogous to the Dutch flag algorithm
+where we sort the array in place. The idea is that we first count the
+number of elements that land in each bucket and assign a block of the
+array for each bucket, keeping track in each block of an initial prefix
+of values that belong in the bucket with the rest not yet processed. We
+then walk through the buckets swapping out any elements at the top of
+the good prefix to the bucket they are supposed to be in. This procedure
+ puts at least one element in the right bucket for each swap, so we
+reorder everything correctly in at most <span class="math inline"><em>n</em></span> swaps and <span class="math inline"><em>O</em>(<em>n</em>)</span> additional work.</p>
+<p>To keep track of each bucket, we use two pointers <code class="backtick">bucket[i]</code> for the first element of the bucket and <code class="backtick">top[i]</code>
+ for the first unused element. We could make these be integer array
+indices, but this slows the code down by about 10%. This seems to be a
+situation where our use of pointers is complicated enough that the
+compiler can't optimize out the array lookups.</p>
+<h5 id="Further_optimization"><span class="header-section-number">5.15.2.3.3</span> Further optimization</h5>
+<p>Since we are detecting the heaviest bucket anyway, there is a
+straightforward optimization that speeds the sort up noticeably on
+inputs with a lot of duplicates: if everything would land in the same
+bucket, we can skip the bucket-sort and just go directly to the next
+character.</p>
+<h5 id="radixSortImplementation"><span class="header-section-number">5.15.2.3.4</span> Sample implementation</h5>
+<p>Here is an implementation of MSB radix sort using the ideas above:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;limits.h&gt;</span>
+<span class="ot">#include &lt;string.h&gt;</span>
+
+<span class="ot">#include "radixSort.h"</span>
+
+<span class="co">/* in-place MSB radix sort for null-terminated strings */</span>
+
+<span class="co">/* helper routine for swapping */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+swapStrings(<span class="dt">const</span> <span class="dt">char</span> **a, <span class="dt">const</span> <span class="dt">char</span> **b)
+{
+ <span class="dt">const</span> <span class="dt">char</span> *temp;
+
+ temp = *a;
+ *a = *b;
+ *b = temp;
+}
+
+<span class="co">/* this is the internal routine that assumes all strings are equal for the</span>
+<span class="co"> * first k characters */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+radixSortInternal(<span class="dt">int</span> n, <span class="dt">const</span> <span class="dt">char</span> **a, <span class="dt">int</span> k)
+{
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> count[UCHAR_MAX<span class="dv">+1</span>]; <span class="co">/* number of strings with given character in position k */</span>
+ <span class="dt">int</span> mode; <span class="co">/* most common position-k character */</span>
+ <span class="dt">const</span> <span class="dt">char</span> **bucket[UCHAR_MAX<span class="dv">+1</span>]; <span class="co">/* position of character block in output */</span>
+ <span class="dt">const</span> <span class="dt">char</span> **top[UCHAR_MAX<span class="dv">+1</span>]; <span class="co">/* first unused index in this character block */</span>
+
+ <span class="co">/* loop implements tail recursion on most common character */</span>
+ <span class="kw">while</span>(n &gt; <span class="dv">1</span>) {
+
+ <span class="co">/* count occurrences of each character */</span>
+ memset(count, <span class="dv">0</span>, <span class="kw">sizeof</span>(<span class="dt">int</span>)*(UCHAR_MAX<span class="dv">+1</span>));
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ count[(<span class="dt">unsigned</span> <span class="dt">char</span>) a[i][k]]++;
+ }
+
+ <span class="co">/* find the most common nonzero character */</span>
+ <span class="co">/* we will handle this specially */</span>
+ mode = <span class="dv">1</span>;
+ <span class="kw">for</span>(i = <span class="dv">2</span>; i &lt; UCHAR_MAX<span class="dv">+1</span>; i++) {
+ <span class="kw">if</span>(count[i] &gt; count[mode]) {
+ mode = i;
+ }
+ }
+
+ <span class="kw">if</span>(count[mode] &lt; n) {
+
+ <span class="co">/* generate bucket and top fields */</span>
+ bucket[<span class="dv">0</span>] = top[<span class="dv">0</span>] = a;
+ <span class="kw">for</span>(i = <span class="dv">1</span>; i &lt; UCHAR_MAX<span class="dv">+1</span>; i++) {
+ top[i] = bucket[i] = bucket[i<span class="dv">-1</span>] + count[i<span class="dv">-1</span>];
+ }
+
+ <span class="co">/* reorder elements by k-th character */</span>
+ <span class="co">/* this is similar to dutch flag algorithm */</span>
+ <span class="co">/* we start at bottom character and swap values out until everything is in place */</span>
+ <span class="co">/* invariant is that for all i, bucket[i] &lt;= j &lt; top[i] implies a[j][k] == i */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; UCHAR_MAX<span class="dv">+1</span>; i++) {
+ <span class="kw">while</span>(top[i] &lt; bucket[i] + count[i]) {
+ <span class="kw">if</span>((<span class="dt">unsigned</span> <span class="dt">char</span>) top[i][<span class="dv">0</span>][k] == i) {
+ <span class="co">/* leave it in place, advance top */</span>
+ top[i]++;
+ } <span class="kw">else</span> {
+ <span class="co">/* swap with top of appropriate block */</span>
+ swapStrings(top[i], top[(<span class="dt">unsigned</span> <span class="dt">char</span>) top[i][<span class="dv">0</span>][k]]++);
+ }
+ }
+ }
+
+ <span class="co">/* we have now reordered everything */</span>
+ <span class="co">/* recurse on all but 0 and mode */</span>
+ <span class="kw">for</span>(i = <span class="dv">1</span>; i &lt; UCHAR_MAX<span class="dv">+1</span>; i++) {
+ <span class="kw">if</span>(i != mode) {
+ radixSortInternal(count[i], bucket[i], k<span class="dv">+1</span>);
+ }
+ }
+
+ <span class="co">/* tail recurse on mode */</span>
+ n = count[mode];
+ a = bucket[mode];
+ k = k<span class="dv">+1</span>;
+
+ } <span class="kw">else</span> {
+
+ <span class="co">/* tail recurse on whole pile */</span>
+ k = k<span class="dv">+1</span>;
+ }
+ }
+}
+
+<span class="dt">void</span>
+radixSort(<span class="dt">int</span> n, <span class="dt">const</span> <span class="dt">char</span> **a)
+{
+ radixSortInternal(n, a, <span class="dv">0</span>);
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/sorting/radixSort/radixSort.c" class="uri">examples/sorting/radixSort/radixSort.c</a>
+</div>
+<p>Some additional files: <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/sorting/radixSort/radixSort.h">radixSort.h</a>, <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/sorting/radixSort/test_radixSort.c">test_radixSort.c</a>, <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/sorting/radixSort/Makefile">Makefile</a>, <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/sorting/radixSort/sortInput.c">sortInput.c</a>. The last is a program that sorts lines on <code class="backtick">stdin</code> and writes the result to <code class="backtick">stdout</code>, similar to the GNU <code class="backtick">sort</code> utility. When compiled with <code class="backtick">-O3</code> and run on my machine, this runs in about the same time as the standard <code class="backtick">sort</code> program when run on a 4.7 million line input file consisting of a random shuffle of 20 copies of <code class="backtick">/usr/share/dict/words</code>, provided <code class="backtick">sort</code> is run with <code class="backtick">LANG=C&nbsp;sort&nbsp;&lt;&nbsp;/usr/share/dict/words</code> to keep it from having to deal with locale-specific collating issues. On other inputs, <code class="backtick">sort</code> is faster. This is not bad given how thoroughly <code class="backtick">sort</code> has been optimized, but is a sign that further optimization is possible.</p>
+<h1 id="other-topics-not-covered-in-detail-in-2015"><span class="header-section-number">6</span> Other topics not covered in detail in 2015</h1>
+<p>These are mostly leftovers from previous versions of the class where different topics were emphasized.</p>
+<h2 id="more-applications-of-function-pointers"><span class="header-section-number">6.1</span> More applications of function pointers</h2>
+<p>Here we show how to implement various mechanisms often found in more
+sophisticated programming languages in C using function pointers.</p>
+<h3 id="iterators"><span class="header-section-number">6.1.1</span> Iterators</h3>
+<p>Suppose we have an abstract data type that represents some sort of
+container, such as a list or dictionary. We'd like to be able to do
+something to every element of the container; say, count them up. How can
+ we write operations on the abstract data type to allow this, without
+exposing the implementation?</p>
+<p>To make the problem more concrete, let's suppose we have an abstract
+data type that represents the set of all non-negative numbers less than
+some fixed bound. The core of its interface might look like this:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/*</span>
+<span class="co"> * Abstract data type representing the set of numbers from 0 to</span>
+<span class="co"> * bound-1 inclusive, where bound is passed in as an argument at creation.</span>
+<span class="co"> */</span>
+<span class="kw">typedef</span> <span class="kw">struct</span> nums *Nums;
+
+<span class="co">/* Create a Nums object with given bound. */</span>
+Nums nums_create(<span class="dt">int</span> bound);
+
+<span class="co">/* Destructor */</span>
+<span class="dt">void</span> nums_destroy(Nums);
+
+<span class="co">/* Returns 1 if nums contains element, 0 otherwise */</span>
+<span class="dt">int</span> nums_contains(Nums nums, <span class="dt">int</span> element);</code></pre></div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include "nums.h"</span>
+
+<span class="kw">struct</span> nums {
+ <span class="dt">int</span> bound;
+};
+
+Nums nums_create(<span class="dt">int</span> bound)
+{
+ <span class="kw">struct</span> nums *n;
+ n = malloc(<span class="kw">sizeof</span>(*n));
+ n-&gt;bound = bound;
+ <span class="kw">return</span> n;
+}
+
+<span class="dt">void</span> nums_destroy(Nums n) { free(n); }
+
+<span class="dt">int</span> nums_contains(Nums n, <span class="dt">int</span> element)
+{
+ <span class="kw">return</span> element &gt;= <span class="dv">0</span> &amp;&amp; element &lt; n-&gt;bound;
+}</code></pre></div>
+<p>From the outside, a <code>Nums</code> acts like the set of numbers from <code>0</code> to <code>bound&nbsp;-&nbsp;1</code>; <code>nums_contains</code> will insist that it contains any <code>int</code> that is in this set and contains no <code>int</code> that is not in this set.</p>
+<p>Let's suppose now that we want to loop over all elements of some <code>Nums</code>, say to add them together. In particular, we'd like to implement the following pseudocode, where <code>nums</code> is some <code>Nums</code> instance:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c">sum = <span class="dv">0</span>;
+<span class="kw">for</span>(each i in nums) {
+ sum += i;
+}</code></pre></div>
+<p>One way to do this would be to build the loop into some operation in <code>nums.c</code>, including its body. But we'd like to be able to substitute any body for the <code>sum&nbsp;+=&nbsp;i</code> line. Since we can't see the inside of a <code>Nums</code>, we need to have some additional operation or operations on a <code>Nums</code> that lets us write the loop. How can we do this?</p>
+<h4 id="Option_1:_Function_that_returns_a_sequence"><span class="header-section-number">6.1.1.1</span> Option 1: Function that returns a sequence</h4>
+<p>A data-driven approach might be to add a <code>nums_contents</code>
+function that returns a sequence of all elements of some instance,
+perhaps in the form of an array or linked list. The advantage of this
+approach is that once you have the sequence, you don't need to worry
+about changes to (or destruction of) the original object. The
+disadvantage is that you have to deal with storage management issues,
+and have to pay the costs in time and space of allocating and filling in
+ the sequence. This can be particularly onerous for a "virtual"
+container like <code>Nums</code>, since we could conceivably have a <code>Nums</code> instance with billions of elements.</p>
+<p>Bearing these facts in mind, let's see what this approach might look like. We'll define a new function <code>nums_contents</code> that returns an array of <code>int</code>s, terminated by a <code>-1</code> sentinel:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span> *
+nums_contents(Nums n)
+{
+ <span class="dt">int</span> *a;
+ <span class="dt">int</span> i;
+ a = malloc(<span class="kw">sizeof</span>(*a) * (n-&gt;bound + <span class="dv">1</span>));
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n-&gt;bound; i++) a[i] = i;
+ a[n-&gt;bound] = -<span class="dv">1</span>;
+ <span class="kw">return</span> a;
+}</code></pre></div>
+<p>We might use it like this:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> sum = <span class="dv">0</span>;
+ contents = nums_contents(nums);
+ <span class="kw">for</span>(p = contents; *p != -<span class="dv">1</span>; p++) {
+ sum += *p;
+ }
+ free(contents);</code></pre></div>
+<p>Despite the naturalness of the approach, returning a sequence in this case leads to the <em>most</em> code complexity of the options we will examine.</p>
+<h4 id="Option_2:_Iterator_with_first.2Fdone.2Fnext_operations"><span class="header-section-number">6.1.1.2</span> Option 2: Iterator with first/done/next operations</h4>
+<p>If we don't want to look at all the elements at once, but just want to process them one at a time, we can build an <em>iterator</em>.
+ An iterator is an object that allows you to step through the contents
+of another object, by providing convenient operations for getting the
+first element, testing when you are done, and getting the next element
+if you are not. In C, we try to design iterators to have operations that
+ fit well in the top of a <code>for</code> loop.</p>
+<p>For the <code>Nums</code> type, we'll make each <code>Nums</code> its own iterator. The new operations are given here:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span> nums_first(Nums n) { <span class="kw">return</span> <span class="dv">0</span>; }
+<span class="dt">int</span> nums_done(Nums n, <span class="dt">int</span> val) { <span class="kw">return</span> val &gt;= n-&gt;bound; }
+<span class="dt">int</span> nums_next(Nums n, <span class="dt">int</span> val) { <span class="kw">return</span> val<span class="dv">+1</span>; }</code></pre></div>
+<p>And we use them like this:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> sum = <span class="dv">0</span>;
+ <span class="kw">for</span>(i = nums_first(nums); !nums_done(nums, i); i = nums_next(nums, i)) {
+ sum += i;
+ }</code></pre></div>
+<p>Not only do we completely avoid the overhead of building a sequence,
+we also get much cleaner code. It helps in this case that all we need to
+ find the next value is the previous one; for a more complicated problem
+ we might have to create and destroy a separate iterator object that
+holds the state of the loop. But for many tasks in C, the
+first/done/next idiom is a pretty good one.</p>
+<h4 id="Option_3:_Iterator_with_function_argument"><span class="header-section-number">6.1.1.3</span> Option 3: Iterator with function argument</h4>
+<p>Suppose we have a very complicated iteration, say one that might
+require several nested loops or even a recursion to span all the
+elements. In this case it might be very difficult to provide
+first/done/next operations, because it would be hard to encode the state
+ of the iteration so that we could easily pick up in the next operation
+where we previously left off. What we'd really like to do is to be able
+to plug arbitrary code into the innermost loop of our horrible iteration
+ procedure, and do it in a way that is reasonably typesafe and doesn't
+violate our abstraction barrier. This is a job for function pointers,
+and an example of the <em>functional programming style</em> in action.</p>
+<p>We'll define a <code>nums_foreach</code> function that takes a function as an argument:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span> nums_foreach(Nums n, <span class="dt">void</span> (*f)(<span class="dt">int</span>, <span class="dt">void</span> *), <span class="dt">void</span> *f_data)
+{
+ <span class="dt">int</span> i;
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n-&gt;bound; i++) f(i, f_data);
+}</code></pre></div>
+<p>The <code>f_data</code> argument is used to pass extra state into the passed-in function <code>f</code>; it's a <code>void&nbsp;*</code> because we want to let <code>f</code> work on any sort of extra state it likes.</p>
+<p>Now to do our summation, we first define an extra function <code>sum_helper</code>, which adds each element to an accumulator pointed to by <code>f_data</code>:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">static</span> <span class="dt">void</span> sum_helper(<span class="dt">int</span> i, <span class="dt">void</span> *f_data)
+{
+ *((<span class="dt">int</span> *) f_data) += i;
+}</code></pre></div>
+<p>We then feed <code>sum_helper</code> to the <code>nums_foreach</code> function:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> sum = <span class="dv">0</span>;
+ nums_foreach(nums, sum_helper, (<span class="dt">void</span> *) &amp;sum);</code></pre></div>
+<p>There is a bit of a nuisance in having to define the auxiliary <code>sum_helper</code> function and in all the casts to and from <code>void</code>,
+ but on the whole the complexity of this solution is not substantially
+greater than the first/done/next approach. Which you should do depends
+on whether it's harder to encapsulate the state of the iterator (in
+which case the functional approach is preferable) or of the loop body
+(in which case the first/done/next approach is preferable), and whether
+you need to bail out of the loop early (which would require special
+support from the <code>foreach</code> procedure, perhaps checking a
+return value from the function). However, it's almost always
+straightforward to encapsulate the state of a loop body; just build a <code>struct</code> containing all the variables that it uses, and pass a pointer to this struct as <code>f_data</code>.</p>
+<h3 id="closures"><span class="header-section-number">6.1.2</span> Closures</h3>
+<p>A <strong>closure</strong> is a function plus some associated state. A simple way to implement closures in C is to use a <code class="backtick">static</code>
+ local variable, but then you only get one. Better is to allocate the
+state somewhere and pass it around with the function. For example,
+here's a simple functional implementation of infinite sequences:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* a sequence is an object that returns a new value each time it is called */</span>
+<span class="kw">struct</span> sequence {
+ <span class="dt">int</span> (*next)(<span class="dt">void</span> *data);
+ <span class="dt">void</span> *data;
+};
+
+<span class="kw">typedef</span> <span class="kw">struct</span> sequence *Sequence;
+
+Sequence
+create_sequence(<span class="dt">int</span> (*next)(<span class="dt">void</span> *data), <span class="dt">void</span> *data)
+{
+ Sequence s;
+
+ s = malloc(<span class="kw">sizeof</span>(*s));
+ assert(s);
+
+ s-&gt;next = next;
+ s-&gt;data = data;
+
+ <span class="kw">return</span> s;
+}
+
+<span class="dt">int</span>
+sequence_next(Sequence s)
+{
+ <span class="kw">return</span> s-&gt;next(s-&gt;data);
+}</code></pre></div>
+<p>And here are some examples of sequences:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* make a constant sequence that always returns x */</span>
+<span class="dt">static</span> <span class="dt">int</span>
+constant_sequence_next(<span class="dt">void</span> *data)
+{
+ <span class="kw">return</span> *((<span class="dt">int</span> *) data);
+}
+
+Sequence
+constant_sequence(<span class="dt">int</span> x)
+{
+ <span class="dt">int</span> *data;
+
+ data = malloc(<span class="kw">sizeof</span>(*data));
+ <span class="kw">if</span>(data == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+
+ *data = x;
+
+ <span class="kw">return</span> create_sequence(constant_sequence_next, data);
+}
+
+<span class="co">/* make a sequence x, x+a, x+2*a, x+3*a, ... */</span>
+<span class="kw">struct</span> arithmetic_sequence_data {
+ <span class="dt">int</span> cur;
+ <span class="dt">int</span> step;
+};
+
+<span class="dt">static</span> <span class="dt">int</span>
+arithmetic_sequence_next(<span class="dt">void</span> *data)
+{
+ <span class="kw">struct</span> arithmetic_sequence_data *d;
+
+ d = data;
+ d-&gt;cur += d-&gt;step;
+
+ <span class="kw">return</span> d-&gt;cur;
+}
+
+Sequence
+arithmetic_sequence(<span class="dt">int</span> x, <span class="dt">int</span> a)
+{
+ <span class="kw">struct</span> arithmetic_sequence_data *d;
+
+ d = malloc(<span class="kw">sizeof</span>(*d));
+ <span class="kw">if</span>(d == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+
+ d-&gt;cur = x - a; <span class="co">/* back up so first value returned is x */</span>
+ d-&gt;step = a;
+
+ <span class="kw">return</span> create_sequence(arithmetic_sequence_next, d);
+}
+
+<span class="co">/* Return the sum of two sequences */</span>
+<span class="dt">static</span> <span class="dt">int</span>
+add_sequences_next(<span class="dt">void</span> *data)
+{
+ Sequence *s;
+
+ s = data;
+ <span class="kw">return</span> sequence_next(s[<span class="dv">0</span>]) + sequence_next(s[<span class="dv">1</span>]);
+}
+
+Sequence
+add_sequences(Sequence s0, Sequence s1)
+{
+ Sequence *s;
+
+ s = malloc(<span class="dv">2</span>*<span class="kw">sizeof</span>(*s));
+ <span class="kw">if</span>(s == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+
+ s[<span class="dv">0</span>] = s0;
+ s[<span class="dv">1</span>] = s1;
+
+ <span class="kw">return</span> create_sequence(add_sequences_next, s);
+}
+
+<span class="co">/* Return the sequence x, f(x), f(f(x)), ... */</span>
+<span class="kw">struct</span> iterated_function_sequence_data {
+ <span class="dt">int</span> x;
+ <span class="dt">int</span> (*f)(<span class="dt">int</span>);
+};
+
+<span class="dt">static</span> <span class="dt">int</span>
+iterated_function_sequence_next(<span class="dt">void</span> *data)
+{
+ <span class="kw">struct</span> iterated_function_sequence_data *d;
+ <span class="dt">int</span> retval;
+
+ d = data;
+
+ retval = d-&gt;x;
+ d-&gt;x = d-&gt;f(d-&gt;x);
+
+ <span class="kw">return</span> retval;
+}
+
+Sequence
+iterated_function_sequence(<span class="dt">int</span> (*f)(<span class="dt">int</span>), <span class="dt">int</span> x0)
+{
+ <span class="kw">struct</span> iterated_function_sequence_data *d;
+
+ d = malloc(<span class="kw">sizeof</span>(*d));
+ <span class="kw">if</span>(d == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+
+ d-&gt;x = x0;
+ d-&gt;f = f;
+
+ <span class="kw">return</span> create_sequence(iterated_function_sequence_next, d);
+}</code></pre></div>
+<p>Note that we haven't worried about how to free the <code class="backtick">data</code> field inside a <code class="backtick">Sequence</code>,
+ and indeed it's not obvious that we can write a generic data-freeing
+routine since we don't know what structure it has. The solution is to
+add more function pointers to a <code class="backtick">Sequence</code>,
+so that we can get the next value, get the sequence to destroy itself,
+etc. When we do so, we have gone beyond building a closure to building
+an <strong>object</strong>.</p>
+<h3 id="Objects"><span class="header-section-number">6.1.3</span> Objects</h3>
+<p>Here's an example of a hierarchy of counter objects. Each counter object has (at least) three operations: <code class="backtick">reset</code>, <code class="backtick">next</code>, and <code class="backtick">destroy</code>. To call the <code class="backtick">next</code> operation on counter <code class="backtick">c</code> we include <code class="backtick">c</code> as the first argument, e.g. <code class="backtick">c-&gt;next(c)</code> (one could write a wrapper to enforce this).</p>
+<p>The main trick is that we define a basic counter structure and then
+extend it to include additional data, using lots of pointer conversions
+to make everything work.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* use preprocessor to avoid rewriting these */</span>
+<span class="ot">#define COUNTER_FIELDS \</span>
+<span class="ot"> void (*reset)(struct counter *); \</span>
+<span class="ot"> int (*next)(struct counter *); \</span>
+<span class="ot"> void (*destroy)(struct counter *);</span>
+
+<span class="kw">struct</span> counter {
+ COUNTER_FIELDS
+};
+
+<span class="kw">typedef</span> <span class="kw">struct</span> counter *Counter;
+
+<span class="co">/* minimal counter--always returns zero */</span>
+<span class="co">/* we don't even allocate this, just have one global one */</span>
+<span class="dt">static</span> <span class="dt">void</span> noop(Counter c) { ; }
+<span class="dt">static</span> <span class="dt">int</span> return_zero(Counter c) { <span class="kw">return</span> <span class="dv">0</span>; }
+<span class="dt">static</span> <span class="kw">struct</span> counter Zero_counter = { noop, return_zero, noop };
+
+Counter
+make_zero_counter(<span class="dt">void</span>)
+{
+ <span class="kw">return</span> &amp;Zero_counter;
+}
+
+<span class="co">/* a fancier counter that iterates a function sequence */</span>
+<span class="co">/* this struct is not exported anywhere */</span>
+<span class="kw">struct</span> ifs_counter {
+
+ <span class="co">/* copied from struct counter declaration */</span>
+ COUNTER_FIELDS
+
+ <span class="co">/* new fields */</span>
+ <span class="dt">int</span> init;
+ <span class="dt">int</span> cur;
+ <span class="dt">int</span> (*f)(<span class="dt">int</span>); <span class="co">/* update rule */</span>
+};
+
+<span class="dt">static</span> <span class="dt">void</span>
+ifs_reset(Counter c)
+{
+ <span class="kw">struct</span> ifs_counter *ic;
+
+ ic = (<span class="kw">struct</span> ifs_counter *) c;
+
+ ic-&gt;cur = ic-&gt;init;
+}
+
+<span class="dt">static</span> <span class="dt">int</span>
+ifs_next(Counter c)
+{
+ <span class="kw">struct</span> ifs_counter *ic;
+ <span class="dt">int</span> ret;
+
+ ic = (<span class="kw">struct</span> ifs_counter *) c;
+
+ ret = ic-&gt;cur;
+ ic-&gt;cur = ic-&gt;f(ic-&gt;cur);
+
+ <span class="kw">return</span> ret;
+}
+
+Counter
+make_ifs_counter(<span class="dt">int</span> init, <span class="dt">int</span> (*f)(<span class="dt">int</span>))
+{
+ <span class="kw">struct</span> ifs_counter *ic;
+
+ ic = malloc(<span class="kw">sizeof</span>(*ic));
+
+ ic-&gt;reset = ifs_reset;
+ ic-&gt;next = ifs_next;
+ ic-&gt;destroy = (<span class="dt">void</span> (*)(<span class="kw">struct</span> counter *)) free;
+
+ ic-&gt;init = init;
+ ic-&gt;cur = init;
+ ic-&gt;f = f;
+
+ <span class="co">/* it's always a Counter on the outside */</span>
+ <span class="kw">return</span> (Counter) ic;
+}</code></pre></div>
+<p>A typical use might be</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">static</span> <span class="dt">int</span>
+times2(<span class="dt">int</span> x)
+{
+ <span class="kw">return</span> x*<span class="dv">2</span>;
+}
+
+<span class="dt">void</span>
+print_powers_of_2(<span class="dt">void</span>)
+{
+ <span class="dt">int</span> i;
+ Counter c;
+
+ c = make_ifs_counter(<span class="dv">1</span>, times2);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; <span class="dv">10</span>; i++) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, c-&gt;next(c));
+ }
+
+ c-&gt;reset(c);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; <span class="dv">20</span>; i++) {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, c-&gt;next(c));
+ }
+
+ c-&gt;destroy(c);
+}</code></pre></div>
+<h2 id="suffixArrays"><span class="header-section-number">6.2</span> Suffix arrays</h2>
+<p>These are notes on practical implementations of suffix arrays, which
+are a data structure for searching quickly for substrings of a given
+large string.</p>
+<h3 id="Why_do_we_want_to_do_this.3F"><span class="header-section-number">6.2.1</span> Why do we want to do this?</h3>
+<ul>
+<li>Answer from the old days: Fast string searching is the key to
+dealing with mountains of information. Why, a modern (c. 1960)
+electronic computer can search the equivalent of hundreds of pages of
+text in just a few hours...</li>
+<li>More recent answer:
+<ul>
+<li>We still need to search enormous corpuses of text (see <a href="http://www.google.com/" class="uri">http://www.google.com</a>).</li>
+<li>Algorithms for finding long repeated substrings or patterns can be useful for <a href="http://en.wikipedia.org/wiki/Data_compression" title="WikiPedia">data compression</a> or detecting plagiarism.</li>
+<li>Finding all occurrences of a particular substring in some huge corpus is the basis of <a href="http://en.wikipedia.org/wiki/Statistical_machine_translation" title="WikiPedia">statistical machine translation</a>.</li>
+<li>We are made out of strings over a particular finite alphabet GATC. String search is a central tool in computational biology.</li>
+</ul></li>
+</ul>
+<h3 id="String_search_algorithms"><span class="header-section-number">6.2.2</span> String search algorithms</h3>
+<p>Without preprocessing, searching an <span class="math inline"><em>n</em></span>-character string for an <span class="math inline"><em>m</em></span>-character substring can be done using algorithms of varying degrees of sophistication, the worst of which run in time <span class="math inline"><em>O</em>(<em>n</em><em>m</em>)</span> (run <code class="backtick">strncmp</code> on each position in the big string), and best of which run in time <span class="math inline"><em>O</em>(<em>n</em> + <em>m</em>)</span> (run the <a href="http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm" title="WikiPedia">Boyer-Moore string search algorithm</a>).
+ But we are interested in the case where we can preprocess our big
+string into a data structure that will let us do lots of searches for
+cheap.</p>
+<h3 id="Suffix_trees_and_suffix_arrays"><span class="header-section-number">6.2.3</span> Suffix trees and suffix arrays</h3>
+<p><strong>Suffix trees</strong> and <strong>suffix arrays</strong> are
+data structures for representing texts that allow substring queries like
+ "where does this pattern appear in the text" or "how many times does
+this pattern occur in the text" to be answered quickly. Both work by
+storing all suffixes of a text, where a <em>suffix</em> is a substring that runs to the end of the text. Of course, storing actual copies of all suffixes of an <span class="math inline"><em>n</em></span>-character text would take <span class="math inline"><em>O</em>(<em>n</em><sup>2</sup>)</span> space, so instead each suffix is represented by a pointer to its first character.</p>
+<p>A suffix array stores all the suffixes sorted in dictionary order. For example, the suffix array of the string <code class="backtick">abracadabra</code>
+ is shown below. The actual contents of the array are the indices in the
+ left-hand column; the right-hand shows the corresponding suffixes.</p>
+<pre><code>11 \0
+10 a\0
+ 7 abra\0
+ 0 abracadabra\0
+ 3 acadabra\0
+ 5 adabra\0
+ 8 bra\0
+ 1 bracadabra\0
+ 4 cadabra\0
+ 6 dabra\0
+ 9 ra\0
+ 2 racadabra\0</code></pre>
+<p>A suffix tree is similar, but instead of using an array, we use some
+sort of tree data structure to hold the sorted list. A common choice
+given an alphabet of some fixed size <span class="math inline"><em>k</em></span> is a <a href="#radixSearch">trie</a>, in which each node at depth <span class="math inline"><em>d</em></span> represents a string of length <span class="math inline"><em>d</em></span>, and its up to <span class="math inline"><em>k</em></span> children represent all <span class="math inline">(<em>d</em> + 1)</span>-character extensions of the string. The advantage of using a suffix trie is that searching for a string of length <span class="math inline"><em>m</em></span> takes <span class="math inline"><em>O</em>(<em>m</em>)</span> time, since we can just walk down the trie at the rate of one node per character in <span class="math inline"><em>m</em></span>. A further optimization is to replace any long chain of single-child nodes with a <em>compressed</em>
+ edge labeled with the concatenation of all the characters in the chain.
+Such compressed suffix tries can not only be searched in linear time but
+ can also be constructed in linear time with a sufficiently clever
+algorithm (we won't actually do this here). Of course, we could also use
+ a simple balanced binary tree, which might be preferable if the
+alphabet is large.</p>
+<p>The disadvantage of suffix trees over suffix arrays is that they
+generally require more space to store all the internal pointers in the
+tree. If we are indexing a huge text (or collection of texts), this
+extra space may be too expensive.</p>
+<h4 id="Building_a_suffix_array"><span class="header-section-number">6.2.3.1</span> Building a suffix array</h4>
+<p>A straightforward approach to building a suffix array is to run any
+decent comparison-based sorting algorithm on the set of suffixes
+(represented by pointers into the text). This will take <span class="math inline"><em>O</em>(<em>n</em>log<em>n</em>)</span> comparisons, but in the worst case each comparison will take <span class="math inline"><em>O</em>(<em>n</em>)</span> time, for a total of <span class="math inline"><em>O</em>(<em>n</em><sup>2</sup>log<em>n</em>)</span> time. This is the approach used in the sample code below.</p>
+<p>The original suffix array paper by Manber and Myers ("Suffix arrays: a
+ new method for on-line string searches," SIAM Journal on Computing
+22(5):935-948, 1993) gives an <span class="math inline"><em>O</em>(<em>n</em>log<em>n</em>)</span>
+ algorithm, somewhat resembling radix sort, for building suffix arrays
+in place with only a small amount of additional space. They also note
+that for random text, simple radix sorting works well, since most
+suffixes become distinguishable after about <span class="math inline">log<sub><em>k</em></sub><em>n</em></span> characters (where <span class="math inline"><em>k</em></span> is the size of the alphabet); this gives a cost of <span class="math inline"><em>O</em>(<em>n</em>log<em>n</em>)</span>
+ to do the sort, since radix sort only looks at the bytes it needs to
+once. For a comparison-based sort, the same assumption gives an <span class="math inline"><em>O</em>(<em>n</em>log<sup>2</sup><em>n</em>)</span> running time; this is a factor of <span class="math inline">log<em>n</em></span> slower, but this may be acceptable if programmer time is more important.</p>
+<p>The fastest approach is to build a suffix tree in <span class="math inline"><em>O</em>(<em>n</em>)</span>
+ time and extract the suffix array by traversing the tree. The only
+complication is that we need the extra space to build the tree, although
+ we get it back when we throw the tree away.</p>
+<h4 id="Searching_a_suffix_array"><span class="header-section-number">6.2.3.2</span> Searching a suffix array</h4>
+<p>Suppose we have a suffix array corresponding to an <span class="math inline"><em>n</em></span>-character text and we want to find all occurrences in the text of an <span class="math inline"><em>m</em></span>-character
+ pattern. Since the suffixes are ordered, the easiest solution is to do
+binary search for the first and last occurrences of the pattern (if any)
+ using <span class="math inline"><em>O</em>(log<em>n</em>)</span>
+comparisons. (The code below does something even lazier than this,
+searching for some match and then scanning linearly for the first and
+last matches.) Unfortunately, each comparison may take as much as <span class="math inline"><em>O</em>(<em>m</em>)</span> time, since we may have to check all <span class="math inline"><em>m</em></span> characters of the pattern. So the total cost will be <span class="math inline"><em>O</em>(<em>m</em>log<em>n</em>)</span> in the worst case.</p>
+<p>By storing additional information about the longest common prefix of
+consecutive suffixes, it is possible to avoid having to re-examine every
+ character in the pattern for every comparison, reducing the search cost
+ to <span class="math inline"><em>O</em>(<em>m</em> + log<em>n</em>)</span>.
+ With a sufficiently clever algorithm, this information can be computed
+in linear time, and can also be used to solve quickly such problems as
+finding the longest duplicate substrings, or most frequently occurring
+strings. For more details, see (Gusfield, Dan. <em>Algorithms on Strings, Trees, and Sequences: Computer Science and Computational Biology</em>. Cambridge University Press, 1997, §7.14.4).</p>
+<p>Using binary search on the suffix array, most searching tasks are now easy:</p>
+<ul>
+<li>Finding if a substring appears in the array uses binary search directly.</li>
+<li>Finding all occurrences requires two binary searches, one for the
+first occurrence and one for the last. If we only want to count the
+occurrences and not return their positions, this takes <span class="math inline"><em>O</em>(<em>m</em> + log<em>n</em>)</span> time. If we want to return their positions, it takes <span class="math inline"><em>O</em>(<em>m</em> + log<em>n</em> + <em>k</em>)</span> time, where <span class="math inline"><em>k</em></span> is the number of times the pattern occurs.</li>
+<li>Finding duplicate substrings of length <span class="math inline"><em>m</em></span> or more can be done by looking for adjacent entries in the array with long common prefixes, which takes <span class="math inline"><em>O</em>(<em>m</em><em>n</em>)</span> time in the worst case if done naively (and <span class="math inline"><em>O</em>(<em>n</em>)</span> time if we have already computed longest common prefix information).</li>
+</ul>
+<h2 id="Burrows-Wheeler_transform"><span class="header-section-number">6.3</span> Burrows-Wheeler transform</h2>
+<p>Closely related to suffix arrays is the <strong>Burrows-Wheeler transform</strong> (Burrows and Wheeler, <em>A Block-Sorting Lossless Data Compression Algorithm</em>,
+ DEC Systems Research Center Technical Report number 124, 1994), which
+is the basis for some of the best currently known algorithms for text
+compression (it's the technique that is used, for example, in <a href="http://www.bzip.org/">bzip2</a>).</p>
+<p>The idea of the Burrows-Wheeler Transform is to construct an array
+whose rows are all cyclic shifts of the input string in dictionary
+order, and return the last column of the array. The last column will
+tend to have long runs of identical characters, since whenever some
+substring (like <code class="backtick">the</code>) appears repeatedly in the input, shifts that put the first character <code class="backtick">t</code> in the last column will put the rest of the substring <code class="backtick">he</code>
+ in the first columns, and the resulting rows will tend to be sorted
+together. The relative regularity of the last column means that it will
+compress well with even very simple compression algorithms like
+run-length encoding.</p>
+<p>Below is an example of the Burrows-Wheeler transform in action, with <code class="backtick">$</code> marking end-of-text. The transformed value of <code class="backtick">abracadabra$</code> is <code class="backtick">$drcraaaabba</code>, the last column of the sorted array; note the long run of a's (and the shorter run of b's).</p>
+<pre><code>abracadabra$ abracadabra$
+bracadabra$a abra$abracad
+racadabra$ab acadabra$abr
+acadabra$abr adabra$abrac
+cadabra$abra a$abracadabr
+adabra$abrac bracadabra$a
+dabra$abraca --&gt; bra$abracada
+abra$abracad cadabra$abra
+bra$abracada dabra$abraca
+ra$abracadab racadabra$ab
+a$abracadabr ra$abracadab
+$abracadabra $abracadabra</code></pre>
+<p>The most useful property of the Burrows-Wheeler transform is that it
+can be inverted; this distinguishes it from other transforms that
+produce long runs like simply sorting the characters. We'll describe two
+ ways to do this; the first is less efficient, but more easily grasped,
+and involves rebuilding the array one column at a time, starting at the
+left. Observe that the leftmost column is just all the characters in the
+ string in sorted order; we can recover it by sorting the rightmost
+column, which we have to start off with. If we paste the rightmost and
+leftmost columns together, we have the list of all 2-character
+substrings of the original text; sorting this list gives the first <em>two</em>
+ columns of the array. (Remember that each copy of the string wraps
+around from the right to the left.) We can then paste the rightmost
+column at the beginning of these two columns, sort the result, and get
+the first three columns. Repeating this process eventually reconstructs
+the entire array, from which we can read off the original string from
+any row. The initial stages of this process for <code class="backtick">abracadabra$</code> are shown below:</p>
+<pre><code>$ a $a ab $ab abr
+d a da ab dab abr
+r a ra ac rac aca
+c a ca ad cad ada
+r a ra a$ ra$ a$a
+a b ab br abr bra
+a -&gt; b ab -&gt; br abr -&gt; bra
+a c ac ca aca cad
+a d ad da ada dab
+b r br ra bra rac
+b r br ra bra ra$
+a $ a$ $a a$a $ab</code></pre>
+<p>Rebuilding the entire array in this fashion takes <span class="math inline"><em>O</em>(<em>n</em><sup>2</sup>)</span> time and <span class="math inline"><em>O</em>(<em>n</em><sup>2</sup>)</span>
+ space. In their paper, Burrows and Wheeler showed that one can in fact
+reconstruct the original string from just the first and last columns in
+the array in <span class="math inline"><em>O</em>(<em>n</em>)</span> time.</p>
+<p>Here's the idea: Suppose that all the characters were distinct. Then
+after reconstructing the first column we would know all pairs of
+adjacent characters. So we could just start with the last character <code class="backtick">$</code>
+ and regenerate the string by appending at each step the unique
+successor to the last character so far. If all characters were distinct,
+ we would never get confused about which character comes next.</p>
+<p>The problem is what to do with pairs with duplicate first characters, like <code class="backtick">ab</code> and <code class="backtick">ac</code> in the example above. We can imagine that each <code class="backtick">a</code> in the last column is labeled in some unique way, so that we can talk about the first <code class="backtick">a</code> or the third <code class="backtick">a</code>, but how do we know which <code class="backtick">a</code> is the one that comes before <code class="backtick">b</code> or <code class="backtick">d</code>?</p>
+<p>The trick is to look closely at how the original sort works. Look at
+the rows in the original transformation. If we look at all rows that
+start with <code class="backtick">a</code>, the order they are sorted in is determined by the suffix after <code class="backtick">a</code>. These suffixes also appear as the prefixes of the rows that <em>end</em> with <code class="backtick">a</code>, since the rows that end with <code class="backtick">a</code> are just the rows that start with <code class="backtick">a</code> rotated one position. It follows that <em>all instances of the same letter occur in the same order in the first and last columns</em>. So if we use a stable sort to construct the first column, we will correctly match up instances of letters.</p>
+<p>This method is shown in action below. Each letter is annotated
+uniquely with a count of how many identical letters equal or precede it.
+ Sorting recovers the first column, and combining the last and first
+columns gives a list of unique pairs of adjacent annotated characters.
+Now start with <code class="backtick">$1</code> and construct the full sequence <code class="backtick">$1&nbsp;a1&nbsp;b1&nbsp;r1&nbsp;a3&nbsp;c1&nbsp;a4&nbsp;d1&nbsp;a2&nbsp;b2&nbsp;r2&nbsp;a5&nbsp;$1</code>. The original string is obtained by removing the end-of-string markers and annotations: <code class="backtick">abracadabra</code>.</p>
+<pre><code>$1 a1
+d1 a2
+r1 a3
+c1 a4
+r2 a5
+a1 b1
+a2 --&gt; b2
+a3 c1
+a4 d1
+b1 r1
+b2 r2
+a5 $1</code></pre>
+<p>Because we are only sorting single characters, we can perform the
+sort in linear time using counting sort. Extracting the original string
+also takes linear time if implemented reasonably.</p>
+<h3 id="Suffix_arrays_and_the_Burrows-Wheeler_transform"><span class="header-section-number">6.3.1</span> Suffix arrays and the Burrows-Wheeler transform</h3>
+<p>A useful property of the Burrows-Wheeler transform is that each row
+of the sorted array is essentially the same as the corresponding row in
+the suffix array, except for the rotated string prefix after the <code class="backtick">$</code>
+ marker. This means, among other things, that we can compute the
+Burrows-Wheeler transform in linear time using suffix trees. Ferragina
+and Manzini (<a href="http://people.unipmn.it/%7Emanzini/papers/focs00draft.pdf" class="uri">http://people.unipmn.it/~manzini/papers/focs00draft.pdf</a>)
+ have further exploited this correspondence (and some very clever
+additional ideas) to design compressed suffix arrays that compress and
+index a text at the same time, so that pattern searches can be done
+directly on the compressed text in time close to that needed for suffix
+array searches.</p>
+<h3 id="sample-implementation"><span class="header-section-number">6.3.2</span> Sample implementation</h3>
+<p>As mentioned above, this is a pretty lazy implementation of suffix
+arrays, that doesn't include many of the optimizations that would be
+necessary to deal with huge source texts.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* we expose this so user can iterate through it */</span>
+
+<span class="kw">struct</span> suffixArray {
+ size_t n; <span class="co">/* length of string INCLUDING final null */</span>
+ <span class="dt">const</span> <span class="dt">char</span> *string; <span class="co">/* original string */</span>
+ <span class="dt">const</span> <span class="dt">char</span> **suffix; <span class="co">/* suffix array of length n */</span>
+};
+
+<span class="kw">typedef</span> <span class="kw">struct</span> suffixArray *SuffixArray;
+
+<span class="co">/* construct a suffix array */</span>
+<span class="co">/* it is a bad idea to modify string before destroying this */</span>
+SuffixArray suffixArrayCreate(<span class="dt">const</span> <span class="dt">char</span> *string);
+
+<span class="co">/* destructor */</span>
+<span class="dt">void</span> suffixArrayDestroy(SuffixArray);
+
+<span class="co">/* return number of occurrences of substring */</span>
+<span class="co">/* if non-null, index of first occurrence is placed in first */</span>
+size_t
+suffixArraySearch(SuffixArray, <span class="dt">const</span> <span class="dt">char</span> *substring, size_t *first);
+
+<span class="co">/* return the Burrows-Wheeler transform of the underlying string </span>
+<span class="co"> * as malloc'd data of length sa-&gt;n */</span>
+<span class="co">/* note that this may have a null in the middle somewhere */</span>
+<span class="dt">char</span> *suffixArrayBWT(SuffixArray sa);
+
+<span class="co">/* invert BWT of null-terminated string, returning a malloc'd copy of original */</span>
+<span class="dt">char</span> *inverseBWT(size_t len, <span class="dt">const</span> <span class="dt">char</span> *s);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/suffixArray/suffixArray.h" class="uri">examples/suffixArray/suffixArray.h</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;string.h&gt;</span>
+<span class="ot">#include &lt;limits.h&gt;</span>
+
+<span class="ot">#include "suffixArray.h"</span>
+
+<span class="dt">static</span> <span class="dt">int</span>
+saCompare(<span class="dt">const</span> <span class="dt">void</span> *s1, <span class="dt">const</span> <span class="dt">void</span> *s2)
+{
+ <span class="kw">return</span> strcmp(*((<span class="dt">const</span> <span class="dt">char</span> **) s1), *((<span class="dt">const</span> <span class="dt">char</span> **) s2));
+}
+
+SuffixArray
+suffixArrayCreate(<span class="dt">const</span> <span class="dt">char</span> *s)
+{
+ size_t i;
+ SuffixArray sa;
+
+ sa = malloc(<span class="kw">sizeof</span>(*sa));
+ assert(sa);
+
+ sa-&gt;n = strlen(s) + <span class="dv">1</span>;
+ sa-&gt;string = s;
+
+ sa-&gt;suffix = malloc(<span class="kw">sizeof</span>(*sa-&gt;suffix) * sa-&gt;n);
+ assert(sa-&gt;suffix);
+
+ <span class="co">/* construct array of pointers to suffixes */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; sa-&gt;n; i++) {
+ sa-&gt;suffix[i] = s+i;
+ }
+
+ <span class="co">/* this could be a lot more efficient */</span>
+ qsort(sa-&gt;suffix, sa-&gt;n, <span class="kw">sizeof</span>(*sa-&gt;suffix), saCompare);
+
+ <span class="kw">return</span> sa;
+}
+
+<span class="dt">void</span>
+suffixArrayDestroy(SuffixArray sa)
+{
+ free(sa-&gt;suffix);
+ free(sa);
+}
+
+size_t
+suffixArraySearch(SuffixArray sa, <span class="dt">const</span> <span class="dt">char</span> *substring, size_t *first)
+{
+ size_t lo;
+ size_t hi;
+ size_t mid;
+ size_t len;
+ <span class="dt">int</span> cmp;
+
+ len = strlen(substring);
+
+ <span class="co">/* invariant: suffix[lo] &lt;= substring &lt; suffix[hi] */</span>
+ lo = <span class="dv">0</span>;
+ hi = sa-&gt;n;
+
+ <span class="kw">while</span>(lo + <span class="dv">1</span> &lt; hi) {
+ mid = (lo+hi)/<span class="dv">2</span>;
+ cmp = strncmp(sa-&gt;suffix[mid], substring, len);
+
+ <span class="kw">if</span>(cmp == <span class="dv">0</span>) {
+ <span class="co">/* we have a winner */</span>
+ <span class="co">/* search backwards and forwards for first and last */</span>
+ <span class="kw">for</span>(lo = mid; lo &gt; <span class="dv">0</span> &amp;&amp; strncmp(sa-&gt;suffix[lo<span class="dv">-1</span>], substring, len) == <span class="dv">0</span>; lo--);
+ <span class="kw">for</span>(hi = mid; hi &lt; sa-&gt;n &amp;&amp; strncmp(sa-&gt;suffix[hi<span class="dv">+1</span>], substring, len) == <span class="dv">0</span>; hi++);
+
+ <span class="kw">if</span>(first) {
+ *first = lo;
+ }
+
+ <span class="kw">return</span> hi - lo + <span class="dv">1</span>;
+ } <span class="kw">else</span> <span class="kw">if</span>(cmp &lt; <span class="dv">0</span>) {
+ lo = mid;
+ } <span class="kw">else</span> {
+ hi = mid;
+ }
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}
+
+<span class="dt">char</span> *
+suffixArrayBWT(SuffixArray sa)
+{
+ <span class="dt">char</span> *bwt;
+ size_t i;
+
+ bwt = malloc(sa-&gt;n);
+ assert(bwt);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; sa-&gt;n; i++) {
+ <span class="kw">if</span>(sa-&gt;suffix[i] == sa-&gt;string) {
+ <span class="co">/* wraps around to nul */</span>
+ bwt[i] = '\<span class="dv">0</span>';
+ } <span class="kw">else</span> {
+ bwt[i] = sa-&gt;suffix[i][-<span class="dv">1</span>];
+ }
+ }
+
+ <span class="kw">return</span> bwt;
+}
+
+<span class="dt">char</span> *
+inverseBWT(size_t len, <span class="dt">const</span> <span class="dt">char</span> *s)
+{
+ <span class="co">/* basic trick: stable sort of s gives successor indices */</span>
+ <span class="co">/* then we just thread through starting from the nul */</span>
+
+ size_t *successor;
+ <span class="dt">int</span> c;
+ size_t count[UCHAR_MAX<span class="dv">+1</span>];
+ size_t offset[UCHAR_MAX<span class="dv">+1</span>];
+ size_t i;
+ <span class="dt">char</span> *ret;
+ size_t thread;
+
+ successor = malloc(<span class="kw">sizeof</span>(*successor) * len);
+ assert(successor);
+
+ <span class="co">/* counting sort */</span>
+ <span class="kw">for</span>(c = <span class="dv">0</span>; c &lt;= UCHAR_MAX; c++) {
+ count[c] = <span class="dv">0</span>;
+ }
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; len; i++) {
+ count[(<span class="dt">unsigned</span> <span class="dt">char</span>) s[i]]++;
+ }
+
+ offset[<span class="dv">0</span>] = <span class="dv">0</span>;
+
+ <span class="kw">for</span>(c = <span class="dv">1</span>; c &lt;= UCHAR_MAX; c++) {
+ offset[c] = offset[c<span class="dv">-1</span>] + count[c<span class="dv">-1</span>];
+ }
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; len; i++) {
+ successor[offset[(<span class="dt">unsigned</span> <span class="dt">char</span>) s[i]]++] = i;
+ }
+
+ <span class="co">/* find the nul */</span>
+ <span class="kw">for</span>(thread = <span class="dv">0</span>; s[thread]; thread++);
+
+ <span class="co">/* thread the result */</span>
+ ret = malloc(len);
+ assert(ret);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>, thread = successor[thread]; i &lt; len; i++, thread = successor[thread]) {
+ ret[i] = s[thread];
+ }
+
+ <span class="kw">return</span> ret;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/suffixArray/suffixArray.c" class="uri">examples/suffixArray/suffixArray.c</a>
+</div>
+<p>Here is a Makefile and test code: <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/suffixArray/Makefile">Makefile</a>, <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/suffixArray/testSuffixArray.c">testSuffixArray.c</a>.</p>
+<p>The output of <code class="backtick">make&nbsp;test</code> shows all
+occurrences of a target string, the Burrows-Wheeler transform of the
+source string (second-to-last line), and its inversion (last line, which
+ is just the original string):</p>
+<pre><code>$ make test
+/bin/echo -n abracadabra-abracadabra-shmabracadabra | ./testSuffixArray abra
+Count: 6
+abra
+abra-abr
+abra-shm
+abracada
+abracada
+abracada
+aaarrrdddm\x00-rrrcccaaaaaaaaaaaashbbbbbb-
+abracadabra-abracadabra-shmabracadabra</code></pre>
+<h2 id="cplusplus"><span class="header-section-number">6.4</span> C++</h2>
+<p>Here we will describe some basic features of C++ that are useful for
+implementing abstract data types. Like all programming languages, C++
+comes with an ideology, which in this case emphasizes object-oriented
+features like inheritance. We will be ignoring this ideology and
+treating C++ as an improved version of C.</p>
+<p>The goal here is not to teach you all of C++, which would take a
+while, but instead to give you some hints for why you might want to
+learn C++ on your own. If you decide to learn C++ for real, Bjarne
+Stroustrup's <em>The C++ Programming Language</em> is the definitive source. A classic tutorial <a href="http://www.4p8.com/eric.brasseur/cppcen.html">here</a> aimed at C programmers introduces C++ features one at a time (some of these features have since migrated into C). The web site <a href="http://www.cplusplus.com/" class="uri">http://www.cplusplus.com</a> has extensive tutorials and documentation.</p>
+<h3 id="Hello_world"><span class="header-section-number">6.4.1</span> Hello world</h3>
+<p>The C++ version of "hello world" looks like this:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span class="ot">#include &lt;iostream&gt;</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">const</span> <span class="dt">char</span> **argv)
+{
+ std::cout &lt;&lt; <span class="st">"hi</span><span class="ch">\n</span><span class="st">"</span>;
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/c++/helloworld.cpp" class="uri">examples/c++/helloworld.cpp</a>
+</div>
+<p>Compile this using <code class="backtick">g++</code> instead of <code class="backtick">gcc</code>. Make shows how it is done:</p>
+<pre><code>$ make helloworld
+g++ helloworld.cpp -o helloworld</code></pre>
+<p>Or we could use an explicit <code class="backtick">Makefile</code>:</p>
+<pre><code>CPP=g++
+CPPFLAGS=-g3 -Wall
+
+helloworld: helloworld.o
+ $(CPP) $(CPPFLAGS) -o $@ $^</code></pre>
+<p>Now the compilation looks like this:</p>
+<pre><code>$ make helloworld
+g++ -g3 -Wall -c -o helloworld.o helloworld.cpp
+g++ -g3 -Wall -o helloworld helloworld.o</code></pre>
+<p>The main differences from the C version:</p>
+<ol style="list-style-type: decimal">
+<li><code class="backtick">#include&nbsp;&lt;stdio.h&gt;</code> is replaced by <code class="backtick">#include&nbsp;&lt;iostream&gt;</code>, which gets the C++ version of the <code class="backtick">stdio</code> library.</li>
+<li><code class="backtick">printf("hi\n")</code> is replaced by <code class="backtick">std::cout&nbsp;&lt;&lt;&nbsp;"hi\n"</code>. The stream <code class="backtick">std::cout</code> is the C++ wrapper for <code class="backtick">stdout</code>; you should read this variable name as <code class="backtick">cout</code> in the <code class="backtick">std</code> namespace. The <code class="backtick">&lt;&lt;</code>
+ operator is overloaded for streams so that it sends its right argument
+out on its left argument (see the discussion of operator overloading
+below). You can also do things like <code class="backtick">std::cout&nbsp;&lt;&lt;&nbsp;37</code>, <code class="backtick">std::cout&nbsp;&lt;&lt;&nbsp;'q'</code>, <code class="backtick">std::cout&nbsp;&lt;&lt;&nbsp;4.7</code>, etc. These all do pretty much what you expect.</li>
+</ol>
+<p>If you don't like typing <code class="backtick">std::</code> before all the built-in functions and variables, you can put <code class="backtick">using&nbsp;namespace&nbsp;std</code> somewhere early in your program, like this:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span class="ot">#include &lt;iostream&gt;</span>
+
+<span class="kw">using</span> <span class="kw">namespace</span> std;
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">const</span> <span class="dt">char</span> **argv)
+{
+ cout &lt;&lt; <span class="st">"hi</span><span class="ch">\n</span><span class="st">"</span>;
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/c++/helloworld_using.cpp" class="uri">examples/c++/helloworld_using.cpp</a>
+</div>
+<h3 id="References"><span class="header-section-number">6.4.2</span> References</h3>
+<p>Recall that in C we sometimes pass objects into functions by reference instead of by value, by using a pointer:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span> increment(<span class="dt">int</span> *x)
+{
+ (*x)++;
+}</code></pre></div>
+<p>This becomes even more useful in C++, since many of the objects we
+are dealing with are quite large, and can defend themselves against
+dangerous modifications by restricting access to their components. So
+C++ provides a special syntax allowing function parameters to be
+declared as call-by-reference rather than call-by-value. The function
+above could be rewritten in C++ as</p>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span class="dt">void</span> increment(<span class="dt">int</span> &amp;x)
+{
+ x++;
+}</code></pre></div>
+<p>The <code class="backtick">int&nbsp;&amp;x</code> declaration says that <code class="backtick">x</code> is a <strong>reference</strong> to whatever variable is passed as the argument to <code class="backtick">increment</code>. A reference acts exactly like a pointer that has already had <code class="backtick">*</code> applied to it. You can even write <code class="backtick">&amp;x</code> to get a pointer to the original variable if you want to for some reason.</p>
+<p>As with pointers, it's polite to mark a reference with <code class="backtick">const</code> if you don't intend to modify the original object:</p>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span class="dt">void</span> reportWeight(<span class="dt">const</span> SumoWrestler &amp;huge)
+{
+ cout &lt;&lt; huge.getWeight();
+}</code></pre></div>
+<p>References are also used as a return type to chain operators together; in the expression</p>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"> cout &lt;&lt; <span class="st">"hi"</span> &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;</code></pre></div>
+<p>the return type of the first <code class="backtick">&lt;&lt;</code> operator is an <code class="backtick">ostream&nbsp;&amp;</code> reference (as is <code class="backtick">cout</code>); this means that the <code class="backtick">'\n'</code> gets sent to the same object. We could make the return value be just an <code class="backtick">ostream</code>, but then <code class="backtick">cout</code>
+ would be copied, which could be expensive and would mean that the copy
+was no longer working on the same internal state as the original. This
+same trick is used when overloading the assignment operator.</p>
+<h3 id="Function_overloading"><span class="header-section-number">6.4.3</span> Function overloading</h3>
+<p>C++ lets you define multiple functions with the same name, where the
+choice of which function to call depends on the type of its arguments.
+Here is a program that demonstrates this feature:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span class="ot">#include &lt;iostream&gt;</span>
+
+<span class="kw">using</span> <span class="kw">namespace</span> std;
+
+<span class="dt">const</span> <span class="dt">char</span> *
+typeName(<span class="dt">int</span> x)
+{
+ <span class="kw">return</span> <span class="st">"int"</span>;
+}
+
+<span class="dt">const</span> <span class="dt">char</span> *
+typeName(<span class="dt">double</span> x)
+{
+ <span class="kw">return</span> <span class="st">"double"</span>;
+}
+
+<span class="dt">const</span> <span class="dt">char</span> *
+typeName(<span class="dt">char</span> x)
+{
+ <span class="kw">return</span> <span class="st">"char"</span>;
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">const</span> <span class="dt">char</span> **argv)
+{
+ cout &lt;&lt; <span class="st">"The type of "</span> &lt;&lt; <span class="dv">3</span> &lt;&lt; <span class="st">" is "</span> &lt;&lt; typeName(<span class="dv">3</span>) &lt;&lt; <span class="st">".</span><span class="ch">\n</span><span class="st">"</span>;
+ cout &lt;&lt; <span class="st">"The type of "</span> &lt;&lt; <span class="fl">3.1</span> &lt;&lt; <span class="st">" is "</span> &lt;&lt; typeName(<span class="fl">3.1</span>) &lt;&lt; <span class="st">".</span><span class="ch">\n</span><span class="st">"</span>;
+ cout &lt;&lt; <span class="st">"The type of "</span> &lt;&lt; <span class="st">'c'</span> &lt;&lt; <span class="st">" is "</span> &lt;&lt; typeName(<span class="st">'c'</span>) &lt;&lt; <span class="st">".</span><span class="ch">\n</span><span class="st">"</span>;
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/c++/functionOverloading.cpp" class="uri">examples/c++/functionOverloading.cpp</a>
+</div>
+<p>And here is what it looks like when we compile and run it:</p>
+<pre><code>$ make functionOverloading
+g++ functionOverloading.cpp -o functionOverloading
+$ ./functionOverloading
+The type of 3 is int.
+The type of 3.1 is double.
+The type of c is char.</code></pre>
+<p>Internally, <code class="backtick">g++</code> compiles three separate functions with different (and ugly) names, and when you use <code class="backtick">typeName</code> on an object of a particular type, <code class="backtick">g++</code> picks the one whose type matches. This is similar to what happens with built-in operators in straight C, where <code class="backtick">+</code> means different things depending on whether you apply it to a pair of <code class="backtick">int</code>s, a pair of <code class="backtick">double</code>s, or a pointer and an <code class="backtick">int</code>, but C++ lets you do it with your own functions.</p>
+<h3 id="Classes"><span class="header-section-number">6.4.4</span> Classes</h3>
+<p>C++ allows you to declare <strong>classes</strong> that look suspiciously like structs. The main differences between a class and a C-style struct are that (a) classes provide <strong>member functions</strong> or <strong>methods</strong>
+ that operate on instances of the class and that are called using a
+struct-like syntax; and (b) classes can distinguish between private
+members (only accessible to methods of the class) and public members
+(accessible to everybody).</p>
+<p>In C, we organize abstract data types by putting the representation
+in a struct and putting the operations on the data type in functions
+that work on this struct, often giving the functions a prefix that hints
+ at the type of their target (mostly to avoid namespace collisions).
+Classes in C++ make this connection between a data structure and the
+operations on it much more explicit.</p>
+<p>Here is a simple example of a C++ class in action:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span class="ot">#include &lt;iostream&gt;</span>
+
+<span class="kw">using</span> <span class="kw">namespace</span> std;
+
+<span class="co">/* counters can be incremented or read */</span>
+<span class="kw">class</span> Counter {
+ <span class="dt">int</span> value; <span class="co">/* private value */</span>
+<span class="kw">public</span>:
+ Counter(); <span class="co">/* constructor with default value */</span>
+ Counter(<span class="dt">int</span>); <span class="co">/* constructor with specified value */</span>
+ ~Counter(); <span class="co">/* useless destructor */</span>
+ <span class="dt">int</span> read(); <span class="co">/* get the value of the counter */</span>
+ <span class="dt">void</span> increment(); <span class="co">/* add one to the counter */</span>
+};
+
+Counter::Counter() { value = <span class="dv">0</span>; }
+Counter::Counter(<span class="dt">int</span> initialValue) { value = initialValue; }
+Counter::~Counter() { cerr &lt;&lt; <span class="st">"counter de-allocated with value "</span> &lt;&lt; value &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>; }
+<span class="dt">int</span> Counter::read() { <span class="kw">return</span> value; }
+<span class="dt">void</span> Counter::increment() { value++; }
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">const</span> <span class="dt">char</span> **argv)
+{
+ Counter c;
+ Counter c10(<span class="dv">10</span>);
+
+ cout &lt;&lt; <span class="st">"c starts at "</span> &lt;&lt; c.read() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+ c.increment();
+ cout &lt;&lt; <span class="st">"c after one increment is "</span> &lt;&lt; c.read() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+
+ cout &lt;&lt; <span class="st">"c10 starts at "</span> &lt;&lt; c10.read() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+ c.increment();
+ c.increment();
+ cout &lt;&lt;<span class="st">"c10 after two increments is "</span> &lt;&lt; c10.read() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/c++/counter.cpp" class="uri">examples/c++/counter.cpp</a>
+</div>
+<p>Things to notice:</p>
+<ol style="list-style-type: decimal">
+<li>In the <code class="backtick">class&nbsp;Counter</code> declaration, the <code class="backtick">public:</code> label introduces the public members of the class. The member <code class="backtick">value</code> is only accessible to member functions of <code class="backtick">Counter</code>. This enforces much stronger information hiding than the default in C, although one can still use <code class="backtick">void&nbsp;*</code> trickery to hunt down and extract supposedly private data in C++ objects.</li>
+<li>In addition to the member function declarations in the class
+declaration, we also need to provide definitions. These look like
+ordinary function definitions, except that the class name is prepended
+using <code class="backtick">::</code> as in <code class="backtick">Counter::read</code>.</li>
+<li>Member functions are called using <code class="backtick">struct</code> access syntax, as in <code class="backtick">c.read()</code>. Conceptually, each instance of a class has its own member functions, so that <code class="backtick">c.read</code> is the function for reading <code class="backtick">c</code> while <code class="backtick">c10.read</code> is the function for reading <code class="backtick">c10</code>. Inside a member function, names of class members refer to members of the current instance; <code class="backtick">value</code> inside <code class="backtick">c.read</code> is <code class="backtick">c.value</code> (which otherwise is not accessible, since <code class="backtick">c.value</code> is not public).</li>
+<li>Two special member functions are <code class="backtick">Counter::Counter()</code> and <code class="backtick">Counter::Counter(int)</code>. These are <strong>constructors</strong>,
+ and are identifiable as such because they are named after the class. A
+constructor is called whenever a new instance of the class is created.
+If you create an instance with no arguments (as in the declaration <code class="backtick">Counter&nbsp;c;</code>), you get the constructor with no arguments. If you create an instance with arguments (as in the declaration <code class="backtick">Counter&nbsp;c10(10);</code>),
+ you get the version with the appropriate arguments. This is just
+another example of function overloading. If you don't define any
+constructors, C++ supplies a default constructor that takes no arguments
+ and does nothing. Note that constructors don't have a return type (you
+don't need to preface them with void).</li>
+<li>The special member function <code class="backtick">Counter::~Counter()</code> is a <strong>destructor</strong>; it is called when an object of type <code class="backtick">Counter</code>
+ is de-allocated (say, when returning from a function with a local
+variable of this type). This particular destructor is not very useful.
+Destructors are mostly important for objects that allocate their own
+storage that needs to be de-allocated when the object is; see the
+section on storage allocation below.</li>
+</ol>
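+<p>Compiling and running this program gives the following output. Note that the last two lines are produced by the destructor. Note also that the program as written calls <code class="backtick">c.increment()</code> rather than <code class="backtick">c10.increment()</code> after printing the starting value of <code class="backtick">c10</code>, which is why <code class="backtick">c10</code> still reads 10 and <code class="backtick">c</code> is de-allocated with value 3.</p>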
+<pre><code>c starts at 0
+c after one increment is 1
+c10 starts at 10
+c10 after two increments is 10
+counter de-allocated with value 10
+counter de-allocated with value 3</code></pre>
+<p>One subtle difference between C and C++ is that C++ uses empty parentheses <code class="backtick">()</code> for functions with no arguments, where C would use <code class="backtick">(void)</code>. This is a bit of a historical artifact, having to do with C allowing <code class="backtick">()</code> for functions whose arguments are not specified in the declaration (which was standard practice before ANSI C).</p>
+<p>Curiously, C++ also allows you to declare <code class="backtick">struct</code>s, with the interpretation that a <code class="backtick">struct</code> is exactly like a <code class="backtick">class</code> except that all members are public by default. So if you change <code class="backtick">class</code> to <code class="backtick">struct</code>
+ in the program above, it will do exactly the same thing. In practice,
+nobody who codes in C++ does this; the feature is mostly useful to allow
+ C code with <code class="backtick">struct</code>s to mix with C++ code.</p>
+<h3 id="Operator_overloading"><span class="header-section-number">6.4.5</span> Operator overloading</h3>
+<p>Sometimes when you define a new class, you also want to define new
+interpretations of operators on that class. Here is an example of a
+class that defines elements of the <strong>max-plus algebra</strong> over <code class="backtick">int</code>s. This gives us objects that act like <code class="backtick">int</code>s, except that the <code class="backtick">+</code> operator now returns the larger of its arguments and the <code class="backtick">*</code> operator now returns the sum.<a href="#fn25" class="footnoteRef" id="fnref25"><sup>25</sup></a></p>
+<p>The mechanism in C++ for doing this is to define member functions with names <code class="backtick">operator</code><em>something</em> where <em>something</em>
+ is the name of the operator we want to define. These member functions
+take one fewer argument than the operator they define; in effect, <code class="backtick">x&nbsp;+&nbsp;y</code> becomes syntactic sugar for <code class="backtick">x.operator+(y)</code>
+ (which, amazingly, is actually legal C++). Because these are member
+functions, they are allowed to access members of other instances of the
+same class that would normally be hidden.</p>
+<p>This same mechanism is also used to define automatic type conversions out of a type: the <code class="backtick">MaxPlus::operator&nbsp;int()</code> function allows C++ to convert a <code class="backtick">MaxPlus</code> object to an <code class="backtick">int</code> whenever it needs to (for example, to feed it to <code class="backtick">cout</code>). (Automatic type conversions <em>into</em> a type happen if you provide an appropriate constructor.)</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span class="ot">#include &lt;iostream&gt;</span>
+<span class="ot">#include &lt;algorithm&gt; </span><span class="co">// for max</span>
+
+<span class="kw">using</span> <span class="kw">namespace</span> std;
+
+<span class="co">/* act like ints, except + does max and * does addition */</span>
+<span class="kw">class</span> MaxPlus {
+ <span class="dt">int</span> value;
+<span class="kw">public</span>:
+ MaxPlus(<span class="dt">int</span>);
+ MaxPlus <span class="kw">operator</span>+(<span class="dt">const</span> MaxPlus &amp;);
+ MaxPlus <span class="kw">operator</span>*(<span class="dt">const</span> MaxPlus &amp;);
+ <span class="kw">operator</span> <span class="dt">int</span>();
+};
+
+MaxPlus::MaxPlus(<span class="dt">int</span> x) { value = x; }
+
+MaxPlus
+MaxPlus::<span class="kw">operator</span>*(<span class="dt">const</span> MaxPlus &amp;other)
+{
+ <span class="kw">return</span> MaxPlus(value + other.value);
+}
+
+MaxPlus
+MaxPlus::<span class="kw">operator</span>+(<span class="dt">const</span> MaxPlus &amp;other)
+{
+ <span class="co">/* std::max does what you expect */</span>
+ <span class="kw">return</span> MaxPlus(max(value, other.value));
+}
+
+MaxPlus::<span class="kw">operator</span> <span class="dt">int</span>() { <span class="kw">return</span> value; }
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">const</span> <span class="dt">char</span> **argv)
+{
+ cout &lt;&lt; <span class="st">"2+3 == "</span> &lt;&lt; (MaxPlus(<span class="dv">2</span>) + MaxPlus(<span class="dv">3</span>)) &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+ cout &lt;&lt; <span class="st">"2*3 == "</span> &lt;&lt; (MaxPlus(<span class="dv">2</span>) * MaxPlus(<span class="dv">3</span>)) &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/c++/maxPlus.cpp" class="uri">examples/c++/maxPlus.cpp</a>
+</div>
+<p>Avoid the temptation to overuse operator overloading, as it can be
+dangerous if used to obfuscate what an operator normally does:</p>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp">MaxPlus::<span class="kw">operator</span>--() { godzilla.eat(tokyo); }</code></pre></div>
+<p>The general rule of thumb is that you should probably only do
+operator overloading if you really are making things that act like
+numbers (yes, <code class="backtick">cout&nbsp;&lt;&lt;</code> violates this).</p>
+<p>Automatic type conversions can be particularly dangerous. The line</p>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"> cout &lt;&lt; (MaxPlus(<span class="dv">2</span>) + <span class="dv">3</span>) &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;</code></pre></div>
+<p>is ambiguous: should the compiler convert <code class="backtick">MaxPlus(2)</code> to an <code class="backtick">int</code> using <code class="backtick">MaxPlus::operator&nbsp;int()</code> and use ordinary integer addition, or convert <code class="backtick">3</code> to a <code class="backtick">MaxPlus</code> using the <code class="backtick">MaxPlus(int)</code> constructor and use funky <code class="backtick">MaxPlus</code> addition? Fortunately most C++ compilers will complain about the ambiguity and fail rather than guessing wrong.</p>
+<h3 id="Templates"><span class="header-section-number">6.4.6</span> Templates</h3>
+<p>One of the things we kept running into in this class was that if we
+defined a container type like a hash table, binary search tree, or
+priority queue, we had to either bake in the type of the data it held or
+ do horrible tricks with <code class="backtick">void&nbsp;*</code> pointers to work around the C type system. C++ includes a semi-principled work-around for this problem known as <strong>templates</strong>.
+ These are essentially macros that take a type name as an argument and
+ are expanded as needed to produce functions or classes with specific
+types (see <a href="#macros">Macros</a> for an example of how to do this if you only have C).</p>
+<p>Typical use is to prefix a definition with <code class="backtick">template&nbsp;&lt;class&nbsp;T&gt;</code> and then use <code class="backtick">T</code> as a type name throughout:</p>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span class="kw">template</span> &lt;<span class="kw">class</span> T&gt;
+T add1(T x)
+{
+ <span class="kw">return</span> x + ((T) <span class="dv">1</span>);
+}</code></pre></div>
+<p>Note the explicit cast to <code class="backtick">T</code> of <code class="backtick">1</code>; this avoids ambiguities that might arise with automatic type conversions.</p>
+<p>If you put this definition in a program, you can then apply <code class="backtick">add1</code> to any type that has a <code class="backtick">+</code> operator and that you can convert <code class="backtick">1</code> to. For example, the output of this code fragment:</p>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"> cout &lt;&lt; <span class="st">"add1(3) == "</span> &lt;&lt; add1(<span class="dv">3</span>) &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+ cout &lt;&lt; <span class="st">"add1(3.1) == "</span> &lt;&lt; add1(<span class="fl">3.1</span>) &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+ cout &lt;&lt; <span class="st">"add1('c') == "</span> &lt;&lt; add1(<span class="st">'c'</span>) &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+ cout &lt;&lt; <span class="st">"add1(MaxPlus(0)) == "</span> &lt;&lt; add1(MaxPlus(<span class="dv">0</span>)) &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+ cout &lt;&lt; <span class="st">"add1(MaxPlus(2)) == "</span> &lt;&lt; add1(MaxPlus(<span class="dv">2</span>)) &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;</code></pre></div>
+<p>is</p>
+<pre><code>add1(3) == 4
+add1(3.1) == 4.1
+add1('c') == d
+add1(MaxPlus(0)) == 1
+add1(MaxPlus(2)) == 2</code></pre>
+<p>By default, C++ will instantiate a template to whatever type fits in
+its argument. If you want to force a particular version, you can put the
+ type in angle brackets after the name of whatever you defined. For
+example,</p>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"> cout &lt;&lt; <span class="st">"add1&lt;int&gt;(3.1) == "</span> &lt;&lt; add1&lt;<span class="dt">int</span>&gt;(<span class="fl">3.1</span>) &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;</code></pre></div>
+<p>produces</p>
+<pre><code>add1&lt;int&gt;(3.1) == 4</code></pre>
+<p>because <code class="backtick">add1&lt;int&gt;</code> forces its argument to be converted to an <code class="backtick">int</code> (truncating to <code class="backtick">3</code>) before adding one to it.</p>
+<p>Because templates are really macros that get expanded as needed, it is common to put templates in header (<code class="backtick">.h</code>) files rather than in <code class="backtick">.cpp</code> files. See the stack implementation below for an example of this.</p>
+<h3 id="Exceptions"><span class="header-section-number">6.4.7</span> Exceptions</h3>
+<p>C provides no built-in mechanism for signaling that something bad
+happened. So C programmers are left to come up with ad-hoc mechanisms
+like:</p>
+<ol style="list-style-type: decimal">
+<li>Calling <code class="backtick">abort</code> to kill the program, either directly or via <code class="backtick">assert</code>.</li>
+<li>Calling <code class="backtick">exit</code> with a nonzero exit code.</li>
+<li>Returning a special error value from a function. This is often done
+in library routines, because it's rude for a library routine not to give
+ the caller a chance to figure out how to deal with the error. But it
+means coming up with some special error value that won't be returned
+normally, and these can vary widely from one routine to another (null
+pointers, <code class="backtick">-1</code>, etc.)</li>
+</ol>
+<p>C++ provides a standard mechanism for signaling unusual events known as <strong>exceptions</strong>. The actual mechanism is similar to <code class="backtick">return</code>: the <code class="backtick">throw</code> statement throws an exception that may be caught by a <code class="backtick">try..catch</code> statement anywhere above it on the execution stack (not necessarily in the same function). Example:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span class="ot">#include &lt;iostream&gt;</span>
+
+<span class="kw">using</span> <span class="kw">namespace</span> std;
+
+<span class="dt">int</span> fail()
+{
+ <span class="kw">throw</span> <span class="st">"you lose"</span>;
+
+ <span class="kw">return</span> <span class="dv">5</span>;
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">const</span> <span class="dt">char</span> **argv)
+{
+ <span class="kw">try</span> {
+ cout &lt;&lt; fail() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+ }
+ <span class="kw">catch</span>(<span class="dt">const</span> <span class="dt">char</span> *s) {
+ cerr &lt;&lt; <span class="st">"Caught error: "</span> &lt;&lt; s &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/c++/exception.cpp" class="uri">examples/c++/exception.cpp</a>
+</div>
+<p>In action:</p>
+<pre><code>$ make exception
+g++ -g3 -Wall exception.cpp -o exception
+$ ./exception
+Caught error: you lose</code></pre>
+<p>Note the use of <code class="backtick">cerr</code> instead of <code class="backtick">cout</code>. This sends the error message to <code class="backtick">stderr</code>.</p>
+<p>A <code class="backtick">try..catch</code> statement will catch an exception only if the type matches the type of the argument to the <code class="backtick">catch</code> part of the statement. This can be used to pick and choose which exceptions you want to catch. See <a href="http://www.cplusplus.com/doc/tutorial/exceptions/" class="uri">http://www.cplusplus.com/doc/tutorial/exceptions/</a> for some examples and descriptions of some C++ standard library exceptions.</p>
+<h3 id="Storage_allocation"><span class="header-section-number">6.4.8</span> Storage allocation</h3>
+<p>C++ programs generally don't use <code class="backtick">malloc</code> and <code class="backtick">free</code>, but instead use the built-in C++ operators <code class="backtick">new</code> and <code class="backtick">delete</code>. The advantage of <code class="backtick">new</code> and <code class="backtick">delete</code> is that they know about types: not only does this mean that you don't have to play games with <code class="backtick">sizeof</code>
+ to figure out how much space to allocate, but if you allocate a new
+object from a class with a constructor, the constructor gets called to
+initialize the object, and if you delete an object, its destructor (if
+it has one) is called.</p>
+<p>There are two versions of <code class="backtick">new</code> and <code class="backtick">delete</code>,
+ depending on whether you want to allocate just one object or an array
+of objects, plus some special syntax for passing constructor arguments:</p>
+<ul>
+<li>To allocate a single object, use <code class="backtick">new</code> <em>type</em>.</li>
+<li>To allocate an array of objects, use <code class="backtick">new</code> <em>type</em><code class="backtick">[</code><em>size</em><code class="backtick">]</code>. As with <code class="backtick">malloc</code>, both operations return a pointer to <em>type</em>.</li>
+<li>If you want to pass arguments to a constructor for <em>type</em>, use <code class="backtick">new</code> <em>type</em><code class="backtick">(</code><em>args</em><code class="backtick">)</code>. This only works with the single-object version, so you can't do <code class="backtick">new&nbsp;SomeClass[12]</code> unless <code class="backtick">SomeClass</code> has a constructor that takes no arguments.</li>
+<li>To de-allocate a single object, use <code class="backtick">delete</code> <em>pointer-to-object</em>.</li>
+<li>To de-allocate an array, use <code class="backtick">delete&nbsp;[]</code> <em>pointer-to-base-of-array</em>. Mixing <code class="backtick">new</code> with <code class="backtick">delete&nbsp;[]</code> or vice versa is an error (undefined behavior) that the compiler is not required to detect. Mixing either with <code class="backtick">malloc</code> or <code class="backtick">free</code> is a very bad idea.</li>
+</ul>
+<p>The program below gives examples of <code class="backtick">new</code> and <code class="backtick">delete</code> in action:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span class="ot">#include &lt;iostream&gt;</span>
+<span class="ot">#include &lt;cassert&gt;</span>
+
+<span class="kw">using</span> <span class="kw">namespace</span> std;
+
+<span class="kw">class</span> Noisy {
+ <span class="dt">int</span> id;
+<span class="kw">public</span>:
+ Noisy(<span class="dt">int</span>); <span class="co">// create a noisy object with this id</span>
+ ~Noisy();
+};
+
+Noisy::Noisy(<span class="dt">int</span> initId) {
+ id = initId;
+ cout &lt;&lt; <span class="st">"Noisy object created with id "</span> &lt;&lt; id &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+}
+
+Noisy::~Noisy() {
+ cout &lt;&lt; <span class="st">"Noisy object destroyed with id "</span> &lt;&lt; id &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">const</span> <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> *p;
+ <span class="dt">int</span> *a;
+ <span class="dt">const</span> <span class="dt">int</span> n = <span class="dv">100</span>;
+ Noisy n1(<span class="dv">1</span>);
+ Noisy *n2;
+
+ p = <span class="kw">new</span> <span class="dt">int</span>;
+ a = <span class="kw">new</span> <span class="dt">int</span>[n];
+ n2 = <span class="kw">new</span> Noisy(<span class="dv">2</span>);
+
+ *p = <span class="dv">5</span>;
+ assert(*p == <span class="dv">5</span>);
+
+ <span class="kw">for</span>(<span class="dt">int</span> i = <span class="dv">0</span>; i &lt; n; i++) {
+ a[i] = i;
+ }
+
+ <span class="kw">for</span>(<span class="dt">int</span> i = <span class="dv">0</span>; i &lt; n; i++) {
+ assert(a[i] == i);
+ }
+
+ <span class="kw">delete</span> [] a;
+ <span class="kw">delete</span> p;
+ <span class="kw">delete</span> n2;
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/c++/allocation.cpp" class="uri">examples/c++/allocation.cpp</a>
+</div>
+<h4 id="Storage_allocation_inside_objects"><span class="header-section-number">6.4.8.1</span> Storage allocation inside objects</h4>
+<p>Inside objects, storage allocation gets complicated. The reason is
+that if the object is copied, either by an assignment or by being passed
+ as a call-by-value parameter, the storage pointed to by the object will
+ not be copied. This can lead to two different objects that share the
+same internal data structures, which is usually not something you want.
+Furthermore, when the object is deallocated, it's necessary to also
+deallocate any space it allocated, which can be done inside the object's
+ destructor.</p>
+<p>To avoid all these problems, any object of type <code class="backtick">T</code> that uses <code class="backtick">new</code> needs to have all of:</p>
+<ol style="list-style-type: decimal">
+<li>A <em>destructor</em> <code class="backtick">T::~T()</code>.</li>
+<li>A <em>copy constructor</em> <code class="backtick">T::T(const&nbsp;T&nbsp;&amp;)</code>, which is a constructor that takes a reference to another object of the same type as an argument and copies its contents.</li>
+<li>An <em>overloaded assignment operator</em> <code class="backtick">T::operator=(const&nbsp;T&nbsp;&amp;)</code>
+ that does the same thing, but also deallocates any internal storage of
+the current object before copying new data in place of it (or possibly
+just copies the contents of internal storage without doing any
+allocation and deallocation). The overloaded assignment operator is
+particularly tricky, because you have to make sure it doesn't destroy
+the contents of the object if somebody writes the useless
+self-assignment <code class="backtick">a&nbsp;=&nbsp;a</code>, and you also need to return a reference to <code class="backtick">*this</code> so that you can chain assignments together as in <code class="backtick">a&nbsp;=&nbsp;b&nbsp;=&nbsp;c</code>.</li>
+</ol>
+<p>Here is an example of a <code class="backtick">Stack</code> class that includes all of these members. Note that it is defined using templates so we can make a stack of any type we like.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span class="kw">template</span> &lt;<span class="kw">class</span> T&gt;
+<span class="kw">class</span> Stack {
+ <span class="dt">static</span> <span class="dt">const</span> <span class="dt">int</span> initialSize = <span class="dv">32</span>; <span class="co">/* static means this is shared across entire class */</span>
+ <span class="dt">int</span> top;
+ <span class="dt">int</span> size;
+ T* contents;
+<span class="kw">public</span>:
+ Stack(); <span class="co">/* create a new empty stack */</span>
+
+ <span class="co">/* the unholy trinity of complex C++ objects */</span>
+ ~Stack(); <span class="co">/* destructor */</span>
+ Stack(<span class="dt">const</span> Stack &amp;); <span class="co">/* copy constructor */</span>
+ Stack&amp; <span class="kw">operator</span>=(<span class="dt">const</span> Stack &amp;); <span class="co">/* overloaded assignment */</span>
+
+ <span class="dt">void</span> push(T); <span class="co">/* push an element onto the stack */</span>
+ <span class="dt">int</span> isEmpty(); <span class="co">/* return 1 if empty */</span>
+ T pop(); <span class="co">/* pop top element from stack */</span>
+};
+
+<span class="kw">template</span> &lt;<span class="kw">class</span> T&gt;
+Stack&lt;T&gt;::Stack()
+{
+ size = initialSize;
+ top = <span class="dv">0</span>;
+ contents = <span class="kw">new</span> T[size];
+}
+
+<span class="kw">template</span> &lt;<span class="kw">class</span> T&gt;
+Stack&lt;T&gt;::~Stack()
+{
+ <span class="kw">delete</span> [] contents;
+}
+
+<span class="kw">template</span> &lt;<span class="kw">class</span> T&gt;
+Stack&lt;T&gt;::Stack(<span class="dt">const</span> Stack&lt;T&gt; &amp;other)
+{
+ size = other.size;
+ top = other.top;
+ contents = <span class="kw">new</span> T[size];
+
+ <span class="kw">for</span>(<span class="dt">int</span> i = <span class="dv">0</span>; i &lt; top; i++) {
+ contents[i] = other.contents[i];
+ }
+}
+
+<span class="kw">template</span> &lt;<span class="kw">class</span> T&gt;
+Stack&lt;T&gt; &amp;
+Stack&lt;T&gt;::<span class="kw">operator</span>=(<span class="dt">const</span> Stack&lt;T&gt; &amp;other)
+{
+ <span class="kw">if</span>(&amp;other != <span class="kw">this</span>) {
+ <span class="co">/* this is a real assignment */</span>
+
+ <span class="kw">delete</span> [] contents;
+
+ size = other.size;
+ top = other.top;
+ contents = <span class="kw">new</span> T[size];
+
+ <span class="kw">for</span>(<span class="dt">int</span> i = <span class="dv">0</span>; i &lt; top; i++) {
+ contents[i] = other.contents[i];
+ }
+ }
+
+ <span class="kw">return</span> *<span class="kw">this</span>;
+}
+
+<span class="kw">template</span> &lt;<span class="kw">class</span> T&gt;
+<span class="dt">void</span>
+Stack&lt;T&gt;::push(T elt)
+{
+ <span class="kw">if</span>(top &gt;= size) {
+ <span class="dt">int</span> newSize = <span class="dv">2</span>*size;
+ T *newContents = <span class="kw">new</span> T[newSize];
+
+ <span class="kw">for</span>(<span class="dt">int</span> i = <span class="dv">0</span>; i &lt; top; i++) {
+ newContents[i] = contents[i];
+ }
+
+ <span class="kw">delete</span> [] contents;
+
+ contents = newContents;
+ size = newSize;
+ }
+
+ contents[top++] = elt;
+}
+
+<span class="kw">template</span> &lt;<span class="kw">class</span> T&gt;
+T
+Stack&lt;T&gt;::pop()
+{
+ <span class="kw">if</span>(top &gt; <span class="dv">0</span>) {
+ <span class="kw">return</span> contents[--top];
+ } <span class="kw">else</span> {
+ <span class="kw">throw</span> <span class="st">"stack empty"</span>;
+ }
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/c++/stack/stack.h" class="uri">examples/c++/stack/stack.h</a>
+</div>
+<p>Here is some code demonstrating use of the stack:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span class="ot">#include &lt;iostream&gt;</span>
+
+<span class="ot">#include "stack.h"</span>
+
+<span class="kw">using</span> <span class="kw">namespace</span> std;
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">const</span> <span class="dt">char</span> **argv)
+{
+ Stack&lt;<span class="dt">int</span>&gt; s;
+ Stack&lt;<span class="dt">int</span>&gt; s2;
+
+ <span class="kw">try</span> {
+ s.push(<span class="dv">1</span>);
+ s.push(<span class="dv">2</span>);
+ s.push(<span class="dv">3</span>);
+
+ s2 = s;
+
+ cout &lt;&lt; s.pop() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+ cout &lt;&lt; s.pop() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+ cout &lt;&lt; s.pop() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+
+ cout &lt;&lt; s2.pop() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+ cout &lt;&lt; s2.pop() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+ cout &lt;&lt; s2.pop() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+
+ <span class="kw">try</span> {
+ s2.pop();
+ } <span class="kw">catch</span>(<span class="dt">const</span> <span class="dt">char</span> *err) {
+ cout &lt;&lt; <span class="st">"Caught expected exception "</span> &lt;&lt; err &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+ }
+
+ <span class="kw">for</span>(<span class="dt">int</span> i = <span class="dv">0</span>; i &lt; <span class="dv">1000</span>; i++) {
+ s.push(i);
+ }
+
+ cout &lt;&lt; s.pop() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+ } <span class="kw">catch</span>(<span class="dt">const</span> <span class="dt">char</span> *err) {
+ cerr &lt;&lt; <span class="st">"Caught error "</span> &lt;&lt; err &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/c++/stack/testStack.cpp" class="uri">examples/c++/stack/testStack.cpp</a>
+</div>
+<h3 id="Standard_library"><span class="header-section-number">6.4.9</span> Standard library</h3>
+<p>C++ has a large standard library that includes implementations of
+many of the data structures we've seen in this class. In most
+situations, it is easier to use the standard library implementations
+than roll your own, although you have to be careful to make sure you
+understand just what the standard library implementations do. For
+example, here is a reimplementation of the main routine from <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/c++/stack/testStack.cpp">testStack.cpp</a> using the <code class="backtick">stack</code> template from <code class="backtick">#include&nbsp;&lt;stack&gt;</code>.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode cpp"><code class="sourceCode cpp"><span class="ot">#include &lt;iostream&gt;</span>
+<span class="ot">#include &lt;stack&gt;</span>
+
+<span class="kw">using</span> <span class="kw">namespace</span> std;
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">const</span> <span class="dt">char</span> **argv)
+{
+ stack&lt;<span class="dt">int</span>&gt; s;
+ stack&lt;<span class="dt">int</span>&gt; s2;
+
+ s.push(<span class="dv">1</span>);
+ s.push(<span class="dv">2</span>);
+ s.push(<span class="dv">3</span>);
+
+ s2 = s;
+
+ cout &lt;&lt; s.top() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>; s.pop();
+ cout &lt;&lt; s.top() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>; s.pop();
+ cout &lt;&lt; s.top() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>; s.pop();
+
+ cout &lt;&lt; s2.top() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>; s2.pop();
+ cout &lt;&lt; s2.top() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>; s2.pop();
+ cout &lt;&lt; s2.top() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>; s2.pop();
+
+ <span class="kw">for</span>(<span class="dt">int</span> i = <span class="dv">0</span>; i &lt; <span class="dv">1000</span>; i++) {
+ s.push(i);
+ }
+
+ cout &lt;&lt; s.top() &lt;&lt; <span class="st">'</span><span class="ch">\n</span><span class="st">'</span>;
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/c++/stack/stdStack.cpp" class="uri">examples/c++/stack/stdStack.cpp</a>
+</div>
+<p>One difference between the standard stack and our stack is that <code class="backtick">std::stack</code>'s <code class="backtick">pop</code> member function doesn't return anything. So we have to use <code class="backtick">top</code> to get the top element before popping it.</p>
+<p>There is a chart of all the standard library data structures at <a href="http://www.cplusplus.com/reference/stl/" class="uri">http://www.cplusplus.com/reference/stl/</a>.</p>
+<h3 id="Things_we_haven.27t_talked_about"><span class="header-section-number">6.4.10</span> Things we haven't talked about</h3>
+<p>The main thing we've omitted here is any discussion of
+object-oriented features of C++, particularly inheritance. These are not
+ immediately useful for the abstract-data-type style of programming
+we've used in CS223, but can be helpful for building more complicated
+systems, where we might want to have various specialized classes of
+objects that can all be approached using a common interface represented
+by a class that they inherit from. If you are interested in exploring
+these tools further, the CS department occasionally offers a class on
+object-oriented programming; Mike Fischer's lecture notes from the last
+time this course was offered can be found at <a href="http://zoo.cs.yale.edu/classes/cs427/2011a/lectures.html" class="uri">http://zoo.cs.yale.edu/classes/cs427/2011a/lectures.html</a>.</p>
+<h2 id="testingDuringDevelopment"><span class="header-section-number">6.5</span> Testing during development</h2>
+<p>It is a truth universally acknowledged that test code should be
+written early in the development process. Unfortunately, most
+programmers (including me) tend to assume that a program will work on
+the first attempt and there's not much point in testing it anyway, so
+writing and running test code often gets deferred indefinitely. The
+solution is to write the test code first, and run it directly from your <code>Makefile</code>
+ every time you save and compile your program. Not only will this
+guarantee that your program actually works when you are done (or at
+least passes the tests you thought of), it allows you to see how the
+program is improving with each positive change, and prevents you from
+accidentally making new negative changes that break things that used to
+work.</p>
+<p>Going one step further, we can often write our interface and test
+code first, build a non-working stub implementation, and then slowly
+flesh out the missing pieces until the implementation passes all the
+tests. This way there is always some obvious step to do next, and we
+don't find ourselves stuck staring at an empty file.</p>
+<h3 id="unitTests"><span class="header-section-number">6.5.1</span> Unit tests</h3>
+<p>A straightforward approach to testing is to include test code with every <strong>unit</strong>
+ in your program, where a unit is any part of the program that can be
+sensibly run by itself. Typically, this will be a single function or a
+group of functions that together implement some data structure.</p>
+<p>In C, these will often make up the contents of a single source file.
+Though this is probably not the best approach if you are building a
+production-quality testing framework, a simple way to include unit tests
+ in a program is to append to each source file a test <code>main</code> function that can be enabled by defining a macro (I like <code>TEST_MAIN</code>). You can then build this file by itself with the macro defined to get a stand-alone test program for just this code.</p>
+<h4 id="what-to-put-in-the-test-code"><span class="header-section-number">6.5.1.1</span> What to put in the test code</h4>
+<p>Ideally, you want to use enough different inputs that every line of code in your program is reached by some test, a goal called <strong>code coverage</strong>. For complex programs, this may be hard to achieve, and there are programs, such as the <code>gcov</code> program that comes with <code>gcc</code>,
+ that will analyze how much code coverage you get out of your tests. For
+ simple programs, we can just try to come up with a set of inputs that
+covers all our bases.</p>
+<p>Testing can be done as <strong>black-box testing</strong>, where the test code assumes no knowledge of the implementation, or <strong>white-box testing</strong>,
+ where the test code has direct access to the implementation and can
+observe the effects of its actions. Black-box testing is handy if your
+implementation may change, and it is generally a good idea to write
+black-box tests first. White-box testing can be useful if some states of
+ the data structure are hard to reach otherwise, or if black-box testing
+ is not very informative about why a particular operation is failing.
+The example given below uses both.</p>
+<h4 id="example"><span class="header-section-number">6.5.1.2</span> Example</h4>
+<p>Here is an example of a simple data structure with some built-in test code conditionally compiled by defining <code>TEST_MAIN</code>.
+ The data structure implements a counter with built-in overflow
+protection. The counter interface does not provide the ability to read
+the counter value; instead, the user can only tell if it is zero or not.</p>
+<p>Because the counter is implemented internally as a <code>uint64_t</code>,
+ black-box testing of what happens with too many increments would take
+centuries. So we include some white-box tests that directly access the
+counter value to set up this (arguably unnecessary) test case.</p>
+<p>The code is given below. We include both the interface file and the implementation, as well as a <code>Makefile</code> showing how to build and run the test program. The <code>Makefile</code> includes some extra arguments to <code>gcc</code> to turn on the <code>TEST_MAIN</code> macro and supply the extra information needed to run <code>gcov</code>. If you type <code>make test</code>, it will make and run <code>testCounter</code>, and then run <code>gcov</code> to verify that we did in fact hit all lines of code in the program.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/*</span>
+<span class="co"> * Abstract counter type.</span>
+<span class="co"> *</span>
+<span class="co"> * You can increment it, decrement it, and test for zero.</span>
+<span class="co"> *</span>
+<span class="co"> * Increment and decrement operations return 1 if successful,</span>
+<span class="co"> * 0 if the operation would cause underflow or overflow.</span>
+<span class="co"> */</span>
+
+<span class="kw">typedef</span> <span class="kw">struct</span> counter Counter;
+
+<span class="co">/* make a new counter starting at 0 */</span>
+Counter *counterCreate(<span class="dt">void</span>);
+
+<span class="co">/* destroy a counter */</span>
+<span class="dt">void</span> counterDestroy(Counter *);
+
+<span class="co">/* return 1 if counter is 0, 0 otherwise */</span>
+<span class="dt">int</span> counterIsZero(<span class="dt">const</span> Counter *);
+
+<span class="co">/* increment a counter, returns 1 if successful, 0 if increment would cause overflow */</span>
+<span class="dt">int</span> counterIncrement(Counter *);
+
+<span class="co">/* decrement a counter, returns 1 if successful, 0 if decrement would cause underflow */</span>
+<span class="dt">int</span> counterDecrement(Counter *);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/unitTest/counter.h" class="uri">examples/unitTest/counter.h</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include "counter.h"</span>
+
+<span class="ot">#include &lt;stdint.h&gt;</span>
+
+<span class="ot">#define COUNTER_MAX (UINT64_MAX)</span>
+
+<span class="kw">struct</span> counter {
+ <span class="dt">uint64_t</span> value;
+};
+
+<span class="co">/* make a new counter starting at 0 */</span>
+Counter *
+counterCreate(<span class="dt">void</span>)
+{
+ Counter *c;
+
+ c = malloc(<span class="kw">sizeof</span>(Counter));
+ assert(c);
+
+ c-&gt;value = <span class="dv">0</span>;
+
+ <span class="kw">return</span> c;
+}
+
+<span class="co">/* destroy a counter */</span>
+<span class="dt">void</span>
+counterDestroy(Counter *c)
+{
+ free(c);
+}
+
+<span class="co">/* return 1 if counter is 0, 0 otherwise */</span>
+<span class="dt">int</span>
+counterIsZero(<span class="dt">const</span> Counter *c)
+{
+ <span class="kw">return</span> c-&gt;value == <span class="dv">0</span>;
+}
+
+<span class="co">/* increment a counter, returns 1 if successful, 0 if increment would cause overflow */</span>
+<span class="dt">int</span>
+counterIncrement(Counter *c)
+{
+ <span class="kw">if</span>(c-&gt;value == COUNTER_MAX) {
+ <span class="kw">return</span> <span class="dv">0</span>;
+ } <span class="kw">else</span> {
+ c-&gt;value++;
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+}
+
+<span class="co">/* decrement a counter, returns 1 if successful, 0 if decrement would cause underflow */</span>
+<span class="dt">int</span>
+counterDecrement(Counter *c)
+{
+ <span class="kw">if</span>(c-&gt;value == <span class="dv">0</span>) {
+ <span class="kw">return</span> <span class="dv">0</span>;
+ } <span class="kw">else</span> {
+ c-&gt;value--;
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+}
+
+<span class="ot">#ifdef TEST_MAIN</span>
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ Counter *c;
+
+ <span class="co">/* black box testing */</span>
+ c = counterCreate(); <span class="co">/* 0 */</span>
+
+ assert(counterIsZero(c));
+ assert(counterIncrement(c) == <span class="dv">1</span>); <span class="co">/* 1 */</span>
+ assert(!counterIsZero(c));
+ assert(counterIncrement(c) == <span class="dv">1</span>); <span class="co">/* 2 */</span>
+ assert(!counterIsZero(c));
+ assert(counterDecrement(c) == <span class="dv">1</span>); <span class="co">/* 1 */</span>
+ assert(!counterIsZero(c));
+ assert(counterDecrement(c) == <span class="dv">1</span>); <span class="co">/* 0 */</span>
+ assert(counterIsZero(c));
+ assert(counterDecrement(c) == <span class="dv">0</span>); <span class="co">/* 0 */</span>
+ assert(counterIsZero(c));
+ assert(counterIncrement(c) == <span class="dv">1</span>); <span class="co">/* 1 */</span>
+ assert(!counterIsZero(c));
+
+ counterDestroy(c);
+
+ <span class="co">/* white box testing */</span>
+ c = counterCreate(); <span class="co">/* 0 */</span>
+
+ assert(c-&gt;value == <span class="dv">0</span>);
+ assert(counterIncrement(c) == <span class="dv">1</span>); <span class="co">/* 1 */</span>
+ assert(c-&gt;value == <span class="dv">1</span>);
+ assert(counterIncrement(c) == <span class="dv">1</span>); <span class="co">/* 2 */</span>
+ assert(c-&gt;value == <span class="dv">2</span>);
+ assert(counterDecrement(c) == <span class="dv">1</span>); <span class="co">/* 1 */</span>
+ assert(c-&gt;value == <span class="dv">1</span>);
+ assert(counterDecrement(c) == <span class="dv">1</span>); <span class="co">/* 0 */</span>
+ assert(c-&gt;value == <span class="dv">0</span>);
+ assert(counterDecrement(c) == <span class="dv">0</span>); <span class="co">/* 0 */</span>
+ assert(c-&gt;value == <span class="dv">0</span>);
+ assert(counterIncrement(c) == <span class="dv">1</span>); <span class="co">/* 1 */</span>
+ assert(c-&gt;value == <span class="dv">1</span>);
+
+ <span class="co">/* force counter value to COUNTER_MAX to test for overflow protection */</span>
+ c-&gt;value = COUNTER_MAX; <span class="co">/* COUNTER_MAX */</span>
+ assert(counterIncrement(c) == <span class="dv">0</span>); <span class="co">/* COUNTER_MAX */</span>
+ assert(c-&gt;value == COUNTER_MAX);
+ assert(counterDecrement(c) == <span class="dv">1</span>); <span class="co">/* COUNTER_MAX-1 */</span>
+ assert(c-&gt;value == COUNTER_MAX<span class="dv">-1</span>);
+ assert(counterIncrement(c) == <span class="dv">1</span>); <span class="co">/* COUNTER_MAX */</span>
+ assert(c-&gt;value == COUNTER_MAX);
+
+ counterDestroy(c);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}
+<span class="ot">#endif</span></code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/unitTest/counter.c" class="uri">examples/unitTest/counter.c</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode makefile"><code class="sourceCode makefile"><span class="dt">CC</span><span class="ch">=</span><span class="st">c99</span>
+<span class="dt">CFLAGS</span><span class="ch">=</span><span class="st">-g3 -pedantic -Wall</span>
+
+<span class="dv">all:</span><span class="dt"> seqprinter</span>
+
+<span class="dv">seqprinter:</span><span class="dt"> main.o sequence.o</span>
+ <span class="ch">$(</span><span class="dt">CC</span><span class="ch">)</span> <span class="ch">$(</span><span class="dt">CFLAGS</span><span class="ch">)</span> -o <span class="ch">$@</span> <span class="ch">$^</span>
+
+<span class="dv">test:</span><span class="dt"> seqprinter</span>
+ ./seqprinter
+
+<span class="co"># these rules say to rebuild main.o and sequence.o if sequence.h changes</span>
+<span class="dv">main.o:</span><span class="dt"> main.c sequence.h</span>
+<span class="dv">sequence.o:</span><span class="dt"> sequence.c sequence.h</span>
+
+<span class="dv">clean:</span>
+ <span class="ch">$(</span><span class="dt">RM</span><span class="ch">)</span> -f seqprinter *.o</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/ADT/sequence/Makefile" class="uri">examples/ADT/sequence/Makefile</a>
+</div>
+<h3 id="test-harnesses"><span class="header-section-number">6.5.2</span> Test harnesses</h3>
+<p>Here are some older notes on testing using a test harness that does
+some basic tricks like catching segmentation faults so that a program
+can keep going even if one test fails.</p>
+<h4 id="Module_interface"><span class="header-section-number">6.5.2.1</span> Module interface</h4>
+<p>The module will be a stack for storing integers.</p>
+<p>Let's start with the interface, which we'll put in a file called <code>stack.h</code>:</p>
+<h5 id="stack.h"><span class="header-section-number">6.5.2.1.1</span> stack.h</h5>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/*</span>
+<span class="co"> * This is an "opaque struct"; it discourages people from looking at</span>
+<span class="co"> * the inside of our structure. The actual definition of struct stack</span>
+<span class="co"> * is contained in stack.c.</span>
+<span class="co"> */</span>
+<span class="kw">typedef</span> <span class="kw">struct</span> stack *Stack;
+
+<span class="co">/* constructor and destructor */</span>
+Stack stack_create(<span class="dt">void</span>); <span class="co">/* returns 0 on allocation error */</span>
+<span class="dt">void</span> stack_destroy(Stack);
+
+<span class="co">/* push a new element onto the stack */</span>
+<span class="dt">void</span> stack_push(Stack , <span class="dt">int</span> new_element);
+
+<span class="co">/* return 1 if the stack is empty, 0 otherwise */</span>
+<span class="dt">int</span> stack_isempty(Stack);
+
+<span class="co">/* remove and return top element of stack */</span>
+<span class="co">/* returns STACK_EMPTY if stack is empty */</span>
+<span class="ot">#define STACK_EMPTY (-1)</span>
+<span class="dt">int</span> stack_pop(Stack);</code></pre></div>
+<p>Our intent is that a <code>Stack</code> acts like a stack--- we push things onto it using <code>stack_push</code>, and then pull them off again in reverse order using <code>stack_pop</code>. Ideally, we don't ever pop the stack when it's empty (which we can detect using <code>stack_isempty</code>), but if we do, we have <code>stack_pop</code> return something well-defined.</p>
+<h4 id="Test_code"><span class="header-section-number">6.5.2.2</span> Test code</h4>
+<p>Let's write some test code to try this out. Because our initial stack
+ implementation may be exceptionally bug-ridden, we'll use a test
+harness that provides macros for detecting and intercepting segmentation
+ faults and similar disasters. The various testing wrappers are defined
+in the files <code>tester.h</code> and <code>tester.c</code>, from the <a href="#testingDuringDevelopment">chapter on testing</a>;
+ you should feel free to use them for your own purposes. I've added line
+numbers in comments to all the TEST lines so we can find them again
+later.</p>
+<h5 id="test-stack.c"><span class="header-section-number">6.5.2.2.1</span> test-stack.c</h5>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;setjmp.h&gt;</span>
+<span class="ot">#include &lt;signal.h&gt;</span>
+<span class="ot">#include &lt;unistd.h&gt;</span>
+
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="ot">#include "stack.h"</span>
+<span class="ot">#include "tester.h"</span>
+
+<span class="ot">#define STRESS_TEST_ITERATIONS (1000000)</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ Stack s;
+ <span class="dt">int</span> i;
+
+ tester_init();
+
+ <span class="co">/* first we need to build one */</span>
+ TRY { s = stack_create(); } ENDTRY;
+
+ <span class="co">/* 25 */</span> TEST_ASSERT(s != <span class="dv">0</span>);
+
+ <span class="co">/* now we'll try pushing and popping a bit */</span>
+ TRY { stack_push(s, <span class="dv">1</span>); } ENDTRY;
+ TRY { stack_push(s, <span class="dv">2</span>); } ENDTRY;
+ TRY { stack_push(s, <span class="dv">3</span>); } ENDTRY;
+
+ <span class="co">/* 32 */</span> TEST(stack_isempty(s), <span class="dv">0</span>);
+ <span class="co">/* 33 */</span> TEST(stack_pop(s), <span class="dv">3</span>);
+ <span class="co">/* 34 */</span> TEST(stack_isempty(s), <span class="dv">0</span>);
+ <span class="co">/* 35 */</span> TEST(stack_pop(s), <span class="dv">2</span>);
+ <span class="co">/* 36 */</span> TEST(stack_isempty(s), <span class="dv">0</span>);
+ <span class="co">/* 37 */</span> TEST(stack_pop(s), <span class="dv">1</span>);
+ <span class="co">/* 38 */</span> TEST(stack_isempty(s), <span class="dv">1</span>);
+ <span class="co">/* 39 */</span> TEST(stack_pop(s), STACK_EMPTY);
+ <span class="co">/* 40 */</span> TEST(stack_isempty(s), <span class="dv">1</span>);
+ <span class="co">/* 41 */</span> TEST(stack_pop(s), STACK_EMPTY);
+
+ <span class="co">/* can we still push after popping too much? */</span>
+ TRY { stack_push(s, <span class="dv">4</span>); } ENDTRY;
+ <span class="co">/* 45 */</span> TEST(stack_isempty(s), <span class="dv">0</span>);
+ <span class="co">/* 46 */</span> TEST(stack_pop(s), <span class="dv">4</span>);
+ <span class="co">/* 47 */</span> TEST(stack_isempty(s), <span class="dv">1</span>);
+ <span class="co">/* 48 */</span> TEST(stack_pop(s), STACK_EMPTY);
+ <span class="co">/* 49 */</span> TEST(stack_isempty(s), <span class="dv">1</span>);
+
+ <span class="co">/* let's do some stress testing */</span>
+ <span class="co">/* we won't use TEST for this because we might get too much output */</span>
+ TRY {
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; STRESS_TEST_ITERATIONS; i++) {
+ stack_push(s, i);
+ }
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; STRESS_TEST_ITERATIONS; i++) {
+ stack_push(s, <span class="dv">957</span>);
+ <span class="kw">if</span>(stack_pop(s) != <span class="dv">957</span>) {
+ <span class="co">/* 60 */</span> FAIL(<span class="st">"wanted 957 but didn't get it"</span>);
+ abort();
+ }
+ }
+ <span class="kw">for</span>(i = STRESS_TEST_ITERATIONS - <span class="dv">1</span>; i &gt;= <span class="dv">0</span>; i--) {
+ <span class="kw">if</span>(stack_isempty(s)) {
+ <span class="co">/* 66 */</span> FAIL(<span class="st">"stack empty too early"</span>);
+ abort();
+ }
+ <span class="kw">if</span>(stack_pop(s) != i) {
+ <span class="co">/* 70 */</span> FAIL(<span class="st">"got wrong value!"</span>);
+ abort();
+ }
+ }
+ } ENDTRY; <span class="co">/* 74 */</span>
+
+ <span class="co">/* 76 */</span> TEST(stack_isempty(s), <span class="dv">1</span>);
+
+ TRY { stack_destroy(s); } ENDTRY;
+
+ tester_report(stdout, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> tester_result();
+}</code></pre></div>
+<p>There is a lot of test code here. In practice, we might write just a
+few tests to start off with, and, to be honest, I didn't write all of
+this at once. But you can never have too many tests--- if nothing else,
+they give an immediate sense of gratification as the number of failed
+tests drops.</p>
+<h4 id="Makefile"><span class="header-section-number">6.5.2.3</span> Makefile</h4>
+<ul>
+<li>Finally, we'll write a <code>Makefile</code>:</li>
+</ul>
+<h5 id="Makefile-1"><span class="header-section-number">6.5.2.3.1</span> Makefile</h5>
+<pre><code>CC=gcc
+CFLAGS=-g3 -Wall -ansi -pedantic
+
+all:
+
+test: test-stack
+ ./test-stack
+ @echo OK!
+
+test-stack: test-stack.o tester.o stack.o
+ $(CC) $(CFLAGS) -o $@ $^
+
+test-stack.o: stack.h tester.h
+stack.o: stack.h</code></pre>
+<p>Note that we <em>don't</em> provide a convenient shortcut for building <code>test-stack</code> without running it. That's because we want to run the test code every single time.</p>
+<h3 id="Stub_implementation"><span class="header-section-number">6.5.3</span> Stub implementation</h3>
+<p>Of course, we still can't compile anything, because we don't have any
+ implementation. Let's fix that. To make it easy to write, we will try
+to add as little as possible to what we already have in <code>stack.h</code>:</p>
+<h4 id="stack.c"><span class="header-section-number">6.5.3.1</span> stack.c</h4>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt; </span>
+<span class="ot">#include "stack.h"</span>
+
+<span class="kw">struct</span> stack { <span class="dt">int</span> dummy; };
+Stack stack_create(<span class="dt">void</span>) { <span class="kw">return</span> malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> stack)); }
+<span class="dt">void</span> stack_destroy(Stack s) { free(s); }
+<span class="dt">void</span> stack_push(Stack s, <span class="dt">int</span> elem) { ; }
+<span class="dt">int</span> stack_pop(Stack s) { <span class="kw">return</span> STACK_EMPTY; }
+<span class="dt">int</span> stack_isempty(Stack s) { <span class="kw">return</span> <span class="dv">1</span>; }</code></pre></div>
+<p>Will this work? Of course not. There's hardly any code! But maybe it will compile if we run <code>make&nbsp;test</code>:</p>
+<pre><code>$ make test
+gcc -g3 -Wall -ansi -pedantic -c -o test-stack.o test-stack.c
+gcc -g3 -Wall -ansi -pedantic -c -o tester.o tester.c
+gcc -g3 -Wall -ansi -pedantic -c -o stack.o stack.c
+gcc -g3 -Wall -ansi -pedantic -o test-stack test-stack.o tester.o stack.o
+./test-stack
+test-stack.c:32: TEST FAILED: stack_isempty(s) -&gt; 1 but expected 0
+test-stack.c:33: TEST FAILED: stack_pop(s) -&gt; -1 but expected 3
+test-stack.c:34: TEST FAILED: stack_isempty(s) -&gt; 1 but expected 0
+test-stack.c:35: TEST FAILED: stack_pop(s) -&gt; -1 but expected 2
+test-stack.c:36: TEST FAILED: stack_isempty(s) -&gt; 1 but expected 0
+test-stack.c:37: TEST FAILED: stack_pop(s) -&gt; -1 but expected 1
+test-stack.c:45: TEST FAILED: stack_isempty(s) -&gt; 1 but expected 0
+test-stack.c:46: TEST FAILED: stack_pop(s) -&gt; -1 but expected 4
+test-stack.c:60: wanted 957 but didn't get it
+test-stack.c:74: Aborted (signal 6)
+./test-stack: errors 8/17, signals 1, FAILs 1
+make[1]: *** [test] Error 8</code></pre>
+<p>Hooray! It compiles on the first try! (Well, not really, but let's
+pretend it did.) Unfortunately, it only passes any tests at all by pure
+dumb luck. But now we just need to get the code to pass a few more
+tests.</p>
+<h3 id="Bounded-space_implementation"><span class="header-section-number">6.5.4</span> Bounded-space implementation</h3>
+<p>Here's a first attempt at a stack that suffers from some artificial
+limits. We retain the structure of the original broken implementation,
+we just put a few more lines of code in and format it more expansively.</p>
+<h4 id="stack.c-1"><span class="header-section-number">6.5.4.1</span> stack.c</h4>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt; </span>
+<span class="ot">#include "stack.h"</span>
+
+<span class="ot">#define MAX_STACK_SIZE (100)</span>
+
+<span class="kw">struct</span> stack {
+ <span class="dt">int</span> top;
+ <span class="dt">int</span> data[MAX_STACK_SIZE];
+};
+
+Stack
+stack_create(<span class="dt">void</span>)
+{
+ <span class="kw">struct</span> stack *s;
+
+ s = malloc(<span class="kw">sizeof</span>(*s));
+ s-&gt;top = <span class="dv">0</span>;
+ <span class="kw">return</span> s;
+}
+
+<span class="dt">void</span>
+stack_destroy(Stack s)
+{
+ free(s);
+}
+
+<span class="dt">void</span>
+stack_push(Stack s, <span class="dt">int</span> elem)
+{
+ s-&gt;data[(s-&gt;top)++] = elem;
+}
+
+<span class="dt">int</span>
+stack_pop(Stack s)
+{
+ <span class="kw">return</span> s-&gt;data[--(s-&gt;top)];
+}
+
+<span class="dt">int</span>
+stack_isempty(Stack s)
+{
+ <span class="kw">return</span> s-&gt;top == <span class="dv">0</span>;
+}</code></pre></div>
+<p>Let's see what happens now:</p>
+<pre><code>$ make test
+gcc -g3 -Wall -ansi -pedantic -c -o test-stack.o test-stack.c
+gcc -g3 -Wall -ansi -pedantic -c -o tester.o tester.c
+gcc -g3 -Wall -ansi -pedantic -c -o stack.o stack.c
+gcc -g3 -Wall -ansi -pedantic -o test-stack test-stack.o tester.o stack.o
+./test-stack
+test-stack.c:40: TEST FAILED: stack_isempty(s) -&gt; 0 but expected 1
+test-stack.c:41: TEST FAILED: stack_pop(s) -&gt; 409 but expected -1
+test-stack.c:47: TEST FAILED: stack_isempty(s) -&gt; 0 but expected 1
+test-stack.c:48: TEST FAILED: stack_pop(s) -&gt; 0 but expected -1
+test-stack.c:49: TEST FAILED: stack_isempty(s) -&gt; 0 but expected 1
+test-stack.c:74: Segmentation fault (signal 11)
+test-stack.c:76: TEST FAILED: stack_isempty(s) -&gt; 0 but expected 1
+free(): invalid pointer 0x804b830!
+./test-stack: errors 6/17, signals 1, FAILs 0
+make[1]: *** [test] Error 6</code></pre>
+<p>There are still errors, but we get past several initial tests before things blow up. Looking back at the line numbers in <code>test-stack.c</code>, we see that the first failed test is the one that checks if the stack is empty after we pop from an empty stack. The code for <code>stack_isempty</code> looks pretty clean, so what happened? Somewhere <code>s-&gt;top</code> got set to a nonzero value, and the only place this can happen is inside <code>stack_pop</code>. Aha! There's no check in <code>stack_pop</code> for an empty stack, so it's decrementing <code>s-&gt;top</code> past 0. (Exercise: why didn't the test of <code>stack_pop</code> fail?)</p>
+<h3 id="First_fix"><span class="header-section-number">6.5.5</span> First fix</h3>
+<p>If we're lucky, fixing this problem will make the later tests happier. Let's try a new version of <code>stack_pop</code>. We'll leave everything else the same.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">int</span>
+stack_pop(Stack s)
+{
+ <span class="kw">if</span>(stack_isempty(s)) {
+ <span class="kw">return</span> STACK_EMPTY;
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> s-&gt;data[--(s-&gt;top)];
+ }
+}</code></pre></div>
+<p>And now we get:</p>
+<pre><code>$ make test
+gcc -g3 -Wall -ansi -pedantic -c -o test-stack.o test-stack.c
+gcc -g3 -Wall -ansi -pedantic -c -o tester.o tester.c
+gcc -g3 -Wall -ansi -pedantic -c -o stack.o stack.c
+gcc -g3 -Wall -ansi -pedantic -o test-stack test-stack.o tester.o stack.o
+./test-stack
+test-stack.c:74: Segmentation fault (signal 11)
+test-stack.c:76: TEST FAILED: stack_isempty(s) -&gt; 0 but expected 1
+./test-stack: errors 1/17, signals 1, FAILs 0
+make[1]: *** [test] Error 1</code></pre>
+<p>Which is much nicer. We are still failing the stress test, but that's not terribly surprising.</p>
+<h3 id="Final_version"><span class="header-section-number">6.5.6</span> Final version</h3>
+<p>After some more tinkering, this is what I ended up with. This version
+ uses a malloc'd data field, and realloc's it when the stack gets too
+big.</p>
+<h4 id="stack.c-2"><span class="header-section-number">6.5.6.1</span> stack.c</h4>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt; </span>
+<span class="ot">#include "stack.h"</span>
+
+<span class="kw">struct</span> stack {
+ <span class="dt">int</span> top; <span class="co">/* first unused slot in data */</span>
+ <span class="dt">int</span> size; <span class="co">/* number of slots in data */</span>
+ <span class="dt">int</span> *data; <span class="co">/* stack contents */</span>
+};
+
+<span class="ot">#define INITIAL_STACK_SIZE (1)</span>
+<span class="ot">#define STACK_SIZE_MULTIPLIER (2)</span>
+
+Stack
+stack_create(<span class="dt">void</span>)
+{
+ <span class="kw">struct</span> stack *s;
+
+ s = malloc(<span class="kw">sizeof</span>(*s));
+ <span class="kw">if</span>(s == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+
+ s-&gt;top = <span class="dv">0</span>;
+ s-&gt;size = INITIAL_STACK_SIZE;
+ s-&gt;data = malloc(s-&gt;size * <span class="kw">sizeof</span>(*(s-&gt;data)));
+ <span class="kw">if</span>(s-&gt;data == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+
+ <span class="co">/* else everything is ok */</span>
+ <span class="kw">return</span> s;
+}
+
+<span class="dt">void</span>
+stack_destroy(Stack s)
+{
+ free(s-&gt;data);
+ free(s);
+}
+
+<span class="dt">void</span>
+stack_push(Stack s, <span class="dt">int</span> elem)
+{
+ <span class="kw">if</span>(s-&gt;top == s-&gt;size) {
+ <span class="co">/* need more space */</span>
+ s-&gt;size *= STACK_SIZE_MULTIPLIER;
+ s-&gt;data = realloc(s-&gt;data, s-&gt;size * <span class="kw">sizeof</span>(*(s-&gt;data)));
+ <span class="kw">if</span>(s-&gt;data == <span class="dv">0</span>) {
+ abort(); <span class="co">/* we have no other way to signal failure :-( */</span>
+ }
+ }
+ <span class="co">/* now there is enough room */</span>
+ s-&gt;data[s-&gt;top++] = elem;
+}
+
+<span class="dt">int</span>
+stack_pop(Stack s)
+{
+ <span class="kw">if</span>(stack_isempty(s)) {
+ <span class="kw">return</span> STACK_EMPTY;
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> s-&gt;data[--(s-&gt;top)];
+ }
+}
+
+<span class="dt">int</span>
+stack_isempty(Stack s)
+{
+ <span class="kw">return</span> s-&gt;top == <span class="dv">0</span>;
+}</code></pre></div>
+<p>At last we have a version that passes all tests:</p>
+<pre><code>$ make test
+gcc -g3 -Wall -ansi -pedantic -c -o test-stack.o test-stack.c
+gcc -g3 -Wall -ansi -pedantic -c -o tester.o tester.c
+gcc -g3 -Wall -ansi -pedantic -c -o stack.o stack.c
+gcc -g3 -Wall -ansi -pedantic -o test-stack test-stack.o tester.o stack.o
+./test-stack
+OK!</code></pre>
+<h3 id="Moral"><span class="header-section-number">6.5.7</span> Moral</h3>
+<p>Writing a big program all at once is hard. If you can break the
+problem down into little problems, it becomes easier. "Test first" is a
+strategy not just for getting a well-tested program, but for giving you
+something easy to do at each step--- it's usually not too hard to write
+one more test, and it's usually not too hard to get just one test
+working. If you can keep taking those small, easy steps, eventually you
+will run out of failed tests and have a working program.</p>
+<h3 id="Appendix:_Test_macros"><span class="header-section-number">6.5.8</span> Appendix: Test macros</h3>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/*</span>
+<span class="co"> * Test macros.</span>
+<span class="co"> * </span>
+<span class="co"> * Usage:</span>
+<span class="co"> *</span>
+<span class="co"> * #include &lt;setjmp.h&gt;</span>
+<span class="co"> * #include &lt;stdio.h&gt;</span>
+<span class="co"> * #include &lt;signal.h&gt;</span>
+<span class="co"> * #include &lt;unistd.h&gt;</span>
+<span class="co"> *</span>
+<span class="co"> * testerInit(); -- Initialize internal data structures.</span>
+<span class="co"> * testerReport(FILE *, "name"); -- Print report.</span>
+<span class="co"> * testerResult(); -- Returns # of failed tests.</span>
+<span class="co"> *</span>
+<span class="co"> * TRY { code } ENDTRY;</span>
+<span class="co"> *</span>
+<span class="co"> * Wraps code to catch seg faults, illegal instructions, etc. May not be</span>
+<span class="co"> * nested.</span>
+<span class="co"> * Prints a warning if a signal is caught.</span>
+<span class="co"> * To enforce a maximum time, set alarm before entering.</span>
+<span class="co"> *</span>
+<span class="co"> * TEST(expr, expected_value);</span>
+<span class="co"> *</span>
+<span class="co"> * Evaluates expr (which should yield an integer value) inside a TRY.</span>
+<span class="co"> * Prints a warning if evaluating expr causes a fault or returns a value</span>
+<span class="co"> * not equal to expected_value.</span>
+<span class="co"> *</span>
+<span class="co"> * TEST_ASSERT(expr)</span>
+<span class="co"> *</span>
+<span class="co"> * Equivalent to TEST(!(expr), 0)</span>
+<span class="co"> *</span>
+<span class="co"> * You can also cause your own failures with FAIL:</span>
+<span class="co"> *</span>
+<span class="co"> * TRY {</span>
+<span class="co"> * x = 1;</span>
+<span class="co"> * if(x == 2) FAIL("why is x 2?");</span>
+<span class="co"> * } ENDTRY;</span>
+<span class="co"> *</span>
+<span class="co"> * To limit the time taken by a test, call tester_set_time_limit with</span>
+<span class="co"> * a new limit in seconds, e.g.</span>
+<span class="co"> *</span>
+<span class="co"> * tester_set_time_limit(1);</span>
+<span class="co"> * TRY { while(1); } ENDTRY;</span>
+<span class="co"> *</span>
+<span class="co"> * There is an initial default limit of 10 seconds.</span>
+<span class="co"> * If you don't want any limit, set the limit to 0.</span>
+<span class="co"> *</span>
+<span class="co"> */</span>
+
+<span class="co">/* global data used by macros */</span>
+<span class="co">/* nothing in here should be modified directly */</span>
+<span class="kw">extern</span> <span class="kw">struct</span> tester_global_data {
+ jmp_buf escape_hatch; <span class="co">/* jump here on surprise signals */</span>
+ <span class="dt">int</span> escape_hatch_active; <span class="co">/* true if escape hatch is usable */</span>
+ <span class="dt">int</span> tests; <span class="co">/* number of tests performed */</span>
+ <span class="dt">int</span> errors; <span class="co">/* number of tests failed */</span>
+ <span class="dt">int</span> signals; <span class="co">/* number of signals caught */</span>
+ <span class="dt">int</span> expr_value; <span class="co">/* expression value */</span>
+ <span class="dt">int</span> setjmp_return; <span class="co">/* return value from setjmp */</span>
+ <span class="dt">int</span> try_failed; <span class="co">/* true if last try failed */</span>
+ <span class="dt">int</span> user_fails; <span class="co">/* number of calls to FAIL */</span>
+ <span class="dt">int</span> time_limit; <span class="co">/* time limit for TRY */</span>
+} TesterData;
+
+<span class="co">/* set up system; call this before using macros */</span>
+<span class="dt">void</span> testerInit(<span class="dt">void</span>);
+
+<span class="co">/* prints a summary report of all errors to f, prefixed with preamble */</span>
+<span class="co">/* If there were no errors, nothing is printed */</span>
+<span class="dt">void</span> testerReport(FILE *f, <span class="dt">const</span> <span class="dt">char</span> *preamble);
+
+<span class="co">/* returns number of errors so far. */</span>
+<span class="dt">int</span> testerResult(<span class="dt">void</span>);
+
+<span class="co">/* set a time limit t for TRY, TEST, TEST_ASSERT etc. */</span>
+<span class="co">/* After t seconds, an ALARM signal will interrupt the test. */</span>
+<span class="co">/* Set t = 0 to have no time limit. */</span>
+<span class="co">/* Default time limit is 10 seconds. */</span>
+<span class="dt">void</span> tester_set_time_limit(<span class="dt">int</span> t);
+
+<span class="dt">const</span> <span class="dt">char</span> *testerStrsignal(<span class="dt">int</span>); <span class="co">/* internal hack; don't use this */</span>
+
+<span class="co">/* gruesome non-syntactic macros */</span>
+<span class="ot">#define TRY \</span>
+<span class="ot"> TesterData.try_failed = 0; \</span>
+<span class="ot"> alarm(TesterData.time_limit); \</span>
+<span class="ot"> if(((TesterData.setjmp_return = setjmp(TesterData.escape_hatch)) == 0) \</span>
+<span class="ot"> &amp;&amp; (TesterData.escape_hatch_active = 1) /* one = is correct*/)</span>
+<span class="ot">#define ENDTRY else { \</span>
+<span class="ot"> fprintf(stderr, "%s:%d: %s (signal %d)\n", \</span>
+<span class="ot"> __FILE__, __LINE__, \</span>
+<span class="ot"> testerStrsignal(TesterData.setjmp_return), \</span>
+<span class="ot"> TesterData.setjmp_return); \</span>
+<span class="ot"> TesterData.signals++; \</span>
+<span class="ot"> TesterData.try_failed = 1; \</span>
+<span class="ot"> } \</span>
+<span class="ot"> alarm(0); \</span>
+<span class="ot"> TesterData.escape_hatch_active = 0</span>
+
+<span class="co">/* another atrocity */</span>
+<span class="ot">#define TEST(expr, expected_value) \</span>
+<span class="ot"> TesterData.tests++; \</span>
+<span class="ot"> TesterData.errors++; /* guilty until proven innocent */ \</span>
+<span class="ot"> TRY { TesterData.expr_value = (expr); \</span>
+<span class="ot"> if(TesterData.expr_value != expected_value) { \</span>
+<span class="ot"> fprintf(stderr, "%s:%d: TEST FAILED: %s -&gt; %d but expected %d\n", \</span>
+<span class="ot"> __FILE__, __LINE__, __STRING(expr), \</span>
+<span class="ot"> TesterData.expr_value, expected_value); \</span>
+<span class="ot"> } else { \</span>
+<span class="ot"> TesterData.errors--; \</span>
+<span class="ot"> } \</span>
+<span class="ot"> } \</span>
+<span class="ot"> ENDTRY; \</span>
+<span class="ot"> if(TesterData.try_failed) \</span>
+<span class="ot"> fprintf(stderr, "%s:%d: TEST FAILED: %s caught signal\n", \</span>
+<span class="ot"> __FILE__, __LINE__, __STRING(expr))</span>
+
+<span class="ot">#define TEST_ASSERT(expr) TEST((expr) != 0, 1)</span>
+<span class="ot">#define FAIL(msg) \</span>
+<span class="ot"> (fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, (msg)), \</span>
+<span class="ot"> TesterData.user_fails++, \</span>
+<span class="ot"> TesterData.try_failed = 1)</span></code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/testHarness/tester.h" class="uri">examples/testHarness/tester.h</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define _GNU_SOURCE </span><span class="co">/* get strsignal def */</span>
+
+<span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;signal.h&gt;</span>
+<span class="ot">#include &lt;string.h&gt;</span>
+<span class="ot">#include &lt;setjmp.h&gt;</span>
+
+<span class="ot">#include "tester.h"</span>
+
+<span class="kw">struct</span> tester_global_data TesterData;
+
+<span class="dt">const</span> <span class="dt">char</span> *
+testerStrsignal(<span class="dt">int</span> sig)
+{
+ <span class="kw">return</span> strsignal(sig);
+}
+
+<span class="dt">static</span> <span class="dt">void</span>
+tester_sighandler(<span class="dt">int</span> signal)
+{
+ <span class="kw">if</span>(TesterData.escape_hatch_active) {
+ TesterData.escape_hatch_active = <span class="dv">0</span>;
+ longjmp(TesterData.escape_hatch, signal);
+ }
+}
+
+<span class="dt">void</span>
+testerInit(<span class="dt">void</span>)
+{
+ TesterData.escape_hatch_active = <span class="dv">0</span>;
+ TesterData.tests = <span class="dv">0</span>;
+ TesterData.errors = <span class="dv">0</span>;
+ TesterData.signals = <span class="dv">0</span>;
+ TesterData.user_fails = <span class="dv">0</span>;
+
+ signal(SIGSEGV, tester_sighandler);
+ signal(SIGILL, tester_sighandler);
+ signal(SIGFPE, tester_sighandler);
+ signal(SIGALRM, tester_sighandler);
+ signal(SIGBUS, tester_sighandler);
+ signal(SIGABRT, tester_sighandler);
+}
+
+<span class="dt">void</span>
+testerReport(FILE *f, <span class="dt">const</span> <span class="dt">char</span> *preamble)
+{
+ <span class="kw">if</span>(TesterData.errors != <span class="dv">0</span> || TesterData.signals != <span class="dv">0</span>) {
+ fprintf(f, <span class="st">"%s: errors %d/%d, signals %d, FAILs %d</span><span class="ch">\n</span><span class="st">"</span>,
+ preamble,
+ TesterData.errors,
+ TesterData.tests,
+ TesterData.signals,
+ TesterData.user_fails);
+ }
+}
+
+<span class="dt">int</span>
+testerResult(<span class="dt">void</span>)
+{
+ <span class="kw">return</span> TesterData.errors;
+}
+
+<span class="dt">void</span>
+tester_set_time_limit(<span class="dt">int</span> t)
+{
+ TesterData.time_limit = t;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/testHarness/tester.c" class="uri">examples/testHarness/tester.c</a>
+</div>
+<h2 id="algorithmDesignTechniques"><span class="header-section-number">6.6</span> Algorithm design techniques</h2>
+<h3 id="Basic_principles_of_algorithm_design"><span class="header-section-number">6.6.1</span> Basic principles of algorithm design</h3>
+<p>The fundamental principle of algorithm design was best expressed by the mathematician <a href="http://en.wikipedia.org/wiki/George_Polya" title="WikiPedia">George Polya</a>:
+ "If there is a problem you can't solve, then there is an easier problem
+ you can solve: find it." For computers, the situation is even better:
+if there is any technique to make a problem easier even by a tiny bit,
+then you can repeat the technique—possibly millions or even billions of
+times—until the problem becomes trivial.</p>
+<p>For example, suppose we want to find the maximum element of an array of <span class="math inline"><em>n</em></span>
+ ints, but we are as dumb as bricks, so it doesn't occur to us to
+iterate through the array keeping track of the largest value seen so
+far. We might instead be able to solve the problem by observing that the
+ maximum element is either (a) the last element, or (b) the maximum of
+the first <span class="math inline"><em>n</em> − 1</span> elements,
+depending on which is bigger. Figuring out (b) is an easier version of
+the original problem, so we are pretty much done once we've realized we
+can split the problem in this way. Here's the code:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* returns maximum of the n elements in a */</span>
+<span class="dt">int</span>
+max_element(<span class="dt">int</span> a[], <span class="dt">int</span> n)
+{
+ <span class="dt">int</span> prefix_max;
+
+ assert(n &gt; <span class="dv">0</span>);
+
+ <span class="kw">if</span>(n == <span class="dv">1</span>) {
+ <span class="kw">return</span> a[<span class="dv">0</span>];
+ } <span class="kw">else</span> {
+ prefix_max = max_element(a, n<span class="dv">-1</span>);
+ <span class="kw">if</span>(prefix_max &lt; a[n<span class="dv">-1</span>]) {
+ <span class="kw">return</span> a[n<span class="dv">-1</span>];
+ } <span class="kw">else</span> {
+ <span class="kw">return</span> prefix_max;
+ }
+ }
+}</code></pre></div>
+<p>Note that we need a special case for a 1-element array, because the
+empty prefix of such an array has no maximum element. We also <code class="backtick">assert</code> that the array contains at least one element, just to avoid mischief.</p>
+<p>One problem with this algorithm (at least when coding in C) is that
+the recursion may get very deep. Fortunately, there is a straightforward
+ way to convert the recursion to a loop. The idea is that instead of
+returning a value from the recursive call, we put it in a variable that
+gets used in the next pass through the loop. The result is</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* returns maximum of the n elements in a */</span>
+<span class="dt">int</span>
+max_element(<span class="dt">int</span> a[], <span class="dt">int</span> n)
+{
+ <span class="dt">int</span> i; <span class="co">/* this replaces n-1 from the recursive version */</span>
+ <span class="dt">int</span> prefix_max;
+
+ assert(n &gt; <span class="dv">0</span>);
+
+ prefix_max = a[<span class="dv">0</span>]; <span class="co">/* this is the i == 0 case */</span>
+
+ <span class="kw">for</span>(i = <span class="dv">1</span>; i &lt; n; i++) {
+ <span class="kw">if</span>(prefix_max &lt; a[i]) {
+ prefix_max = a[i]; <span class="co">/* was return a[n-1] */</span>
+ }
+ <span class="co">/* else case becomes prefix_max = prefix_max, a noop */</span>
+ }
+
+ <span class="co">/* at the end we have to return a value for real */</span>
+ <span class="kw">return</span> prefix_max;
+}</code></pre></div>
+<h3 id="algorithmDesignTechniquesClassification"><span class="header-section-number">6.6.2</span> Specific techniques</h3>
+<p>Algorithm design often requires both creativity and problem-specific
+knowledge, but there are certain common techniques that appear over and
+over again. The following classification is adapted from Anany Levitin, <em>Introduction to the Design &amp; Analysis of Algorithms</em>, Addison-Wesley, 2003.</p>
+<dl>
+<dt>Brute force</dt>
+<dd>Try all possible solutions until you find the right one.
+</dd>
+<dt>Divide and conquer</dt>
+<dd>Split the problem into two or more subproblems, solve the subproblems recursively, and then combine the solutions.
+</dd>
+<dt>Decrease and conquer</dt>
+<dd>Reduce the problem to a single smaller problem, solve that problem
+recursively, and then use that solution to solve the original problem.
+</dd>
+<dt>Transform and conquer</dt>
+<dd>Either (a) transform the input to a form that makes the problem easy
+ to solve, or (b) transform the input into the input to another problem
+whose solution solves the original problem.
+</dd>
+<dt>Use space</dt>
+<dd>Solve the problem using some auxiliary data structure.
+</dd>
+<dt>Dynamic programming</dt>
+<dd>Construct a table of solutions for increasingly large subproblems,
+where each new entry in the table is computed using previous entries in
+the table.
+</dd>
+<dt>Greedy method</dt>
+<dd>Run through your problem one step at a time, keeping track of the
+single best solution at each step. Hope sincerely that this will not
+lead you to make a seemingly-good choice early with bad consequences
+later.
+</dd>
+</dl>
+<p>Some of these approaches work better than others—it is the role of
+algorithm analysis (and experiments with real computers) to figure out
+which are likely to be both correct and efficient in practice. But
+having all of them in your toolbox lets you try different possibilities
+for a given problem.</p>
+<h3 id="Example:_Finding_the_maximum"><span class="header-section-number">6.6.3</span> Example: Finding the maximum</h3>
+<p>Though this classification is not completely well-defined, and is a
+bit arbitrary for some algorithms, it does provide a useful list of
+things to try in solving a problem. Here are some examples of applying
+the different approaches to a simple problem, the problem of finding the
+ maximum of an array of integers.</p>
+<dl>
+<dt>Brute force</dt>
+<dd>For index <span class="math inline"><em>i</em></span>, test if <span class="math inline"><em>A</em>[<em>i</em>]</span> is greater than or equal to every element in the array. When you find such an <span class="math inline"><em>A</em>[<em>i</em>]</span>, return it. For this algorithm, <span class="math inline"><em>T</em>(<em>n</em>)=<em>n</em> ⋅ <em>Θ</em>(<em>n</em>)=<em>Θ</em>(<em>n</em><sup>2</sup>)</span> if implemented in the most natural way.
+</dd>
+<dt>Divide and conquer</dt>
+<dd>If <span class="math inline"><em>A</em></span> has only one element, return it. Otherwise, let <span class="math inline"><em>m</em><sub>1</sub></span> be the maximum of <span class="math inline"><em>A</em>[1]…<em>A</em>[<em>n</em>/2]</span>, and let <span class="math inline"><em>m</em><sub>2</sub></span> be the maximum of <span class="math inline"><em>A</em>[<em>n</em>/2 + 1]…<em>A</em>[<em>n</em>]</span>. Return the larger of <span class="math inline"><em>m</em><sub>1</sub></span> and <span class="math inline"><em>m</em><sub>2</sub></span>. The running time is given by <span class="math inline"><em>T</em>(<em>n</em>)=2<em>T</em>(<em>n</em>/2)+<em>Θ</em>(1)=<em>Θ</em>(<em>n</em>)</span>.
+</dd>
+<dt>Decrease and conquer</dt>
+<dd>If <span class="math inline"><em>A</em></span> has only one element, return it. Otherwise, let <span class="math inline"><em>m</em></span> be the maximum of <span class="math inline"><em>A</em>[2]…<em>A</em>[<em>n</em>]</span>. Return the larger of <span class="math inline"><em>A</em>[1]</span> and <span class="math inline"><em>m</em></span>. Now the running time is given by <span class="math inline"><em>T</em>(<em>n</em>)=<em>T</em>(<em>n</em> − 1)+<em>Θ</em>(1)=∑<sub><em>i</em> = 1</sub><sup><em>n</em></sup><em>Θ</em>(1)=<em>Θ</em>(<em>n</em>)</span>.
+</dd>
+<dt>Transform and conquer</dt>
+<dd>Sort the array, then return <span class="math inline"><em>A</em>[<em>n</em>]</span>. Using an optimal comparison-based sort, this takes <span class="math inline"><em>Θ</em>(<em>n</em>log<em>n</em>)+<em>Θ</em>(1)=<em>Θ</em>(<em>n</em>log<em>n</em>)</span>
+ time. The advantage of this approach is that you probably don't have to
+ code up the sorting routine yourself, since most libraries include
+sorting.
+</dd>
+<dt>Use space</dt>
+<dd>Insert all elements into a balanced binary search tree, then return the rightmost element. Cost is <span class="math inline"><em>Θ</em>(<em>n</em>log<em>n</em>)</span> to do <span class="math inline"><em>n</em></span> insertions, plus <span class="math inline"><em>Θ</em>(log<em>n</em>)</span> to find the rightmost element, for a total of <span class="math inline"><em>Θ</em>(<em>n</em>log<em>n</em>)</span>. Sorting is equivalent and probably easier.
+</dd>
+<dt>Dynamic programming</dt>
+<dd>Create an auxiliary array <span class="math inline"><em>B</em></span> with indices <span class="math inline">1</span> to <span class="math inline"><em>n</em></span>. Set <span class="math inline"><em>B</em>[1]=<em>A</em>[1]</span>. As <span class="math inline"><em>i</em></span> goes from <span class="math inline">2</span> to <span class="math inline"><em>n</em></span>, set <span class="math inline"><em>B</em>[<em>i</em>]</span> to the larger of <span class="math inline"><em>B</em>[<em>i</em> − 1]</span> and <span class="math inline"><em>A</em>[<em>i</em>]</span>, so that <span class="math inline"><em>B</em>[<em>i</em>]</span> is always the maximum among <span class="math inline"><em>A</em>[1]…<em>A</em>[<em>i</em>]</span>. Return <span class="math inline"><em>B</em>[<em>n</em>]</span>. Cost: <span class="math inline"><em>Θ</em>(<em>n</em>)</span>. As is often the case, one can reduce the space to <span class="math inline"><em>O</em>(1)</span> by throwing away parts of <span class="math inline"><em>B</em></span> that we aren't going to look at again.
+</dd>
+<dt>Greedy method</dt>
+<dd>Let <span class="math inline"><em>m</em> = <em>A</em>[1]</span>. For each element <span class="math inline"><em>A</em>[<em>i</em>]</span> in <span class="math inline"><em>A</em>[2…<em>n</em>]</span>, if <span class="math inline"><em>A</em>[<em>i</em>]&gt;<em>m</em></span>, set <span class="math inline"><em>m</em></span> to <span class="math inline"><em>A</em>[<em>i</em>]</span>. Return the final value of <span class="math inline"><em>m</em></span>. Cost: <span class="math inline"><em>Θ</em>(<em>n</em>)</span>. This algorithm is pretty much identical to the previous one.
+</dd>
+</dl>
+<h3 id="algorithmDesignSorting"><span class="header-section-number">6.6.4</span> Example: Sorting</h3>
+<p>The sorting problem asks, given as input an array <span class="math inline"><em>A</em></span> of <span class="math inline"><em>n</em></span> elements in arbitrary order, to produce as output an array containing the same <span class="math inline"><em>n</em></span> elements in nondecreasing order, i.e. with <span class="math inline"><em>A</em>[<em>i</em>]≤<em>A</em>[<em>i</em> + 1]</span> for all <span class="math inline"><em>i</em></span>. We can apply each of the techniques above to this problem and get a sorting algorithm (though some are not very good).</p>
+<dl>
+<dt>Brute force</dt>
+<dd>For each of the <span class="math inline"><em>n</em>!</span> permutations of the input, test if it is sorted by checking <span class="math inline"><em>A</em>[<em>i</em>]≤<em>A</em>[<em>i</em> + 1]</span> for all <span class="math inline"><em>i</em></span>. Cost if implemented naively: <span class="math inline"><em>n</em>!⋅<em>Θ</em>(<em>n</em>)=<em>Θ</em>((<em>n</em> + 1)!)</span>. This algorithm is known as <strong>deterministic monkeysort</strong> or <strong>deterministic bogosort</strong>. It also has a randomized variant, where the careful generation of all <span class="math inline"><em>n</em>!</span>
+ permutations is replaced by shuffling. The randomized variant is easier
+ to code and runs at about the same speed as the deterministic variant,
+but does not guarantee termination if the shuffling is consistently
+unlucky.
+</dd>
+<dt>Divide and conquer</dt>
+<dd>Sort <span class="math inline"><em>A</em>[1…⌊<em>n</em>/2⌋]</span> and <span class="math inline"><em>A</em>[⌊<em>n</em>/2 + 1⌋…<em>n</em>]</span> separately, then merge the results (which takes <span class="math inline"><em>Θ</em>(<em>n</em>)</span> time and <span class="math inline"><em>Θ</em>(<em>n</em>)</span> additional space if implemented in the most straightforward way). Cost: <span class="math inline"><em>T</em>(<em>n</em>)=2<em>T</em>(<em>n</em>/2)+<em>Θ</em>(<em>n</em>)=<em>Θ</em>(<em>n</em>log<em>n</em>)</span> by the Master Theorem. This method gives <a href="#mergesort">mergesort</a>,
+ one of the fastest general-purpose sorting algorithms. The merge can be
+ avoided by carefully splitting the array into elements less than and
+elements greater than some pivot, then sorting the two resulting piles;
+this gives <a href="#quicksort">quicksort</a>. In practice <a href="#quicksort">quicksort</a> is often faster than <a href="#mergesort">mergesort</a>,
+ but its worst-case running time (when the pivot is chosen
+badly) is just as bad as that of insertion sort, which we will
+look at next.
+</dd>
+<dt>Decrease and conquer</dt>
+<dd>Remove <span class="math inline"><em>A</em>[<em>n</em>]</span>, sort the remainder, then insert <span class="math inline"><em>A</em>[<em>n</em>]</span> in the appropriate place. This algorithm is called <strong>insertion sort</strong>.
+ The final insertion step requires finding the right place (which can be
+ done fairly quickly if one is clever) but then moving up to <span class="math inline"><em>n</em> − 1</span> elements to make room for <span class="math inline"><em>A</em>[<em>n</em>]</span>. Total cost is given by <span class="math inline"><em>T</em>(<em>n</em>)=<em>T</em>(<em>n</em> − 1)+<em>Θ</em>(<em>n</em>)=<em>Θ</em>(<em>n</em><sup>2</sup>)</span>.
+</dd>
+<dt>Transform and conquer</dt>
+<dd>I'm not aware of any good general transform-and-conquer approach to
+sorting (there are some bad ones), but in some cases one can transform
+seemingly general sorting problems (e.g. sorting strings) into
+specialized sorting problems that permit faster solutions (e.g. sorting
+small integers).
+</dd>
+<dt>Use space</dt>
+<dd>Insert the elements into a balanced binary search tree, then read
+them out from left to right. Another version: insert them into a heap.
+Both take <span class="math inline"><em>Θ</em>(<em>n</em>log<em>n</em>)</span> time, but are more complicated to implement than <a href="#mergesort">mergesort</a> or <a href="#quicksort">quicksort</a> unless you have binary search tree or heap code lying around already.
+</dd>
+<dt>Dynamic programming</dt>
+<dd>Insertion sort may be seen as an example of this.
+</dd>
+<dt>Greedy method</dt>
+<dd>Find the smallest element, mark it as used, and output it. Repeat until no elements are left. The result is <a href="http://en.wikipedia.org/wiki/Selection_Sort">selection sort</a>, which runs in a respectable but suboptimal <span class="math inline"><em>Θ</em>(<em>n</em><sup>2</sup>)</span> time.
+</dd>
+</dl>
+<h2 id="bitManipulation"><span class="header-section-number">6.7</span> Bit manipulation</h2>
+<p>Sometimes it is convenient to consider a block of <code class="backtick">char</code>s as really being a block of bits. This requires using C's bit operators to get at individual bits.</p>
+<p>Here are some simple macros for extracting a particular bit from a <code class="backtick">char</code> array, thought of as a large vector of bits. These assume that the bytes are stored in <strong>little-endian</strong> order, which means that the least significant bytes come first (see <a href="http://en.wikipedia.org/wiki/Endianness" title="WikiPedia">Endianness</a>). This may produce odd results if you feed them a <code class="backtick">char&nbsp;*</code> that has been converted from a larger integer type.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define BITS_PER_BYTE (8)</span>
+
+<span class="co">/* extract the n-th bit of x */</span>
+<span class="ot">#define GET_BIT(x, n) ((((x)[(n) / BITS_PER_BYTE]) &amp; (0x1 &lt;&lt; ((n) % BITS_PER_BYTE))) != 0)</span>
+
+<span class="co">/* set the n-th bit of x to 1 */</span>
+<span class="ot">#define SET_BIT(x, n) ((x)[(n) / BITS_PER_BYTE]) |= (0x1 &lt;&lt; ((n) % BITS_PER_BYTE))</span>
+
+<span class="co">/* set the n-th bit of x to 0 */</span>
+<span class="ot">#define RESET_BIT(x, n) ((x)[(n) / BITS_PER_BYTE]) &amp;= ~(0x1 &lt;&lt; ((n) % BITS_PER_BYTE))</span></code></pre></div>
+<p>If you want to get multiple bits, use the right-shift operator to
+shift them over to the right end of the word and then mask with bitwise
+AND. For example:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define BITS_PER_BYTE (8)</span>
+
+<span class="co">/* this rather nasty expression constructs an all-ones byte */</span>
+<span class="ot">#define BYTE_MASK ((1 &lt;&lt; BITS_PER_BYTE) - 1)</span>
+
+<span class="co">/* extract the n-th byte from a word */</span>
+<span class="ot">#define GET_BYTE(x, n) (((x) &gt;&gt; BITS_PER_BYTE * (n)) &amp; BYTE_MASK)</span>
+
+<span class="co">/* extract n bits starting at position i from x */</span>
+<span class="ot">#define GET_BITS(x, i, n) (((x) &gt;&gt; (i)) &amp; ((1 &lt;&lt; (n)) - 1))</span>
+
+<span class="co">/* another definition of GET_BIT */</span>
+<span class="ot">#define GET_BIT2(x, n) GET_BITS(x, n, 1)</span></code></pre></div>
+<p>Many much more sophisticated techniques for doing bit-fiddling can be found at <a href="http://www.jjj.de/bitwizardry/bitwizardrypage.html" class="uri">http://www.jjj.de/bitwizardry/bitwizardrypage.html</a>.</p>
+<h2 id="persistence"><span class="header-section-number">6.8</span> Persistence</h2>
+<p>When a C program exits, all of its global variables, local variables,
+ and heap-allocated blocks are lost. Its memory is reclaimed by the
+operating system, erased, and handed out to other programs. So what
+happens if you want to keep data around for later?</p>
+<p>To make this problem concrete, let's suppose we want to keep track of
+ a hit counter for web pages. From time to time, the user will run the
+command <code class="backtick">count_hit&nbsp;number</code> where <code class="backtick">number</code>
+ is an integer value in the range 0 to 99, say. (A real application
+would probably be using urls, but let's keep things as simple as
+possible.) We want <code class="backtick">count_hit</code> to print the number of times the page with the given number has been hit, i.e. <code class="backtick">1</code> the first time it is called, <code class="backtick">2</code> the next time, etc. Where can we store the counts so that they will survive to the next execution of <code class="backtick">count_hit</code>?</p>
+<h3 id="A_simple_solution_using_text_files"><span class="header-section-number">6.8.1</span> A simple solution using text files</h3>
+<p>The simplest solution is probably to store the data in a text file. Here's a program that reads a file <code class="backtick">hit</code>, increments the appropriate value, and then writes out a new version. To reduce the chances that data is lost (say if <code class="backtick">count_hit</code> blows up halfway through writing the file), the new values are written to a new file <code class="backtick">hit~</code>, which is then renamed to <code class="backtick">hit</code>, taking the place of the previous version.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="ot">#define NUM_COUNTERS (100) </span><span class="co">/* number of counters we keep track of */</span>
+<span class="ot">#define COUNTER_FILE "/tmp/hit" </span><span class="co">/* where they are stored */</span>
+<span class="ot">#define NEW_COUNTER_FILE COUNTER_FILE "~" </span><span class="co">/* note use of constant string concatenation */</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> c;
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> counts[NUM_COUNTERS];
+ FILE *f;
+
+ <span class="kw">if</span>(argc &lt; <span class="dv">2</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s number</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ exit(<span class="dv">1</span>);
+ }
+ <span class="co">/* else */</span>
+
+ c = atoi(argv[<span class="dv">1</span>]);
+ <span class="kw">if</span>(c &lt; <span class="dv">0</span> || c &gt;= NUM_COUNTERS) {
+ fprintf(stderr, <span class="st">"Counter %d not in range 0..%d</span><span class="ch">\n</span><span class="st">"</span>, c, NUM_COUNTERS - <span class="dv">1</span>);
+ exit(<span class="dv">2</span>);
+ }
+
+ f = fopen(COUNTER_FILE, <span class="st">"r"</span>);
+ <span class="kw">if</span>(f == <span class="dv">0</span>) {
+ perror(COUNTER_FILE);
+ exit(<span class="dv">3</span>);
+ }
+
+ <span class="co">/* read them in */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; NUM_COUNTERS; i++) {
+ fscanf(f, <span class="st">"%d"</span>, &amp;counts[i]);
+ }
+ fclose(f);
+
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, ++counts[c]);
+
+ <span class="co">/* write them back */</span>
+ f = fopen(NEW_COUNTER_FILE, <span class="st">"w"</span>);
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; NUM_COUNTERS; i++) {
+ fprintf(f, <span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, counts[i]);
+ }
+ fclose(f);
+
+ rename(NEW_COUNTER_FILE, COUNTER_FILE);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/persistence/textFile.c" class="uri">examples/persistence/textFile.c</a>
+</div>
+<p>If you want to use this, you will need to create an initial file <code class="backtick">/tmp/hit</code> with <code class="backtick">NUM_COUNTERS</code> zeroes in it.</p>
+<p>Using a simple text file like this is the easiest way to keep data
+around, since you can look at the file with a text editor or other tools
+ if you want to do things to it. But it means that the program has to
+parse the file every time it runs. We can speed things up a little bit
+(and simplify the code) by storing the values in binary.</p>
+<h3 id="Using_a_binary_file"><span class="header-section-number">6.8.2</span> Using a binary file</h3>
+<p>Here's a version that stores the data as a binary file of exactly <code class="backtick">sizeof(int)&nbsp;*&nbsp;NUM_COUNTERS</code> bytes. It uses the <code class="backtick">stdio</code> routines <code class="backtick">fread</code> and <code class="backtick">fwrite</code>
+ to read and write the file. These are much faster than the loops in the
+ previous program, since they can just slap the bytes directly into <code class="backtick">counts</code> without processing them at all.</p>
+<p>The program also supplies an extra flag <code class="backtick">b</code> to <code class="backtick">fopen</code>.
+ This is ignored on Unix-like machines but is needed on Windows machines
+ to tell the operating system that the file contains binary data (such
+files are stored differently from text files on Windows).</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="ot">#define NUM_COUNTERS (100) </span><span class="co">/* number of counters we keep track of */</span>
+<span class="ot">#define COUNTER_FILE "/tmp/hit" </span><span class="co">/* where they are stored */</span>
+<span class="ot">#define NEW_COUNTER_FILE COUNTER_FILE "~" </span><span class="co">/* note use of constant string concatenation */</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> c;
+ <span class="dt">int</span> counts[NUM_COUNTERS];
+ FILE *f;
+
+ <span class="kw">if</span>(argc &lt; <span class="dv">2</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s number</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ exit(<span class="dv">1</span>);
+ }
+ <span class="co">/* else */</span>
+
+ c = atoi(argv[<span class="dv">1</span>]);
+ <span class="kw">if</span>(c &lt; <span class="dv">0</span> || c &gt;= NUM_COUNTERS) {
+ fprintf(stderr, <span class="st">"Counter %d not in range 0..%d</span><span class="ch">\n</span><span class="st">"</span>, c, NUM_COUNTERS - <span class="dv">1</span>);
+ exit(<span class="dv">2</span>);
+ }
+
+ f = fopen(COUNTER_FILE, <span class="st">"rb"</span>);
+ <span class="kw">if</span>(f == <span class="dv">0</span>) {
+ perror(COUNTER_FILE);
+ exit(<span class="dv">3</span>);
+ }
+
+ <span class="co">/* read them in */</span>
+ fread(counts, <span class="kw">sizeof</span>(*counts), NUM_COUNTERS, f);
+ fclose(f);
+
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, ++counts[c]);
+
+ <span class="co">/* write them back */</span>
+ f = fopen(NEW_COUNTER_FILE, <span class="st">"wb"</span>);
+ fwrite(counts, <span class="kw">sizeof</span>(*counts), NUM_COUNTERS, f);
+ fclose(f);
+
+ rename(NEW_COUNTER_FILE, COUNTER_FILE);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/persistence/binaryFile.c" class="uri">examples/persistence/binaryFile.c</a>
+</div>
+<p>Again, you'll have to initialize <code class="backtick">/tmp/hit</code>
+ to use this; in this case, you want it to contain exactly 400 null
+characters. On a Linux machine you can do this with the command <code class="backtick">dd&nbsp;if=/dev/zero&nbsp;of=/tmp/hit&nbsp;bs=400&nbsp;count=1</code>.</p>
+<p>The advantage of using binary files is that reading and writing them
+is both simpler and faster. The disadvantages are (a) you can't look at
+or update the binary data with your favorite text editor any more, and
+(b) the file may no longer be portable from one machine to another, if
+the different machines have different endianness or different values of <code class="backtick">sizeof(int)</code>.
+ The second problem we can deal with by converting the data to a
+standard word size and byte order before storing it, but then we lose
+some advantages of speed.</p>
+<h3 id="A_version_that_updates_the_file_in_place"><span class="header-section-number">6.8.3</span> A version that updates the file in place</h3>
+<p>We still may run into speed problems if <code class="backtick">NUM_COUNTERS</code> is huge. The next program avoids rewriting the entire file just to update one value inside it. This program uses the <code class="backtick">fseek</code> function to position the cursor inside the file. It opens the file using the <code class="backtick">"r+b"</code> flag to <code class="backtick">fopen</code>, which means to open an existing binary file for reading and writing.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+
+<span class="ot">#define NUM_COUNTERS (100) </span><span class="co">/* number of counters we keep track of */</span>
+<span class="ot">#define COUNTER_FILE "/tmp/hit" </span><span class="co">/* where they are stored */</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> c;
+ <span class="dt">int</span> count;
+ FILE *f;
+
+ <span class="kw">if</span>(argc &lt; <span class="dv">2</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s number</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ exit(<span class="dv">1</span>);
+ }
+ <span class="co">/* else */</span>
+
+ c = atoi(argv[<span class="dv">1</span>]);
+ <span class="kw">if</span>(c &lt; <span class="dv">0</span> || c &gt;= NUM_COUNTERS) {
+ fprintf(stderr, <span class="st">"Counter %d not in range 0..%d</span><span class="ch">\n</span><span class="st">"</span>, c, NUM_COUNTERS - <span class="dv">1</span>);
+ exit(<span class="dv">2</span>);
+ }
+
+ f = fopen(COUNTER_FILE, <span class="st">"r+b"</span>);
+ <span class="kw">if</span>(f == <span class="dv">0</span>) {
+ perror(COUNTER_FILE);
+ exit(<span class="dv">3</span>);
+ }
+
+ <span class="co">/* read counter */</span>
+ fseek(f, <span class="kw">sizeof</span>(<span class="dt">int</span>) * c, SEEK_SET);
+ fread(&amp;count, <span class="kw">sizeof</span>(<span class="dt">int</span>), <span class="dv">1</span>, f);
+
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, ++count);
+
+ <span class="co">/* write it back */</span>
+ fseek(f, <span class="kw">sizeof</span>(<span class="dt">int</span>) * c, SEEK_SET);
+ fwrite(&amp;count, <span class="kw">sizeof</span>(<span class="dt">int</span>), <span class="dv">1</span>, f);
+ fclose(f);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/persistence/binaryFileFseek.c" class="uri">examples/persistence/binaryFileFseek.c</a>
+</div>
+<p>Note that this program is not only shorter than the last one, but it also avoids allocating the <code class="backtick">counts</code>
+ array. It also is less likely to run into trouble with running out of
+space during writing. If we ignore issues of concurrency, this is the
+best we can probably do with just <code class="backtick">stdio</code>.</p>
+<h3 id="An_even_better_version_using_mmap"><span class="header-section-number">6.8.4</span> An even better version using mmap</h3>
+<p>We can do even better using the <code class="backtick">mmap</code> routine, available in all POSIX-compliant C libraries. <a href="http://en.wikipedia.org/wiki/POSIX" title="WikiPedia">POSIX</a>, which is short for <em>Portable Operating System Interface</em>, is supported by essentially all Unix-like operating systems and NT-based versions of Microsoft Windows. The <code class="backtick">mmap</code>
+ routine tells the operating system to "map" a file in the filesystem to
+ a region in the process's address space. Reading bytes from this region
+ will read from the file; writing bytes to this region will write to the
+ file (although perhaps not immediately). Even better, if more than one
+process calls <code class="backtick">mmap</code> on the same file at
+once, they will share the memory region, so that updates made by one
+process will be seen immediately by the others (with some caveats having
+ to do with how concurrent access to memory actually works on real
+machines).</p>
+<p>Here is the program using <code class="backtick">mmap</code>:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;sys/types.h&gt;</span>
+<span class="ot">#include &lt;sys/stat.h&gt;</span>
+<span class="ot">#include &lt;fcntl.h&gt;</span>
+<span class="ot">#include &lt;sys/mman.h&gt; </span><span class="co">/* For mmap. I think mman is short for "memory management." */</span>
+
+<span class="ot">#define NUM_COUNTERS (100) </span><span class="co">/* number of counters we keep track of */</span>
+<span class="ot">#define COUNTER_FILE "/tmp/hit" </span><span class="co">/* where they are stored */</span>
+<span class="ot">#define NEW_COUNTER_FILE COUNTER_FILE "~" </span><span class="co">/* note use of constant string concatenation */</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> c;
+ <span class="dt">int</span> *counts;
+ <span class="dt">int</span> fd;
+
+ <span class="kw">if</span>(argc &lt; <span class="dv">2</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s number</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ exit(<span class="dv">1</span>);
+ }
+ <span class="co">/* else */</span>
+
+ c = atoi(argv[<span class="dv">1</span>]);
+ <span class="kw">if</span>(c &lt; <span class="dv">0</span> || c &gt;= NUM_COUNTERS) {
+ fprintf(stderr, <span class="st">"Counter %d not in range 0..%d</span><span class="ch">\n</span><span class="st">"</span>, c, NUM_COUNTERS - <span class="dv">1</span>);
+ exit(<span class="dv">2</span>);
+ }
+
+ <span class="co">/* open and map the file */</span>
+ fd = open(COUNTER_FILE, O_RDWR);
+ <span class="kw">if</span>(fd &lt; <span class="dv">0</span>) {
+ perror(COUNTER_FILE);
+ exit(<span class="dv">3</span>);
+ }
+ counts = mmap(<span class="dv">0</span>, <span class="kw">sizeof</span>(*counts) * NUM_COUNTERS, PROT_READ|PROT_WRITE, MAP_SHARED, fd, <span class="dv">0</span>);
+
+ <span class="kw">if</span>(counts == <span class="dv">0</span>) {
+ perror(COUNTER_FILE);
+ exit(<span class="dv">4</span>);
+ }
+
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, ++counts[c]);
+
+ <span class="co">/* unmap the region and close the file just to be safe */</span>
+ munmap(counts, <span class="kw">sizeof</span>(*counts) * NUM_COUNTERS);
+ close(fd);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/persistence/binaryFileMmap.c" class="uri">examples/persistence/binaryFileMmap.c</a>
+</div>
+<p>Now the code for actually incrementing <code class="backtick">counts[c]</code> and writing it to the file is trivial. Unfortunately, we have left <code class="backtick">stdio</code> behind, and have to deal with low-level POSIX calls like <code class="backtick">open</code> and <code class="backtick">close</code>
+ to get at the file. Still, this may be the most efficient version we
+can do, and becomes even better if we plan to do many updates to the
+same file, since we can just keep the file open.</p>
+<h3 id="Concurrency_and_fault-tolerance_issues:_ACIDity"><span class="header-section-number">6.8.5</span> Concurrency and fault-tolerance issues: ACIDity</h3>
+<p>All of the solutions described so far can fail if you run two copies of <code class="backtick">count_hits</code> simultaneously. The <code class="backtick">mmap</code>
+ solution is probably the least vulnerable to failures, as the worst
+that can happen is that some update is lost if the same location is
+updated at exactly the same time. The other solutions can fail more
+spectacularly; simultaneous writes to <code class="backtick">/tmp/hit~</code>
+ in the simple text file version, for example, can produce a wide
+variety of forms of file corruption. For a simple web page hit counter,
+this may not be a problem. If you are writing a back-end for a bank, you
+ probably want something less vulnerable.</p>
+<p>Database writers aim for a property called <strong>ACIDity</strong> from the acronym <strong>ACID</strong> = <strong>Atomicity</strong>, <strong>Consistency</strong>, <strong>Isolation</strong>, and <strong>Durability</strong>. These are defined for a system in which the database is accessed via <strong>transactions</strong> consisting of one or more operations. An example of a transaction might be <code class="backtick">++counts[c]</code>, which we can think of as consisting of two operations: reading <code class="backtick">counts[c]</code>, and writing back <code class="backtick">counts[c]+1</code>.</p>
+<p><em>Atomicity</em> means that either every operation in a transaction
+ is performed or none is. In practice, this means if the transaction
+fails any partial progress must be undone.</p>
+<p><em>Consistency</em> means that at the end of a transaction the
+database is in a "consistent" state. This may just mean that no data has
+ been corrupted (e.g. in the text data file we have exactly 100 lines
+and they're all integer counts), or it may also extend to integrity
+constraints enforced by the database (e.g. in a database of airline
+flights, the fact that flight 2937 lands at HVN at 22:34 on 12/17
+implies that flight 2937 exists, has an assigned pilot, etc.).</p>
+<p><em>Isolation</em> says that two concurrent transactions can't detect
+ each other; the partial progress of one transaction is not visible to
+others until the transaction commits.</p>
+<p><em>Durability</em> means that the results of any committed
+transaction are permanent. In practice this means there is enough
+information physically written to a disk to reconstruct the transaction
+before the transaction finished.</p>
+<p>How can we enforce these requirements for our hit counter? Atomicity
+is not hard: if I stop a transaction after a read but before the write,
+no one will be the wiser (although there is a possible problem if only
+half of my write succeeds). Consistency is enforced by the <code class="backtick">fseek</code> and <code class="backtick">mmap</code>
+ solutions, since they can't change the structure of the file. Isolation
+ is not provided by any of our solutions, and would require some sort of
+ locking (e.g. using <code class="backtick">flock</code>) to make sure that only one program uses the file at a time. Durability is enforced by not having <code class="backtick">count_hits</code> return until the <code class="backtick">fclose</code> or <code class="backtick">close</code> operation has succeeded (although full durability would require running <code class="backtick">fsync</code> or <code class="backtick">msync</code> to actually guarantee data was written to disk).</p>
+<p>Though it would be possible to provide full ACIDity with enough work,
+ this is a situation where using an existing well-debugged tool beats
+writing our own. Depending on what we are allowed to do to the machine
+our program is running on, we have many options for getting much better
+handling of concurrency. Some standard tools we could use are:</p>
+<ul>
+<li><a href="http://www.gnu.org/software/gdbm/gdbm.html">gdbm</a>. This
+is a minimal hash-table-on-disk library that uses simplistic locking to
+get isolation. The advantage of this system is that it's probably
+already installed on any Linux machine. The disadvantage is that it
+doesn't provide much functionality beyond basic transactions.</li>
+<li><a href="http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html">Berkeley DB</a>
+ is a fancier hash-table-on-disk library that provides full ACIDity but
+not much else. There is a good chance that some version of this is also
+installed by default on any Linux or BSD machine you run into.</li>
+<li>Various toy databases like <a href="http://www.sqlite.org/">SQLite</a> or <a href="http://www.mysql.com/">MySQL</a>
+ provide tools that look very much like serious databases with easy
+installation and little overhead. These are probably the solutions most
+people choose, especially since MySQL is integrated tightly with PHP and
+ other Web-based scripting languages. Such a solution also allows other
+programs to access the table without having to know a lot of details
+about how it is stored, because the <a href="http://en.wikipedia.org/wiki/SQL" title="WikiPedia">SQL</a> query language hides the underlying storage format.</li>
+<li>Production-quality databases like <a href="http://www.postgresql.org/">PostgreSQL</a>, <a href="http://www.microsoft.com/en-us/server-cloud/products/sql-server/">SQL Server</a>, or <a href="http://www.oracle.com/index.html">Oracle</a>
+ provide very high levels of robustness and concurrency at the cost of
+requiring non-trivial management and possibly large licensing fees. This
+ is what you pick if you really are running a bank.</li>
+</ul>
+<h1 id="whatNext"><span class="header-section-number">7</span> What next?</h1>
+<p>Congratulations! You now know everything there is to know about programming in C. Now what do you do?</p>
+<p>My recommendation would be the following: learn C++, since you know
+75% of it already, and you will be able to escape from some (but not
+all) of the annoying limitations of C. And learn a scripting language
+you can be comfortable with, for writing programs quickly where
+performance isn't the main requirement.</p>
+<h2 id="What.27s_wrong_with_C"><span class="header-section-number">7.1</span> What's wrong with C</h2>
+<p>In this class, we had to work around fundamental limitations in C on several occasions.</p>
+<dl>
+<dt>C doesn't have a garbage collector</dt>
+<dd>Many modern programming languages will detect and free unreachable data
+for you automatically. C doesn't, so the programmer has to spend a lot
+of time worrying about when and by whom data allocated with <code class="backtick">malloc</code> will be passed to <code class="backtick">free</code>.
+ Not only does this create many possibilities for error, but it also
+means that certain kinds of data structures in which a single component
+of the data structure is pointed to by an unpredictable number of other
+components are difficult to write in C, since it's hard to tell when it
+is safe to free a component. Garbage-collected languages avoid all of
+these problems at a slight cost in performance. Though there exists a
+garbage collector for C/C++ <a href="http://www.hboehm.info/gc/" class="uri">http://www.hboehm.info/gc/</a>, it isn't 100% portable and may not work as well as a built-in collector.
+</dd>
+<dt>C doesn't support any kind of polymorphism</dt>
+<dd>Polymorphism is when a function can work on more than one data type. The closest C can do is either parameterized macros (see <a href="#macros">Macros</a>), heavy use of <code class="backtick">void&nbsp;*</code> and function pointers as in <code class="backtick">qsort</code>, or various nasty hacks where code is automatically generated with type names filled in from a base template (see <a href="#macros">Macros</a>
+ again). Most modern programming languages have some sort of support for
+ polymorphism, allowing you to write, for example, a generic sorting
+routine without resorting to <code class="backtick">void&nbsp;*</code>-like departures from the type system.
+</dd>
+<dt>C doesn't have exceptions</dt>
+<dd>Exceptions are a mechanism for doing non-standard returns from a
+function when something blows up, which get caught using an "exception
+handler" that is often separate from the code handling the normal return
+ values and which can often be used to catch exceptions from a variety
+of sources. Instead, C requires function writers to invent and document
+an ad-hoc protocol for indicating bad outcomes for every function they
+write, and requires function users to remember to test for bad return
+values. Most programmers are too lazy to do this all the time, leading
+to undetected run-time errors. Most modern programming languages fix
+this.
+</dd>
+<dt>C doesn't support object-oriented programming very well</dt>
+<dd>"Object-oriented" is a buzzword with many possible meanings (but see <a href="http://c2.com/cgi/wiki?HeInventedTheTerm" class="uri">http://c2.com/cgi/wiki?HeInventedTheTerm</a>).
+ However, at minimum it means that in addition to supporting
+polymorphism (described above), your language should support strong
+encapsulation (controlling who can get at the internal data of an
+object) and inheritance (allowing one abstract data type to be defined
+by extending another). You can fake most of these things in C if you try
+ hard enough (for example, using <a href="#functionPointers">function pointers</a>),
+ but it is always possible to muck around with internal bits of things
+just because of the unlimited control C gives you over the environment.
+This can quickly become dangerous in large software projects.
+</dd>
+<dt>C provides only limited support for avoiding namespace collisions</dt>
+<dd>In a large C program, it's impossible to guarantee that my <code class="backtick">eat_leftovers</code> function exported from <code class="backtick">leftovers.c</code> doesn't conflict with your <code class="backtick">eat_leftovers</code> function in <code class="backtick">cannibalism.c</code>. A mediocre solution is to use longer names: <code class="backtick">leftovers_eat_leftovers</code> vs <code class="backtick">cannibalism_eat_leftovers</code>, and one can also play games with function pointers and global <code class="backtick">struct</code> variables to allow something like <code class="backtick">leftovers.eat_leftovers</code> vs <code class="backtick">cannibalism.eat_leftovers</code>.
+ Most modern programming languages provide an explicit package or
+namespace mechanism to allow the programmer to control who sees what
+names where.
+</dd>
+</dl>
+<h2 id="What_C.2B-.2B-_fixes"><span class="header-section-number">7.2</span> What C++ fixes</h2>
+<p>On the above list, C++ fixes everything except the missing garbage
+collector. If you want to learn C++, you should get a copy of <em>The C++ Programming Language</em>, by Bjarne Stroustrup, which is the definitive reference manual. But you can get a taste of it from several on-line tutorials:</p>
+<ul>
+<li><a href="http://www.4p8.com/eric.brasseur/cppcen.html">C++ tutorial for C users</a>, by <a href="http://www.4p8.com/eric.brasseur/index.html">Eric Brasseur</a>. Exactly what it says. Introduces C++ features not found in C in order of increasing complexity.</li>
+<li>Some other on-line tutorials that assume little or no prior programming experience:
+<ul>
+<li><a href="http://www.cplusplus.com/doc/tutorial/" class="uri">http://www.cplusplus.com/doc/tutorial/</a></li>
+<li><a href="http://www.cprogramming.com/tutorial.html" class="uri">http://www.cprogramming.com/tutorial.html</a></li>
+</ul></li>
+</ul>
+<h2 id="other-c-like-languages"><span class="header-section-number">7.3</span> Other C-like languages</h2>
+<p>C syntax has become the default for new programming languages
+targeted at a general audience. Some noteworthy examples of C-like
+languages are <a href="http://www.oracle.com/technetwork/java/index.html">Java</a> (used in Android), <a href="https://developer.apple.com/library/mac/documentation/Cocoa/Conceptual/ProgrammingWithObjectiveC/Introduction/Introduction.html">Objective-C</a> (used in OSX and iOS), and <a href="http://www.ecma-international.org/publications/standards/Ecma-334.htm">C#</a> (used in Windows).</p>
+<p>Each of these fixes some of the misfeatures of C (including the lack of
+ a garbage collector and bounds checks on arrays) while retaining much
+of the flavor of C. Which to choose probably depends on what platform
+you are interested in developing for.</p>
+<h2 id="Scripting_languages"><span class="header-section-number">7.4</span> Scripting languages</h2>
+<p>Much current programming is done in so-called <strong>scripting languages</strong> like <a href="https://www.python.org/">Python</a>, <a href="http://www.perl.org/">Perl</a>, <a href="http://php.net/">PHP</a>, <a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript">JavaScript</a>, <a href="http://msdn.microsoft.com/en-us/library/2x7h1hfk.aspx">Visual Basic</a>, <a href="http://www.tcl.tk/">Tcl</a>,
+ etc. These are generally interpreted languages similar to Lisp or
+Scheme under the hood, with dynamic typing (type information is carried
+along with values, so type errors are detected only at runtime but
+polymorphism is provided automatically), garbage collectors, and support
+ for many advanced programming features like objects and anonymous
+functions. What distinguishes scripting languages from the Lisp-like
+languages is that the syntax is generally more accessible to newcomers
+and the language runtime usually comes with very large libraries
+providing built-in tools for doing practical programming tasks like
+parsing odd input formats and interfacing to databases and network
+services. The result is that common programming tasks can be implemented
+ using very few lines of code, at a cost in performance that ranges from
+ slight to horrendous depending on what you are doing.</p>
+<p>Let's look at an example in two common scripting languages, Perl and Python.</p>
+<p>Here are some solutions to an old assignment, which find all the palindromes on <code class="backtick">stdin</code> and report the first non-matching character for any non-palindrome.</p>
+<p>The original C version looks like this:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;string.h&gt;</span>
+
+<span class="co">/* Palindrome detector.</span>
+<span class="co"> *</span>
+<span class="co"> * For each line of the input, prints PALINDROME if it is a palindrome</span>
+<span class="co"> * or the index of the first non-matching character otherwise.</span>
+<span class="co"> *</span>
+<span class="co"> * Note: does not handle lines containing nulls.</span>
+<span class="co"> */</span>
+
+<span class="co">/* read a line of text from stdin</span>
+<span class="co"> * and return it (without terminating newline) as a freshly-malloc'd block.</span>
+<span class="co"> * Caller is responsible for freeing this block.</span>
+<span class="co"> * Returns 0 on error or EOF.</span>
+<span class="co"> */</span>
+<span class="dt">char</span> *
+getLine(<span class="dt">void</span>)
+{
+ <span class="dt">char</span> *line; <span class="co">/* line buffer */</span>
+ <span class="dt">int</span> n; <span class="co">/* characters read */</span>
+ <span class="dt">int</span> size; <span class="co">/* size of line buffer */</span>
+ <span class="dt">int</span> c;
+
+ size = <span class="dv">1</span>;
+ line = malloc(size);
+ <span class="kw">if</span>(line == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+
+ n = <span class="dv">0</span>;
+
+ <span class="kw">while</span>((c = getchar()) != <span class="ch">'\n'</span> &amp;&amp; c != EOF) {
+ <span class="kw">while</span>(n &gt;= size - <span class="dv">1</span>) {
+ size *= <span class="dv">2</span>;
+ line = realloc(line, size);
+ <span class="kw">if</span>(line == <span class="dv">0</span>) <span class="kw">return</span> <span class="dv">0</span>;
+ }
+ line[n++] = c;
+ }
+
+ <span class="kw">if</span>(c == EOF &amp;&amp; n == <span class="dv">0</span>) {
+ <span class="co">/* got nothing */</span>
+ free(line);
+ <span class="kw">return</span> <span class="dv">0</span>;
+ } <span class="kw">else</span> {
+ line[n++] = '\<span class="dv">0</span>';
+ <span class="kw">return</span> line;
+ }
+}
+
+<span class="ot">#define IS_PALINDROME (-1)</span>
+
+<span class="co">/* returns IS_PALINDROME if s is a palindrome,</span>
+<span class="co"> * or index of first unmatched character otherwise. */</span>
+<span class="dt">int</span>
+testPalindrome(<span class="dt">const</span> <span class="dt">char</span> *s)
+{
+ <span class="dt">int</span> n; <span class="co">/* length of s */</span>
+ <span class="dt">int</span> i;
+
+ n = strlen(s);
+
+ <span class="co">/* we only have to check up to floor(n/2) */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n/<span class="dv">2</span>; i++) {
+ <span class="kw">if</span>(s[i] != s[n<span class="dv">-1</span>-i]) {
+ <span class="kw">return</span> i;
+ }
+ }
+ <span class="co">/* else */</span>
+ <span class="kw">return</span> IS_PALINDROME;
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">char</span> *line;
+ <span class="dt">int</span> mismatch;
+
+ <span class="kw">while</span>((line = getLine()) != <span class="dv">0</span>) {
+ mismatch = testPalindrome(line);
+ <span class="kw">if</span>(mismatch == IS_PALINDROME) {
+ puts(<span class="st">"PALINDROME"</span>);
+ } <span class="kw">else</span> {
+ printf(<span class="st">"%d</span><span class="ch">\n</span><span class="st">"</span>, mismatch);
+ }
+
+ free(line);
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}
+
+ </code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/scripting/palindrome.c" class="uri">examples/scripting/palindrome.c</a>
+</div>
+<p>This version is written in Perl (<a href="http://www.perl.org/" class="uri">http://www.perl.org</a>):</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode perl"><code class="sourceCode perl"><span class="kw">#!/usr/bin/perl</span>
+
+<span class="co"># For each line in stdin, print PALINDROME if it is a palindrome, or index of</span>
+<span class="co"># the first non-matching character otherwise.</span>
+
+<span class="kw">while</span>(&lt;&gt;) {
+ <span class="fu">chomp</span>; <span class="co"># remove trailing newline</span>
+ <span class="kw">if</span>(<span class="dt">$_</span> <span class="kw">eq</span> <span class="fu">reverse</span> <span class="dt">$_</span>) {
+ <span class="fu">print</span> <span class="kw">"</span><span class="st">PALINDROME</span><span class="ch">\n</span><span class="kw">"</span>;
+ } <span class="kw">else</span> {
+ <span class="kw">for</span> <span class="dt">$i</span> (<span class="dv">0</span>..<span class="fu">length</span>(<span class="dt">$_</span>) - <span class="dv">1</span>) {
+ <span class="kw">if</span>(<span class="fu">substr</span>(<span class="dt">$_</span>, <span class="dt">$i</span>, <span class="dv">1</span>) <span class="kw">ne</span> <span class="fu">substr</span>(<span class="dt">$_</span>, <span class="fu">length</span>(<span class="dt">$_</span>) - <span class="dt">$i</span> - <span class="dv">1</span>, <span class="dv">1</span>)) {
+ <span class="fu">print</span> <span class="dt">$i</span>, <span class="kw">"</span><span class="ch">\n</span><span class="kw">"</span>;
+ <span class="kw">last</span>;
+ }
+ }
+ }
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/scripting/palindrome.pl" class="uri">examples/scripting/palindrome.pl</a>
+</div>
+<p>The things to notice about Perl are that the syntax is deliberately
+very close to C (with some idiosyncratic extensions like putting <code class="backtick">$</code>
+ on the front of all variable names), and that common tasks like reading
+ all input lines get hidden inside default constructions like <code class="backtick">while(&lt;&gt;)</code> and the <code class="backtick">$_</code> variable that functions with no arguments like <code class="backtick">chomp</code> operate on by default. This can allow for very compact but sometimes very incomprehensible code.</p>
+<p>Here's a version in Python (<a href="https://www.python.org/" class="uri">https://www.python.org/</a>):</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode python"><code class="sourceCode python"><span class="co">#!/usr/bin/python</span>
+
+<span class="co">"""For each line in stdin, print PALINDROME if it is a palindrome, or index of</span>
+<span class="co">the first non-matching character otherwise."""</span>
+
+<span class="im">import</span> sys
+
+<span class="cf">for</span> line <span class="op">in</span> sys.stdin:
+ line <span class="op">=</span> line.rstrip(<span class="st">'</span><span class="ch">\n</span><span class="st">'</span>) <span class="co"># remove trailing newline</span>
+ <span class="cf">if</span> line <span class="op">==</span> line[::<span class="op">-</span><span class="dv">1</span>]:
+ <span class="bu">print</span>(<span class="st">"PALINDROME"</span>)
+ <span class="cf">else</span>:
+ mismatches <span class="op">=</span> [ i <span class="cf">for</span> i <span class="op">in</span> <span class="bu">range</span>(<span class="bu">len</span>(line)) <span class="cf">if</span> line[i] <span class="op">!=</span> line[<span class="op">-</span>(i<span class="dv">+1</span>)] ]
+ <span class="bu">print</span>(<span class="bu">min</span>(mismatches))</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/scripting/palindrome.py" class="uri">examples/scripting/palindrome.py</a>
+</div>
+<p>Here the syntax is a little more alien if you are used to C: Python
+doesn't use curly braces for block structure, using indentation instead.
+ The code above uses some other odd features of the language, such as
+the ability to take "slices" of sequence variables like strings (the
+expression <code class="backtick">line[::-1]</code> means "take all elements of <code class="backtick">line</code>
+ starting from the obvious default starting point (the empty string
+before the first colon) to the obvious default ending point (the empty
+string before the second colon) stepping backwards one character at a
+time (the <code class="backtick">-1</code>)"), a feature the language adopted from array-processing languages like <a href="http://www.mathworks.com/">MatLab</a>; and the ability to do <em>list comprehensions</em> (the large expression assigned to <code class="backtick">mismatches</code>), a feature that Python adopted from <a href="http://www.haskell.org/">Haskell</a> and that Haskell adopted from set theory.</p>
+<p>What these gain in short code length they lose in speed; run times on <code class="backtick">/usr/share/dict/words</code> in the Zoo are</p>
+<table style="width:33%;">
+<colgroup>
+<col width="15%">
+<col width="18%">
+</colgroup>
+<tbody>
+<tr class="odd">
+<td align="left"><p>C</p></td>
+<td align="left"><p>0.107s</p></td>
+</tr>
+<tr class="even">
+<td align="left"><p>Perl</p></td>
+<td align="left"><p>0.580s</p></td>
+</tr>
+<tr class="odd">
+<td align="left"><p>Python</p></td>
+<td align="left"><p>2.052s</p></td>
+</tr>
+</tbody>
+</table>
+<p>Note that for Perl and Python some of the cost is the time to start
+the interpreter and parse the script, but factors of 10–100 are not
+unusual slowdowns when moving from C to a scripting language. The
+selling point of these languages is that in many applications run time
+is not as critical as ease and speed of implementation.</p>
+<p>As an even shorter example, if you just want to print all the
+palindromes in a file, you can do that from the command line in one line
+ of Perl, e.g.:</p>
+<pre><code>$ perl -ne 'chomp; print $_, "\n" if($_ eq reverse $_)' &lt; /usr/share/dict/words</code></pre>
+<h1 id="assignments"><span class="header-section-number">8</span> Assignments</h1>
+<h2 id="hw1"><span class="header-section-number">8.1</span> Assignment 1, due Thursday 2015-01-29, at 11:00pm</h2>
+<h3 id="bureaucratic-part"><span class="header-section-number">8.1.1</span> Bureaucratic part</h3>
+<p>Make sure that you sign up for an account on the Zoo at <a href="http://zoo.cs.yale.edu/accounts.html" class="uri">http://zoo.cs.yale.edu/accounts.html</a>.
+ If you already have an account, you still need to check the CPSC 223
+box so that you can turn in assignments. It's best to do this as soon as
+ possible.</p>
+<p>You do not need to develop your solution on the Zoo, but you will
+need to turn it in there, and it will be tested using the compiler on
+the Zoo.</p>
+<h3 id="a-rotten-cipher"><span class="header-section-number">8.1.2</span> A rotten cipher</h3>
+<p>For this assignment, you are to implement an encoder for a
+polyalphabetic substitution cipher vaguely inspired by the Enigma
+machines used by Germany during World War 2. Unlike the Enigma machine,
+this cipher doesn't provide much security, so you can probably tell your
+ non-US-national friends about it without violating US export control
+laws.<a href="#fn26" class="footnoteRef" id="fnref26"><sup>26</sup></a></p>
+<p>Each letter <code>'A'</code> through <code>'Z'</code> or <code>'a'</code> through <code>'z'</code> is encrypted by shifting it some number of positions forward in the alphabet, wrapping around at the end.</p>
+<p>The number of positions is determined by an offset that changes over
+time. The initial shift is 17 positions. After encoding an uppercase
+letter, the shift is increased by 5. After encoding a lowercase letter,
+the shift is increased by 3. To avoid overflow on long texts, it's
+probably a good idea to store the offset modulo 26.</p>
+<p>An uppercase letter is always encoded by an uppercase letter, and a
+lowercase letter is always encoded by a lowercase letter. All other
+characters are passed through intact.</p>
+<p>Below is an example of encoding a famous Napoleonic palindrome using this cipher:</p>
+<pre><code>Plaintext: A b l e w a s I e r e I s a w E l b a
+Offset: 17 22 25 2 5 8 11 14 19 22 25 2 7 10 13 16 21 24 1
+Ciphertext: R x k g b i d W x n d K z k j U g z b</code></pre>
+<h3 id="your-task"><span class="header-section-number">8.1.3</span> Your task</h3>
+<p>For this assignment, you are to write a program <code>encode.c</code> that takes a plaintext from <code>stdin</code>, encodes it using the above algorithm, and writes the result to <code>stdout</code>.</p>
+<p>For example, given the input</p>
+<pre><code>"Stop, thief!" cried Tom, arrestingly.
+
+"You'll never take me alive!" replied the criminal, funereally.</code></pre>
+<p>Your program should output</p>
+<pre><code>"Jpnr, yptsw!" woihj Ccd, uorhycucygw.
+
+"Zud'xa fztfv akxu fa znndp!" fvjiihj ctt umgnmuky, vnjdtjiwzp.</code></pre>
+<h3 id="hints"><span class="header-section-number">8.1.4</span> Hints</h3>
+<ul>
+<li>You should assume that you are using the standard Latin 26-letter alphabet.</li>
+<li>You may assume that the characters <code>'A'</code> through <code>'Z'</code> and <code>'a'</code> through <code>'z'</code> are represented using continuous ranges of integers, so that the expression <code>c - 'A'</code> gives the position of <code>c</code> in the alphabet, provided <code>c</code> is an uppercase character, and counting <code>A</code> as <span class="math inline">0</span>. This means that your program will not be portable to machines that use EBCDIC or some other exotic character representation.</li>
+<li>To test if a character is uppercase or lowercase, one option would be to put <code>#include &lt;ctype.h&gt;</code> in your program and use the <code>isupper</code> and <code>islower</code>
+ macros. Note that these may behave oddly if you have set a locale that
+uses a different alphabet. It may be safer to make your own tests.</li>
+</ul>
+<h3 id="testing-your-assignment"><span class="header-section-number">8.1.5</span> Testing your assignment</h3>
+<p>Sample inputs and outputs can be found in <code>/c/cs223/Hwk1/testFiles</code>
+ on the Zoo. Note that some of these files contain non-printing
+characters that may have odd effects if you send them to your screen.
+The safest way to test if your program produces the same output as the
+sample output is probably to use <code>cmp</code>, for example:</p>
+<pre><code>$ ./encode &lt; test.in &gt; tmp
+$ cmp tmp test.out</code></pre>
+<p>If <code>tmp</code> and <code>test.out</code> contain the same characters, <code>cmp</code> will say nothing. Otherwise it will tell you the first position where they differ.</p>
+<p>If you want to see what characters are in a binary file, try using <code>od -t x1z</code>, as in</p>
+<pre><code>$ echo hi &gt; file
+$ cat file
+hi
+$ od -t x1z file
+0000000 68 69 0a &gt;hi.&lt;
+0000003</code></pre>
+<h3 id="submitting-your-assignment"><span class="header-section-number">8.1.6</span> Submitting your assignment</h3>
+<p>Submit your assignment using the command:</p>
+<pre><code>/c/cs223/bin/submit 1 encode.c</code></pre>
+<p>You can test that your program compiles (and passes a few basic tests) using the command:</p>
+<pre><code>/c/cs223/bin/testit 1 encode</code></pre>
+<p>This runs the test script in <code>/c/cs223/Hwk1/test.encode</code> on your submitted assignment. You can also run this script by hand to test the version of <code>encode.c</code> in your current working directory.</p>
+<p>The unsympathetic robo-grading script used to grade this assignment
+may or may not use the same tests as this command, so you should make
+sure your program works on other inputs as well. You may also want to
+look at the <a href="#programmingStyle">style grading checklist</a> to
+see that you haven't committed any gross atrocities against readability,
+ in case a human being should happen to look at your code.</p>
+<p>You can submit your assignment more than once, but any late penalties
+ will be assessed based on the last submission. For more details about
+the <code>submit</code> script and its capabilities, see <a href="#submitScript">here</a>.</p>
+<h3 id="hw1Solution"><span class="header-section-number">8.1.7</span> Sample solution</h3>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/*</span>
+<span class="co"> * Encode text on stdin by alphabet rotation with shifting offset.</span>
+<span class="co"> *</span>
+<span class="co"> * Initially, each character 'A'..'Z' or 'a'..'z' is rotated 17 positions.</span>
+<span class="co"> *</span>
+<span class="co"> * After encoding an uppercase letter, the offset is increased by 5 (mod 26).</span>
+<span class="co"> *</span>
+<span class="co"> * After encoding a lowercase letter, the offset is increased by 3 (mod 26).</span>
+<span class="co"> *</span>
+<span class="co"> * These parameters are set using the INITIAL_OFFSET, UPPERCASE_STEP, and LOWERCASE_STEP</span>
+<span class="co"> * constants defined below.</span>
+<span class="co"> *</span>
+<span class="co"> */</span>
+<span class="ot">#include &lt;stdio.h&gt;</span>
+
+<span class="ot">#define INITIAL_OFFSET (17)</span>
+
+<span class="ot">#define UPPERCASE_STEP (5)</span>
+<span class="ot">#define LOWERCASE_STEP (3)</span>
+
+<span class="ot">#define MODULUS ('z' - 'a' + 1)</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> offset = INITIAL_OFFSET;
+ <span class="dt">int</span> c;
+
+ <span class="kw">while</span>((c = getchar()) != EOF) {
+ <span class="kw">if</span>(('a' &lt;= c) &amp;&amp; (c &lt;= 'z')) {
+ putchar(((c - 'a') + offset) % MODULUS + 'a');
+ offset = (offset + LOWERCASE_STEP) % MODULUS;
+ } <span class="kw">else</span> <span class="kw">if</span>(('A' &lt;= c) &amp;&amp; (c &lt;= 'Z')) {
+ putchar(((c - 'A') + offset) % MODULUS + 'A');
+ offset = (offset + UPPERCASE_STEP) % MODULUS;
+ } <span class="kw">else</span> {
+ putchar(c);
+ }
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/1/encode.c" class="uri">examples/2015/hw/1/encode.c</a>
+</div>
+<h2 id="hw2"><span class="header-section-number">8.2</span> Assignment 2, due Wednesday 2015-02-04, at 11:00pm</h2>
+<h3 id="opening-a-safe"><span class="header-section-number">8.2.1</span> Opening a safe</h3>
+<p>The Hollywood Hackable Safe Company has announced a new line of
+electronically-controlled safes with a C API to permit maximum opening
+speed while still protecting your valuable loot. This interface is
+defined in the following file, which can also be found on the Zoo in the
+ directory <code>/c/cs223/Hwk2/sourceFiles</code>:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/*</span>
+<span class="co"> * API for safes made by the </span>
+<span class="co"> * Hollywood Hackable Safe Company LLC.</span>
+<span class="co"> */</span>
+
+<span class="kw">typedef</span> <span class="kw">struct</span> safe Safe; <span class="co">/* opaque data type for a safe */</span>
+
+<span class="co">/*</span>
+<span class="co"> * Returns the number of tumblers on a safe.</span>
+<span class="co"> * If this is n, the possible tumbler indices will be 0 through n-1.</span>
+<span class="co"> * */</span>
+<span class="dt">int</span> numTumblers(Safe *s);
+
+<span class="co">/* </span>
+<span class="co"> * Returns the number of positions of each tumbler.</span>
+<span class="co"> * If this is n, the possible tumbler positions will be 0 through n-1.</span>
+<span class="co"> */</span>
+<span class="dt">int</span> numPositions(Safe *s);
+
+<span class="co">/* Return codes for tryCombination */</span>
+<span class="ot">#define SAFE_BAD_COMBINATION (-1)</span>
+<span class="ot">#define SAFE_SELF_DESTRUCTED (-2)</span>
+
+<span class="co">/*</span>
+<span class="co"> * Try a combination.</span>
+<span class="co"> *</span>
+<span class="co"> * This should be an array of numTumblers(s) ints.</span>
+<span class="co"> *</span>
+<span class="co"> * Returns contents of safe (a non-negative int) if combination is correct</span>
+<span class="co"> * and safe has not yet self-destructed.</span>
+<span class="co"> *</span>
+<span class="co"> * Returns SAFE_BAD_COMBINATION if combination is incorrect</span>
+<span class="co"> * and safe has not yet self-destructed.</span>
+<span class="co"> *</span>
+<span class="co"> * Returns SAFE_SELF_DESTRUCTED if safe has self-destructed.</span>
+<span class="co"> *</span>
+<span class="co"> * Note: may modify combination.</span>
+<span class="co"> */</span>
+<span class="dt">int</span> tryCombination(Safe *s, <span class="dt">int</span> *combination);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/2/safe.h" class="uri">examples/2015/hw/2/safe.h</a>
+</div>
+<p>The noteworthy function in this API is <code>tryCombination</code>, which takes a pointer to a safe and an array of <code>int</code>s representing the combination, and returns either the contents of the safe (an <code>int</code>), the special code <code>SAFE_BAD_COMBINATION</code> if the combination is incorrect, or the special code <code>SAFE_SELF_DESTRUCTED</code> if the safe blew up after seeing too many bad combinations. Note that <code>tryCombination</code> does not declare its second argument to be <code>const</code>
+ and may not leave it intact. The additional functions allow you to
+obtain important information about the safe, like how many tumblers it
+has and what values these tumblers can be set to. The behavior of a safe
+ given a combination with the wrong number of values or values outside
+the permitted range is undefined.</p>
+<p>Your task is to write a function <code>openSafe</code> that will open
+ a safe, if possible, by trying all possible combinations. Note that if
+the safe self-destructs before you can try all the possibilities, this
+task may not in fact be possible. Your <code>openSafe</code> function should return <code>SAFE_SELF_DESTRUCTED</code> in this case. Your function should be defined in a file <code>openSafe.c</code> and should match the declaration in this file:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/* Include safe.h before this file to get the definition of Safe. */</span>
+
+<span class="co">/*</span>
+<span class="co"> * Open a safe and return the value returned by tryCombination,</span>
+<span class="co"> * or SAFE_SELF_DESTRUCTED if the safe self-destructed.</span>
+<span class="co"> */</span>
+<span class="dt">int</span> openSafe(Safe *s);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/2/openSafe.h" class="uri">examples/2015/hw/2/openSafe.h</a>
+</div>
+<p>It is recommended that you put the lines below in your <code>openSafe.c</code> file to ensure consistency with these declarations:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include "safe.h"</span>
+<span class="ot">#include "openSafe.h"</span></code></pre></div>
+<p>You may put additional functions in <code>openSafe.c</code> if that would be helpful. You should declare these <code>static</code> to avoid the possibility of namespace conflicts.</p>
+<p>In addition to <code>safe.h</code> and <code>openSafe.h</code>, <code>/c/cs223/Hwk2/sourceFiles</code> also contains a <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/2/main.c">main.c</a> file that can be compiled together with <code>openSafe.c</code>
+ to generate a program that can be called from the command line. This
+program generates a safe with a pseudorandom combination based on
+parameters specified on the command line, runs your <code>openSafe</code> routine on it, and prints the value that <code>openSafe</code> returns. You should not rely on your function being tested with this particular program.</p>
+<h3 id="submitting-your-assignment-1"><span class="header-section-number">8.2.2</span> Submitting your assignment</h3>
+<p>Submit your assignment as usual with</p>
+<pre><code>/c/cs223/bin/submit 2 openSafe.c</code></pre>
+<p>You do not need to submit any other files (and the test script will ignore them if you do).</p>
+<p>You can test that your program compiles and passes a few basic tests with the command</p>
+<pre><code>/c/cs223/bin/testit 2 openSafe</code></pre>
+<p>This runs the test script in <code>/c/cs223/Hwk2/test.openSafe</code> on your submitted assignment. You can also run this script by hand to test the version of <code>openSafe.c</code> in your current working directory.</p>
+<h3 id="hw2valgrind"><span class="header-section-number">8.2.3</span> Valgrind</h3>
+<p>You may need to allocate storage using <code>malloc</code> to complete this assignment. If you do so, you should make sure that you call <code>free</code> on any block you allocate inside your <code>openSafe</code> function before the function returns. The <code>test.openSafe</code> script attempts to detect storage leaks or other problems resulting from misuse of these routines by running your program with <a href="#valgrind">valgrind</a>. You can also use <code>valgrind</code> yourself to track down the source of errors, particularly if you remember to compile with debugging info turned on using the <code>-g3</code> option to <code>gcc</code>. The script <code>/c/cs223/bin/vg</code> gives a shortcut for running <code>valgrind</code> with some of the more useful options.</p>
+<h3 id="hw2Solution"><span class="header-section-number">8.2.4</span> Sample solution</h3>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="ot">#include "safe.h"</span>
+<span class="ot">#include "openSafe.h"</span>
+
+<span class="co">/* set combination to all zeros */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+zeroCombination(<span class="dt">int</span> n, <span class="dt">int</span> *combination)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ combination[i] = <span class="dv">0</span>;
+ }
+}
+
+<span class="co">/* non-destructive version of tryCombination */</span>
+<span class="dt">static</span> <span class="dt">int</span>
+nondestructiveTryCombination(Safe *s, <span class="dt">const</span> <span class="dt">int</span> *combination)
+{
+ <span class="dt">int</span> *copy; <span class="co">/* duplicate of combination */</span>
+ <span class="dt">int</span> result; <span class="co">/* result of tryCombination */</span>
+ <span class="dt">int</span> n; <span class="co">/* number of tumblers */</span>
+ <span class="dt">int</span> i;
+
+ n = numTumblers(s);
+
+ copy = (<span class="dt">int</span> *) malloc(<span class="kw">sizeof</span>(<span class="dt">int</span>) * n);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n; i++) {
+ copy[i] = combination[i];
+ }
+
+ result = tryCombination(s, copy);
+
+ free(copy);
+
+ <span class="kw">return</span> result;
+}
+
+<span class="co">/* update combination to next value */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+nextCombination(<span class="dt">int</span> n, <span class="dt">int</span> base, <span class="dt">int</span> *combination)
+{
+ <span class="dt">int</span> i;
+
+ <span class="co">/* we are essentially incrementing an n-digit number in given base */</span>
+ <span class="co">/* this means setting any digit that overflows to 0 and continuing */</span>
+ <span class="co">/* until we get a digit we can increment without carrying */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; n &amp;&amp; ++(combination[i]) &gt;= base; i++) {
+ combination[i] = <span class="dv">0</span>;
+ }
+}
+
+
+<span class="dt">int</span>
+openSafe(Safe *s)
+{
+ <span class="dt">int</span> *combination; <span class="co">/* counter for combinations */</span>
+ <span class="dt">int</span> n; <span class="co">/* number of tumblers */</span>
+ <span class="dt">int</span> base; <span class="co">/* number of positions */</span>
+ <span class="dt">int</span> result; <span class="co">/* result of tryCombination */</span>
+
+ <span class="co">/* allocate space */</span>
+ n = numTumblers(s);
+ base = numPositions(s);
+
+ combination = malloc(<span class="kw">sizeof</span>(<span class="dt">int</span>) * n);
+ assert(combination);
+
+ <span class="kw">for</span>(zeroCombination(n, combination);
+ (result = nondestructiveTryCombination(s, combination)) == SAFE_BAD_COMBINATION;
+ nextCombination(n, base, combination));
+
+ free(combination);
+ <span class="kw">return</span> result;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/2/openSafe.c" class="uri">examples/2015/hw/2/openSafe.c</a>
+</div>
+<h2 id="hw3"><span class="header-section-number">8.3</span> Assignment 3, due Wednesday 2015-02-11, at 11:00pm</h2>
+<h3 id="quadratic-letter-sequences"><span class="header-section-number">8.3.1</span> Quadratic letter sequences</h3>
+<p>Given a string <span class="math inline"><em>s</em></span>, a <strong>quadratic letter sequence</strong> for <span class="math inline"><em>s</em></span> is defined by giving non-negative integer coefficients <span class="math inline"><em>c</em><sub>0</sub>, <em>c</em><sub>1</sub>, <em>c</em><sub>2</sub></span>, where at least one of <span class="math inline"><em>c</em><sub>1</sub></span> and <span class="math inline"><em>c</em><sub>2</sub></span> is not zero, and computing the sequence of letters <span class="math inline"><em>s</em>[<em>c</em><sub>0</sub> + <em>c</em><sub>1</sub> ⋅ <em>i</em> + <em>c</em><sub>2</sub> ⋅ <em>i</em><sup>2</sup>]</span> for <span class="math inline"><em>i</em> = 0, 1, 2, …</span>.</p>
+<p>These can be used to hide secret messages inside larger texts. For
+example, the famous Napoleonic palindrome "Able was I ere I saw Elba"
+hides the word "bIb" at positions 1, 9, and 23, which are generated by <span class="math inline"><em>c</em><sub>0</sub> = 1</span>, <span class="math inline"><em>c</em><sub>1</sub> = 5</span> and <span class="math inline"><em>c</em><sub>2</sub> = 3</span>:</p>
+<table>
+<thead>
+<tr class="header">
+<th align="left"><span class="math inline"><em>i</em></span></th>
+<th align="left"><span class="math inline"><em>c</em><sub>0</sub> + <em>c</em><sub>1</sub><em>i</em> + <em>c</em><sub>2</sub><em>i</em><sup>2</sup></span></th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left">0</td>
+<td align="left"><span class="math inline">1 = 1 + 5 ⋅ 0 + 3 ⋅ 0<sup>2</sup></span></td>
+</tr>
+<tr class="even">
+<td align="left">1</td>
+<td align="left"><span class="math inline">9 = 1 + 5 ⋅ 1 + 3 ⋅ 1<sup>2</sup></span></td>
+</tr>
+<tr class="odd">
+<td align="left">2</td>
+<td align="left"><span class="math inline">23 = 1 + 5 ⋅ 2 + 3 ⋅ 2<sup>2</sup></span></td>
+</tr>
+</tbody>
+</table>
+<p>Similarly, we can use quadratic letter sequences to reveal secret messages hidden in the <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/3/gangnam-style-excerpt.txt">lyrics of K-pop songs</a>:</p>
+<pre><code>$ ./qls hail satan &lt; gangnam-style-excerpt.txt
+470 3 5 hail
+14 10 30 satan
+14 56 7 satan</code></pre>
+<p>or even examine <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/3/tempest-act-one.txt">Act 1 of <em>The Tempest</em></a> to help resolve the <a href="http://en.wikipedia.org/wiki/Shakespeare_authorship_question">Shakespeare authorship question</a>:<a href="#fn27" class="footnoteRef" id="fnref27"><sup>27</sup></a></p>
+<pre><code>$ ./qls "Bacon" "de Vere" "Marlowe" "Stanley" "that Stratford dude" &lt; tempest-act-one.txt
+120 387 777 Bacon
+120 542 906 Bacon
+120 851 850 Bacon
+120 1592 726 Bacon
+120 1607 472 Bacon
+120 2461 95 Bacon
+120 2729 50 Bacon
+120 3225 215 Bacon
+120 3420 284 Bacon
+120 4223 330 Bacon
+120 4534 76 Bacon
+120 5803 29 Bacon
+143 46 161 Bacon
+143 268 727 Bacon
+143 684 1434 Bacon
+[... 280 more lines of Bacon omitted ...]
+19959 1178 87 Bacon
+5949 239 465 Marlowe</code></pre>
+<h3 id="your-task-1"><span class="header-section-number">8.3.2</span> Your task</h3>
+<p>Write a program <code>qls.c</code> that takes a text on <code>stdin</code> and searches for quadratic letter sequences that start with the strings given in <code>argv</code>. Your program should output all such quadratic letter sequences that it finds, using the format</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> printf(<span class="st">"%d %d %d %s</span><span class="ch">\n</span><span class="st">"</span>, [...]);</code></pre></div>
+<p>where <code>[...]</code> should be replaced by appropriate expressions to give <span class="math inline"><em>c</em><sub>0</sub></span>, <span class="math inline"><em>c</em><sub>1</sub></span>, <span class="math inline"><em>c</em><sub>2</sub></span>, and the string found.</p>
+<p>If a string appears more than once at the start of a quadratic letter
+ sequence, your program should print all occurrences. The order your
+output lines appear in is not important, since the test script sorts
+them into a canonical order. Do whatever is convenient.</p>
+<p>Your program should be reasonably efficient, but you do not need to
+get carried away looking for a sophisticated algorithm for this problem.
+ Simply testing all plausible combinations of coefficients should be
+enough.</p>
+<p>Because neither K-pop songs nor Elizabethan plays use null
+characters, you may assume that no null characters appear in your input.</p>
+<p>You may also assume that any search strings will contain at least two characters, in order to keep the number of outputs finite.</p>
+<h3 id="submitting-your-assignment-2"><span class="header-section-number">8.3.3</span> Submitting your assignment</h3>
+<p>Submit your assignment as usual with</p>
+<pre><code>/c/cs223/bin/submit 3 qls.c</code></pre>
+<p>You can run some basic tests on your submitted solution with</p>
+<pre><code>/c/cs223/bin/testit 3 qls</code></pre>
+<p>The test program is also available as <code>/c/cs223/Hwk3/test.qls</code>. Sample inputs and outputs can be found in <code>/c/cs223/Hwk3/testFiles</code>. The title of each file contains the test strings used, separated by <code>-</code> characters. Before comparing the output of your program to the output files, you may find it helpful to run it through <code>sort</code>, e.g.</p>
+<pre><code>./qls hail satan &lt; hail-satan.in | sort &gt; test.out
+diff test.out hail-satan.out</code></pre>
+<h3 id="hw3Solution"><span class="header-section-number">8.3.4</span> Sample solution</h3>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/*</span>
+<span class="co"> * Search for quadratic letter sequences starting with words from argv on stdin.</span>
+<span class="co"> *</span>
+<span class="co"> * A quadratic letter sequence of length n in s is a sequence of characters</span>
+<span class="co"> *</span>
+<span class="co"> * s[c0 + c1*i + c2*i*i]</span>
+<span class="co"> *</span>
+<span class="co"> * where c0, c1, c2 are all &gt;= 0, at least one of c1 and c2 is &gt; 0,</span>
+<span class="co"> * and i ranges over 0, 1, 2, ..., n-1.</span>
+<span class="co"> *</span>
+<span class="co"> * For each QLS found, prints c0, c1, c2, and the target string to stdout.</span>
+<span class="co"> */</span>
+
+<span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;string.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="ot">#define NUM_COEFFICIENTS (3) </span><span class="co">/* how many coefficients to pass around */</span>
+
+<span class="co">/*</span>
+<span class="co"> * Return true iff we get a match in s for t with coefficients c</span>
+<span class="co"> *</span>
+<span class="co"> * Behavior is undefined if coefficients would send us off the end of s.</span>
+<span class="co"> */</span>
+<span class="dt">static</span> <span class="dt">int</span>
+qlsMatch(<span class="dt">const</span> <span class="dt">char</span> *s, <span class="dt">const</span> <span class="dt">char</span> *t, <span class="dt">int</span> c[NUM_COEFFICIENTS])
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; t[i] != '\<span class="dv">0</span>'; i++) {
+ <span class="kw">if</span>(s[c[<span class="dv">0</span>] + c[<span class="dv">1</span>] * i + c[<span class="dv">2</span>] * i * i] != t[i]) {
+ <span class="co">/* no match */</span>
+ <span class="kw">return</span> <span class="dv">0</span>;
+ }
+ }
+
+ <span class="kw">return</span> <span class="dv">1</span>;
+}
+
+<span class="co">/* </span>
+<span class="co"> * Search for quadratic letter sequences in s starting with t</span>
+<span class="co"> * and print results to stdout.</span>
+<span class="co"> */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+qlsSearch(<span class="dt">const</span> <span class="dt">char</span> *s, <span class="dt">const</span> <span class="dt">char</span> *t)
+{
+ <span class="dt">int</span> c[NUM_COEFFICIENTS]; <span class="co">/* coefficients */</span>
+ <span class="dt">int</span> lenS; <span class="co">/* length of s */</span>
+ <span class="dt">int</span> lenT; <span class="co">/* length of t */</span>
+ <span class="dt">int</span> maxI; <span class="co">/* maximum value for i (this is lenT-1) */</span>
+
+ lenS = strlen(s);
+ lenT = strlen(t);
+ maxI = lenT<span class="dv">-1</span>;
+
+ <span class="co">/* try all possible c[0] that will let us finish before lenS */</span>
+ <span class="kw">for</span>(c[<span class="dv">0</span>] = <span class="dv">0</span>; c[<span class="dv">0</span>] + maxI &lt; lenS; c[<span class="dv">0</span>]++) {
+ <span class="co">/* if s[c[0]] isn't right, c[1] and c[2] can't fix it */</span>
+ <span class="kw">if</span>(s[c[<span class="dv">0</span>]] == t[<span class="dv">0</span>]) {
+ <span class="co">/* try all feasible c[1] */</span>
+ <span class="kw">for</span>(c[<span class="dv">1</span>] = <span class="dv">0</span>; c[<span class="dv">0</span>] + c[<span class="dv">1</span>] * maxI &lt; lenS; c[<span class="dv">1</span>]++) {
+ <span class="co">/* try all feasible c[2], but start at 1 if c[1] == 0 */</span>
+ <span class="kw">for</span>(c[<span class="dv">2</span>] = (c[<span class="dv">1</span>] == <span class="dv">0</span>); c[<span class="dv">0</span>] + c[<span class="dv">1</span>] * maxI + c[<span class="dv">2</span>] * maxI * maxI &lt; lenS; c[<span class="dv">2</span>]++) {
+ <span class="co">/* now see if we get a match */</span>
+ <span class="kw">if</span>(qlsMatch(s, t, c)) {
+ printf(<span class="st">"%d %d %d %s</span><span class="ch">\n</span><span class="st">"</span>, c[<span class="dv">0</span>], c[<span class="dv">1</span>], c[<span class="dv">2</span>], t);
+ }
+ }
+ }
+ }
+ }
+}
+
+<span class="co">/* used internally by getContents; initial size of buffer */</span>
+<span class="ot">#define INITIAL_BUFFER_SIZE (16)</span>
+
+<span class="co">/* </span>
+<span class="co"> * Return a single string holding all characters from stdin.</span>
+<span class="co"> *</span>
+<span class="co"> * This is malloc'd data that the caller should eventually free.</span>
+<span class="co"> */</span>
+<span class="dt">static</span> <span class="dt">char</span> *
+getContents(<span class="dt">void</span>)
+{
+ size_t size;
+ size_t len;
+ <span class="dt">char</span> *text;
+ <span class="dt">int</span> c;
+
+ size = INITIAL_BUFFER_SIZE;
+ len = <span class="dv">0</span>;
+
+ text = malloc(size);
+ assert(text);
+
+ <span class="kw">while</span>((c = getchar()) != EOF) {
+ <span class="co">/* grow the buffer if full */</span>
+ <span class="kw">if</span>(len &gt;= size) {
+ size *= <span class="dv">2</span>;
+ text = realloc(text, size);
+ assert(text);
+ }
+
+ text[len++] = c;
+ }
+
+ <span class="co">/* cleanup */</span>
+ text = realloc(text, len<span class="dv">+1</span>);
+ assert(text);
+
+ text[len] = '\<span class="dv">0</span>';
+
+ <span class="kw">return</span> text;
+}
+
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="dt">int</span> i;
+ <span class="dt">char</span> *s;
+
+ s = getContents();
+
+ <span class="kw">for</span>(i = <span class="dv">1</span>; i &lt; argc; i++) {
+ qlsSearch(s, argv[i]);
+ }
+
+ free(s);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/3/qls.c" class="uri">examples/2015/hw/3/qls.c</a>
+</div>
+<h2 id="hw4"><span class="header-section-number">8.4</span> Assignment 4, due Wednesday 2015-02-18, at 11:00pm</h2>
+<h3 id="an-ascii-art-compositor"><span class="header-section-number">8.4.1</span> An ASCII art compositor</h3>
+<p>For this assignment you are to write a program that takes from <code>stdin</code>
+ a sequence of instructions for pasting ASCII art pictures together,
+reads those pictures from files, and writes the combined picture to <code>stdout</code>.</p>
+<p>Each instruction is of the form <em>row column filename</em>, suitable for reading with <code>scanf("%d %d %s", &amp;row, &amp;col, filename);</code>, where <code>row</code> and <code>col</code> are declared as <code>int</code>s and <code>filename</code> is a suitably large buffer of <code>char</code>s. Such an instruction means to paste the contents of file <em>filename</em> into the picture with each character shifted <em>row</em> rows down and <em>column</em> columns to the right of its position in file <em>filename</em>. When pasting an image, all characters other than space (<code>' '</code>,
+ or ASCII code 32) overwrite any characters from earlier files at the
+same position. Spaces should be treated as transparent, having no effect
+ on the final image.</p>
+<p>For example, suppose that the current directory contains these files:</p>
+<div>
+<pre><code> # # #
+\==========/
+ \......../
+</code></pre>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/4/ship" class="uri">examples/2015/hw/4/ship</a>
+</div>
+<div>
+<pre><code> /\
+ /vv\
+/vvvv\
+ ||
+</code></pre>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/4/tree" class="uri">examples/2015/hw/4/tree</a>
+</div>
+<div>
+<pre><code> * * *
+ ____|_|_|_____
+|_____________|
+|___HAPPY_____|
+|__BIRTHDAY___|
+|_____________|
+</code></pre>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/4/cake" class="uri">examples/2015/hw/4/cake</a>
+</div>
+<p>Then this is what we should get from executing the command:</p>
+<pre><code>$ echo "1 1 ship 3 5 ship 3 19 tree 7 2 ship 13 4 ship 4 22 tree 5 6 cake" | ./compositor</code></pre>
+<div>
+<pre><code>
+ # # #
+ \==========/
+ \......#.# # /\
+ \==========/ /vv\/\
+ \....*.*.* /vvv/vv\
+ ____|_|_|_____|/vvvv\
+ |_____________| ||
+ \===|___HAPPY_____|
+ \..|__BIRTHDAY___|
+ |_____________|
+
+
+ # # #
+ \==========/
+ \......../
+</code></pre>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/4/example.out" class="uri">examples/2015/hw/4/example.out</a>
+</div>
+<h3 id="submitting-your-assignment-3"><span class="header-section-number">8.4.2</span> Submitting your assignment</h3>
+<p>For this assignment, you may submit whatever source files you like, along with a file <code>Makefile</code> that will generate the program <code>compositor</code> when <code>make</code> is called with no arguments (see the <a href="#make">instructions for using make</a>.)</p>
+<p>You can test your submitted assignment using the public test script with</p>
+<pre><code>/c/cs223/bin/testit 4 public</code></pre>
+<p>You may also test your unsubmitted assignment in the current working directory with</p>
+<pre><code>/c/cs223/Hwk4/test.public</code></pre>
+<p>The test script is intended mostly to guard against trivial errors in output format and is not necessarily exhaustive.</p>
+<h3 id="notes"><span class="header-section-number">8.4.3</span> Notes</h3>
+<h4 id="input"><span class="header-section-number">8.4.3.1</span> Input</h4>
+<p>For parsing the commands on <code>stdin</code>, we recommend using <code>scanf</code>. You can test for end of file by checking if <code>scanf</code> correctly parsed all three arguments, as in</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"> <span class="dt">int</span> row;
+ <span class="dt">int</span> col;
+ <span class="dt">char</span> filename[BUFFER_SIZE];
+
+ <span class="kw">while</span>(scanf(<span class="st">"%d %d %s"</span>, &amp;row, &amp;col, filename) == <span class="dv">3</span>) {
+ <span class="co">/* do something with this file */</span>
+ }</code></pre></div>
+<p>You may assume that <code>row</code> and <code>col</code> are always non-negative.</p>
+<p>Your program should exit with a non-zero error code if it cannot open a file for reading. Because <code>scanf</code>'s <code>%s</code>
+ conversion specifier only reads up to the next whitespace character,
+you may assume that filenames do not contain whitespace. You may also
+assume that no filename appearing in the input will require more than
+2048 bytes to store (including the terminal null character).<a href="#fn28" class="footnoteRef" id="fnref28"><sup>28</sup></a></p>
+<p>You may assume that the input contains no null characters.</p>
+<h4 id="output"><span class="header-section-number">8.4.3.2</span> Output</h4>
+<p>Your output should include newline and space characters to put the
+composited characters in the appropriate rows and columns. It should not
+ include any more of such characters than are absolutely necessary.</p>
+<p>For example, there should never be a space at the end of a line (even
+ if there is a space at the end of a line in one of the input files).
+Similarly, there should not be any blank lines at the end of your
+output. You may, however, find it necessary to add a newline to the end
+of the last line to avoid having the output end in the middle of a line.</p>
+<h4 id="general"><span class="header-section-number">8.4.3.3</span> General</h4>
+<p>You may assume that the final picture is not so big that you can't store a row or column number for one of its characters in an <code>int</code>.</p>
+<h3 id="hw4Solution"><span class="header-section-number">8.4.4</span> Sample solution</h3>
+<p>I wrote two versions of this. <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/4/jaggedArray">The first</a>
+ used a jagged array to represent an image, but I decided I didn't like
+it and did another version using a sorted array of points. This
+second version is shown below.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/*</span>
+<span class="co"> * Alternate version of ASCII art thing using a queue.</span>
+<span class="co"> */</span>
+
+<span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+
+<span class="co">/*</span>
+<span class="co"> * Idea of this data structure is that we have a sorted array</span>
+<span class="co"> * of pixels, where each pixel specifies a row, column, and character</span>
+<span class="co"> * to put in that position. The sort order is row then column.</span>
+<span class="co"> *</span>
+<span class="co"> * This is organized as a queue in the sense that we can push</span>
+<span class="co"> * new pixels on to the end of it, although as it happens we</span>
+<span class="co"> * never actually dequeue anything.</span>
+<span class="co"> */</span>
+<span class="kw">struct</span> pixel {
+ <span class="dt">int</span> row;
+ <span class="dt">int</span> col;
+ <span class="dt">char</span> value;
+};
+
+<span class="kw">struct</span> queue {
+ size_t top; <span class="co">/* number of elements */</span>
+ size_t size; <span class="co">/* number of allocated slots */</span>
+ <span class="kw">struct</span> pixel *pixels; <span class="co">/* pixel values, sorted by row then column */</span>
+};
+
+<span class="ot">#define QUEUE_INITIAL_SIZE (16)</span>
+
+<span class="co">/* create new empty queue */</span>
+<span class="kw">struct</span> queue *
+queueCreate(<span class="dt">void</span>)
+{
+ <span class="kw">struct</span> queue *q;
+
+ q = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> queue));
+ assert(q);
+
+ q-&gt;top = <span class="dv">0</span>;
+ q-&gt;size = QUEUE_INITIAL_SIZE;
+
+ q-&gt;pixels = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> pixel) * q-&gt;size);
+ assert(q-&gt;pixels);
+
+ <span class="kw">return</span> q;
+}
+
+<span class="co">/* clean up queue */</span>
+<span class="dt">void</span>
+queueDestroy(<span class="kw">struct</span> queue *q)
+{
+ free(q-&gt;pixels);
+ free(q);
+}
+
+<span class="co">/* add a new pixel to queue */</span>
+<span class="dt">void</span>
+queuePush(<span class="kw">struct</span> queue *q, <span class="kw">struct</span> pixel p)
+{
+ <span class="kw">while</span>(q-&gt;top &gt;= q-&gt;size) {
+ q-&gt;size *= <span class="dv">2</span>;
+ q-&gt;pixels = realloc(q-&gt;pixels, <span class="kw">sizeof</span>(<span class="kw">struct</span> pixel) * q-&gt;size);
+ assert(q-&gt;pixels);
+ }
+
+ q-&gt;pixels[q-&gt;top++] = p;
+}
+
+<span class="co">/* returns malloc'd data, free with queueDestroy */</span>
+<span class="kw">struct</span> queue *
+queueRead(<span class="dt">const</span> <span class="dt">char</span> *filename)
+{
+ FILE *f;
+ <span class="kw">struct</span> queue *q;
+ <span class="kw">struct</span> pixel p;
+ <span class="dt">int</span> c;
+
+ q = queueCreate();
+
+ f = fopen(filename, <span class="st">"r"</span>);
+ <span class="kw">if</span>(f == <span class="dv">0</span>) {
+ perror(filename);
+ exit(<span class="dv">1</span>);
+ }
+
+ p.row = p.col = <span class="dv">0</span>;
+
+ <span class="kw">while</span>((c = getc(f)) != EOF) {
+ <span class="kw">switch</span>(c) {
+ <span class="kw">case</span> <span class="ch">'\n'</span>:
+ p.row++;
+ p.col = <span class="dv">0</span>;
+ <span class="kw">break</span>;
+ <span class="kw">case</span> ' ':
+ p.col++;
+ <span class="kw">break</span>;
+ <span class="kw">default</span>:
+ p.value = c;
+ queuePush(q, p);
+ p.col++;
+ <span class="kw">break</span>;
+ }
+ }
+
+ fclose(f);
+
+ <span class="kw">return</span> q;
+}
+
+<span class="co">/* write pixels in queue to stdout */</span>
+<span class="dt">void</span>
+queueWrite(<span class="dt">const</span> <span class="kw">struct</span> queue *q)
+{
+ <span class="dt">int</span> outputRow = <span class="dv">0</span>;
+ <span class="dt">int</span> outputCol = <span class="dv">0</span>;
+ <span class="dt">int</span> i;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; q-&gt;top; i++) {
+ <span class="kw">while</span>(outputRow &lt; q-&gt;pixels[i].row) {
+ putchar(<span class="ch">'\n'</span>);
+ outputRow++;
+ outputCol = <span class="dv">0</span>;
+ }
+ <span class="kw">while</span>(outputCol &lt; q-&gt;pixels[i].col) {
+ putchar(' ');
+ outputCol++;
+ }
+ putchar(q-&gt;pixels[i].value);
+ outputCol++;
+ }
+
+ <span class="co">/* end last row */</span>
+ putchar(<span class="ch">'\n'</span>);
+}
+
+<span class="co">/* </span>
+<span class="co"> * Merge two queues, creating a new, freshly-allocated queue.</span>
+<span class="co"> * New queue is sorted. If there are pixels in both left </span>
+<span class="co"> * and right with the same row and column, the one from right</span>
+<span class="co"> * overwrites the one from left.</span>
+<span class="co"> */</span>
+<span class="kw">struct</span> queue *
+queueMerge(<span class="dt">const</span> <span class="kw">struct</span> queue *left, <span class="dt">const</span> <span class="kw">struct</span> queue *right)
+{
+ <span class="dt">int</span> l = <span class="dv">0</span>;
+ <span class="dt">int</span> r = <span class="dv">0</span>;
+ <span class="kw">struct</span> queue *q;
+
+ q = queueCreate();
+
+ <span class="kw">while</span>(l &lt; left-&gt;top &amp;&amp; r &lt; right-&gt;top) {
+ <span class="kw">if</span>(left-&gt;pixels[l].row &lt; right-&gt;pixels[r].row) {
+ queuePush(q, left-&gt;pixels[l++]);
+ } <span class="kw">else</span> <span class="kw">if</span>(left-&gt;pixels[l].row == right-&gt;pixels[r].row) {
+ <span class="kw">if</span>(left-&gt;pixels[l].col &lt; right-&gt;pixels[r].col) {
+ queuePush(q, left-&gt;pixels[l++]);
+ } <span class="kw">else</span> <span class="kw">if</span>(left-&gt;pixels[l].col == right-&gt;pixels[r].col) {
+ <span class="co">/* right wins but both increment */</span>
+ queuePush(q, right-&gt;pixels[r++]);
+ l++;
+ } <span class="kw">else</span> {
+ <span class="co">/* right is earlier */</span>
+ queuePush(q, right-&gt;pixels[r++]);
+ }
+ } <span class="kw">else</span> {
+ <span class="co">/* right is earlier */</span>
+ queuePush(q, right-&gt;pixels[r++]);
+ }
+ }
+
+ <span class="co">/* clean out whichever tail is still nonempty */</span>
+ <span class="kw">while</span>(l &lt; left-&gt;top) {
+ queuePush(q, left-&gt;pixels[l++]);
+ }
+
+ <span class="kw">while</span>(r &lt; right-&gt;top) {
+ queuePush(q, right-&gt;pixels[r++]);
+ }
+
+ <span class="kw">return</span> q;
+}
+
+<span class="co">/* in-place offset by r rows and c columns */</span>
+<span class="dt">void</span>
+queueOffset(<span class="kw">struct</span> queue *q, <span class="dt">int</span> r, <span class="dt">int</span> c)
+{
+ <span class="dt">int</span> i;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; q-&gt;top; i++) {
+ q-&gt;pixels[i].row += r;
+ q-&gt;pixels[i].col += c;
+ }
+}
+
+<span class="co">/* max filename size as promised in assignment text */</span>
+<span class="ot">#define BUFFER_SIZE (2048)</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="kw">struct</span> queue *merged; <span class="co">/* holding place for result of merge */</span>
+ <span class="kw">struct</span> queue *left; <span class="co">/* accumulated picture */</span>
+ <span class="kw">struct</span> queue *right; <span class="co">/* new picture */</span>
+ <span class="dt">int</span> row; <span class="co">/* row offset for new picture */</span>
+ <span class="dt">int</span> col; <span class="co">/* column offset for new picture */</span>
+ <span class="dt">char</span> filename[BUFFER_SIZE]; <span class="co">/* filename for new picture */</span>
+
+ <span class="kw">if</span>(argc != <span class="dv">1</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ <span class="kw">for</span>(left = queueCreate(); scanf(<span class="st">"%d %d %s"</span>, &amp;row, &amp;col, filename) == <span class="dv">3</span>; left = merged) {
+ right = queueRead(filename);
+ queueOffset(right, row, col);
+
+ merged = queueMerge(left, right);
+
+ queueDestroy(left);
+ queueDestroy(right);
+ }
+
+ queueWrite(left);
+
+ queueDestroy(left);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/4/compositor.c" class="uri">examples/2015/hw/4/compositor.c</a>
+</div>
+<p>Here is a <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/4/Makefile">Makefile</a>.</p>
+<h2 id="hw5"><span class="header-section-number">8.5</span> Assignment 5, due Wednesday 2015-02-25, at 11:00pm</h2>
+<h3 id="build-a-turing-machine"><span class="header-section-number">8.5.1</span> Build a Turing machine!</h3>
+<p>A <strong>Turing machine</strong> is a hypothetical computational device that consists of a little trolley, called the <strong>controller</strong>,
+ that rolls around on an infinite paper tape on which it can write
+letters. The controller itself has a small number of states that affect
+its behavior, and a program that tells it what to do when it is in a
+particular state and sees a particular letter.</p>
+<p>In C terms, we can think of a Turing machine as consisting of an infinite array of <code>char</code>s (representing the tape), an integer index into the array (representing the current position of the controller), and an <code>int</code> (representing the current state of the controller). At each step, the machine</p>
+<ol style="list-style-type: decimal">
+<li>Looks at the symbol <span class="math inline"><em>s</em></span> on the current tape cell.</li>
+<li>Looks up its state <span class="math inline"><em>q</em></span> and symbol <span class="math inline"><em>s</em></span> in the table representing its program. This table will specify an action, which consists of three parts:
+<ol style="list-style-type: lower-alpha">
+<li>A new symbol to write to the current tape cell.</li>
+<li>A direction <code>-</code> or <code>+</code> to move.</li>
+<li>A new state to switch to.</li>
+</ol></li>
+</ol>
+<p>If the new state is 0, the machine halts and takes no more steps.</p>
+<p>For this assignment, you are to write a Turing machine simulator. The program for the machine will be supplied in <code>argv</code>; <code>argv[i]</code> will give the behavior of the machine when in state <code>i</code>, where the first three characters specify what to do if the machine sees an <code>'a'</code>, the second three characters specify what to do if the machine sees a <code>'b'</code>, and so on. Each of these three-letter groups will look like <em>(new-symbol,direction,new-state)</em>, where <em>new-symbol</em> is the new symbol to write (a lower-case letter), <em>direction</em> is the direction to move (<code>'+'</code>, meaning one position to the right, or <code>'-'</code>, meaning one position to the left) and <em>new-state</em>
+ is the new state to go to (a digit). Your program should run this
+program starting in state 1 on some location in the middle of a tape
+initially made up entirely of <code>'a'</code> characters, and continue
+until the program enters the special halting state 0. It should then
+print the number of steps until this occurred.</p>
+<p>You may assume that the program in <code>argv</code> is complete in
+the sense that it includes rules for any combination of state and symbol
+ you will encounter while executing it. You are not required to detect
+if this assumption is violated.</p>
+<h3 id="example-1"><span class="header-section-number">8.5.2</span> Example</h3>
+<p>The program</p>
+<pre><code>b+2a-0 a-1a-1</code></pre>
+<p>gives instructions for what to do in state 1 (<code>b+2a-0</code>) and state 2 (<code>a-1a-1</code>). In state 1, if the controller reads an <code>a</code>, the triple <code>b+2</code> means that it should write <code>b</code>, move right (<code>+</code>), and switch to state 2. If instead it reads a <code>b</code>, the triple <code>a-0</code> means that it should write <code>a</code>, move left (<code>-</code>), and halt (<code>0</code>). In state 2, the machine always writes <code>a</code>, moves left, and switches to state 1.</p>
+<p>Below is a depiction of this machine's execution. It passes through 4
+ states (including both the initial state and the final halting state)
+using a total of 3 steps. The controller and its current state are shown
+above its current position on the tape at each point in time. To avoid
+having to put in infinitely long lines, only the middle three tape cells
+ are shown.</p>
+<pre><code> 1
+aaa
+
+ 2
+aba
+
+ 1
+aba
+
+0
+aaa</code></pre>
+<h3 id="your-task-2"><span class="header-section-number">8.5.3</span> Your task</h3>
+<p>You should submit a <code>Makefile</code> and whatever source files are needed to generate a program <code>./turing</code> when <code>make</code> is called with no arguments. The <code>turing</code>
+ program should simulate a Turing machine as described above and print
+the number of steps that it takes until it halts in decimal format,
+followed by a newline. It should not produce any other output. For
+example, using the program above, your program should print 3:</p>
+<pre><code>$ ./turing b+2a-0 a-1a-1
+3</code></pre>
+<p>For more complex programs you may get different results. Here is a 3 state, 3 symbol program that runs for a bit longer:</p>
+<pre><code>$ ./turing b+2a-0c-3 b-3c+2b-2 b-1a+2c-1
+92649163</code></pre>
+<p>You may assume that tape symbols can always be represented by
+lowercase letters, that states can always be represented by single
+digits, and that <code>argv</code> is in the correct format (although it may be worth including a few sanity checks in your program just in case).</p>
+<p>Not all Turing machine programs will halt. Your program is not
+required to detect if the Turing machine it is simulating will halt
+eventually or not (although it should notice if it does halt).</p>
+<h3 id="submitting-your-assignment-4"><span class="header-section-number">8.5.4</span> Submitting your assignment</h3>
+<p>Submit all files needed to build your program as usual using <code>/c/cs223/bin/submit 5</code> <em>filename</em>.</p>
+<p>There is a public test script in <code>/c/cs223/Hwk5/test.public</code>. You can run this on your submitted files with <code>/c/cs223/bin/testit 5 public</code>.</p>
+<h3 id="hw5Solution"><span class="header-section-number">8.5.5</span> Sample solution</h3>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/*</span>
+<span class="co"> * Simple Turing machine simulator.</span>
+<span class="co"> *</span>
+<span class="co"> * Tape holds symbols 0 (default) through 2.</span>
+<span class="co"> *</span>
+<span class="co"> * Controller programming is specified in argv:</span>
+<span class="co"> *</span>
+<span class="co"> * argv[i] gives transitions for state i as a string of three-character triples.</span>
+<span class="co"> *</span>
+<span class="co"> * Each triple of characters is &lt;action&gt;&lt;direction&gt;&lt;new-state&gt;</span>
+<span class="co"> *</span>
+<span class="co"> * where &lt;action&gt; is one of:</span>
+<span class="co"> *</span>
+<span class="co"> * a,b,c: write this value to tape</span>
+<span class="co"> *</span>
+<span class="co"> * &lt;direction&gt; is one of:</span>
+<span class="co"> *</span>
+<span class="co"> * -: go left</span>
+<span class="co"> * +: go right</span>
+<span class="co"> * .: stay put</span>
+<span class="co"> *</span>
+<span class="co"> * The three triples give the transitions for reading 0, 1, 2 from tape.</span>
+<span class="co"> *</span>
+<span class="co"> * State 0 is the halting state.</span>
+<span class="co"> *</span>
+<span class="co"> * On halting, prints the number of transitions.</span>
+<span class="co"> * If PRINT_CONFIGURATION is defined, also prints the visited</span>
+<span class="co"> * portion of the tape after every transition.</span>
+<span class="co"> */</span>
+
+<span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;string.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;sys/types.h&gt;</span>
+
+<span class="kw">struct</span> configuration {
+ <span class="dt">unsigned</span> <span class="dt">int</span> state;<span class="co">/* state of head */</span>
+ size_t leftmost; <span class="co">/* leftmost cell visited */</span>
+ size_t rightmost; <span class="co">/* rightmost cell visited */</span>
+ size_t current; <span class="co">/* current cell */</span>
+ size_t tapeLength; <span class="co">/* current allocated space for tape */</span>
+ <span class="dt">char</span> *tape; <span class="co">/* contents of cells */</span>
+};
+
+<span class="co">/* increase the size of the tape and recenter contents in middle */</span>
+<span class="dt">void</span>
+configurationExpand(<span class="kw">struct</span> configuration *c)
+{
+ size_t newTapeLength;
+ <span class="dt">char</span> *oldTape;
+ <span class="dt">char</span> *newTape;
+ size_t i;
+ ssize_t offset;
+
+ newTapeLength = <span class="dv">4</span>*c-&gt;tapeLength;
+ newTape = malloc(newTapeLength);
+ assert(newTape);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; newTapeLength; i++) {
+ newTape[i] = <span class="dv">0</span>;
+ }
+
+ <span class="co">/* copy old tape */</span>
+ offset = newTapeLength / <span class="dv">2</span> - c-&gt;current;
+
+ <span class="kw">for</span>(i = c-&gt;leftmost; i &lt;= c-&gt;rightmost; i++) {
+ newTape[i + offset] = c-&gt;tape[i];
+ }
+
+ oldTape = c-&gt;tape;
+ c-&gt;tape = newTape;
+ c-&gt;tapeLength = newTapeLength;
+ c-&gt;current += offset;
+ c-&gt;leftmost += offset;
+ c-&gt;rightmost += offset;
+
+ free(oldTape);
+}
+
+<span class="ot">#define INITIAL_TAPE_LENGTH (16)</span>
+
+<span class="kw">struct</span> configuration *
+configurationCreate(<span class="dt">void</span>)
+{
+ <span class="kw">struct</span> configuration *c;
+ size_t i;
+
+ c = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> configuration));
+ assert(c);
+
+ c-&gt;state = <span class="dv">1</span>;
+ c-&gt;tapeLength = INITIAL_TAPE_LENGTH;
+ c-&gt;leftmost = c-&gt;rightmost = c-&gt;current = c-&gt;tapeLength / <span class="dv">2</span>;
+ c-&gt;tape = malloc(c-&gt;tapeLength);
+ assert(c-&gt;tape);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; c-&gt;tapeLength; i++) {
+ c-&gt;tape[i] = <span class="dv">0</span>;
+ }
+
+ <span class="kw">return</span> c;
+}
+
+<span class="dt">void</span>
+configurationDestroy(<span class="kw">struct</span> configuration *c)
+{
+ free(c-&gt;tape);
+ free(c);
+}
+
+<span class="ot">#define SYMBOL_BASE ('a')</span>
+<span class="ot">#define STATE_BASE ('0')</span>
+
+<span class="co">/* used for debugging mostly */</span>
+<span class="dt">void</span>
+configurationPrint(<span class="dt">const</span> <span class="kw">struct</span> configuration *c)
+{
+ size_t i;
+
+ <span class="kw">for</span>(i = c-&gt;leftmost; i &lt; c-&gt;current; i++) {
+ putchar(' ');
+ }
+ putchar(STATE_BASE + c-&gt;state);
+ putchar(<span class="ch">'\n'</span>);
+
+ <span class="kw">for</span>(i = c-&gt;leftmost; i &lt;= c-&gt;rightmost; i++) {
+ putchar(SYMBOL_BASE + c-&gt;tape[i]);
+ }
+ putchar(<span class="ch">'\n'</span>);
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="kw">struct</span> configuration *c;
+ <span class="dt">char</span> cellValue;
+ <span class="dt">const</span> <span class="dt">char</span> *transition;
+ size_t steps;
+
+ <span class="kw">if</span>(argc == <span class="dv">1</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s transitions</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ c = configurationCreate();
+ steps = <span class="dv">0</span>;
+
+ <span class="kw">while</span>(c-&gt;state != <span class="dv">0</span>) {
+ steps++;
+
+ <span class="co">/* execute the next transition */</span>
+ assert(c-&gt;state &lt; argc);
+
+ cellValue = c-&gt;tape[c-&gt;current];
+ assert(<span class="dv">0</span> &lt;= cellValue);
+ assert(<span class="dv">3</span>*(cellValue<span class="dv">+1</span>) &lt;= strlen(argv[c-&gt;state]));
+
+ transition = argv[c-&gt;state] + <span class="dv">3</span>*c-&gt;tape[c-&gt;current];
+
+ c-&gt;tape[c-&gt;current] = transition[<span class="dv">0</span>] - SYMBOL_BASE;
+
+ <span class="kw">switch</span>(transition[<span class="dv">1</span>]) {
+ <span class="kw">case</span> '-':
+ <span class="kw">if</span>(c-&gt;current == <span class="dv">0</span>) {
+ configurationExpand(c);
+ }
+ c-&gt;current--;
+ <span class="kw">if</span>(c-&gt;current &lt; c-&gt;leftmost) {
+ c-&gt;leftmost = c-&gt;current;
+ }
+ <span class="kw">break</span>;
+ <span class="kw">case</span> '+':
+ <span class="kw">if</span>(c-&gt;current == c-&gt;tapeLength - <span class="dv">1</span>) {
+ configurationExpand(c);
+ }
+ c-&gt;current++;
+ <span class="kw">if</span>(c-&gt;current &gt; c-&gt;rightmost) {
+ c-&gt;rightmost = c-&gt;current;
+ }
+ <span class="kw">break</span>;
+ <span class="kw">case</span> '.':
+ <span class="co">/* do nothing */</span>
+ <span class="kw">break</span>;
+ <span class="kw">default</span>:
+ fprintf(stderr, <span class="st">"Bad direction '%c'</span><span class="ch">\n</span><span class="st">"</span>, transition[<span class="dv">2</span>]);
+ exit(<span class="dv">2</span>);
+ <span class="kw">break</span>;
+ }
+
+ c-&gt;state = transition[<span class="dv">2</span>] - STATE_BASE;
+
+<span class="ot">#ifdef PRINT_CONFIGURATION</span>
+ configurationPrint(c);
+<span class="ot">#endif</span>
+ }
+
+ <span class="co">/* print number of steps */</span>
+ printf(<span class="st">"%zu</span><span class="ch">\n</span><span class="st">"</span>, steps);
+
+ configurationDestroy(c);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/5/turing.c" class="uri">examples/2015/hw/5/turing.c</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode makefile"><code class="sourceCode makefile"><span class="dt">CC</span><span class="ch">=</span><span class="st">gcc</span>
+<span class="dt">CFLAGS=-std</span><span class="ch">=</span><span class="st">c99 -Wall -pedantic -g3</span>
+
+<span class="dv">all:</span><span class="dt"> turing</span>
+
+<span class="dv">compositor:</span><span class="dt"> turing.o</span>
+ <span class="ch">$(</span><span class="dt">CC</span><span class="ch">)</span> <span class="ch">$(</span><span class="dt">CFLAGS</span><span class="ch">)</span> -o <span class="ch">$@</span> <span class="ch">$^</span>
+
+<span class="dv">clean:</span>
+ <span class="ch">$(</span><span class="dt">RM</span><span class="ch">)</span> turing *.o</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/5/Makefile" class="uri">examples/2015/hw/5/Makefile</a>
+</div>
+<h2 id="hw6"><span class="header-section-number">8.6</span> Assignment 6, due Wednesday 2015-03-25, at 11:00pm</h2>
+<h3 id="sinking-ships"><span class="header-section-number">8.6.1</span> Sinking ships</h3>
+<p>For this assignment, you are to implement a data structure for
+playing a game involving ships placed in a large square grid. Each ship
+occupies one or more squares in either a vertical or horizontal line,
+and has a name that consists of a single <code>char</code> other than a
+period (which will be used to report the absence of a ship). Ships have a
+ bounded maximum length; attempts to place ships longer than this length
+ have no effect.</p>
+<p>All type and constant definitions for the data type, and all function declarations, are given in the file <code>ships.h</code>, which is shown <a href="#hw6-2015-source-files">below</a>, and which you can also find in <code>/c/cs223/Hwk6/sourceFiles/ships.h</code>. The playing field is represented by a <code>struct field</code> (which you get to define). A new <code>struct field</code> is created by <code>fieldCreate</code>, and when no longer needed should be destroyed by <code>fieldDestroy</code>.</p>
+<p>These data types from <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/6/ships.h">ships.h</a> control ship naming and placement. Note that <code>uint32_t</code> is defined in <code>stdint.h</code> (which is also included by <code>inttypes.h</code>). You will need to include one of these files before <code>ships.h</code> to get this definition.</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="kw">typedef</span> <span class="dt">uint32_t</span> coord;
+
+<span class="kw">struct</span> position {
+ coord x;
+ coord y;
+};
+
+<span class="kw">struct</span> ship {
+ <span class="kw">struct</span> position topLeft; <span class="co">/* coordinates of top left corner */</span>
+ <span class="dt">int</span> direction; <span class="co">/* HORIZONTAL or VERTICAL */</span>
+ <span class="dt">unsigned</span> <span class="dt">int</span> length; <span class="co">/* length of ship */</span>
+ <span class="dt">char</span> name; <span class="co">/* name of ship */</span>
+};</code></pre></div>
+<p>Actual placement is done using the <code>fieldPlaceShip</code> function, declared as follows:</p>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="dt">void</span> fieldPlaceShip(<span class="kw">struct</span> field *f, <span class="kw">struct</span> ship s);</code></pre></div>
+<p>A ship of length <span class="math inline"><em>m</em></span> placed horizontally with its top left corner at position <span class="math inline">(<em>x</em>, <em>y</em>)</span> will occupy positions <span class="math inline">(<em>x</em>, <em>y</em>)</span> through <span class="math inline">(<em>x</em> + <em>m</em> − 1, <em>y</em>)</span>. If instead it is placed vertically, it will occupy positions <span class="math inline">(<em>x</em>, <em>y</em>)</span> through <span class="math inline">(<em>x</em>, <em>y</em> + <em>m</em> − 1)</span>. If any of these coordinates exceed the maximum coordinate <code>COORD_MAX</code> (defined in <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/6/ships.h"><code>ships.h</code></a>), the ship will not be placed. The ship will also not be placed if its <code>name</code> field is equal to <code>NO_SHIP_NAME</code> or if the length exceeds <code>MAX_SHIP_LENGTH</code>.</p>
+<p>If the new ship will occupy any of the same positions as a ship previously placed
+in the field, the previous ship will be removed. It is possible for many
+ ships to be removed at once in this way.</p>
+<p>The <code>fieldAttack</code> function can be used to remove a ship at
+ a particular location without placing a new ship. It returns the name
+of the removed ship, if any, or <code>NO_SHIP_NAME</code> if there is no ship at that location.</p>
+<p>Finally, the <code>fieldCountShips</code> function returns the number of ships still present in the field.</p>
+<p>Your job is to write an implementation of these functions, which you should probably put in a file <code>ships.c</code>. You must also supply a <code>Makefile</code>, which, when <code>make</code> is called with no arguments, generates a test program <code>testShips</code> from your implementation and the file <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/6/testShips.c"><code>testShips.c</code></a> that we will provide. You should not count on precisely this version of <code>testShips.c</code> being supplied; your implementation should work with any main program that respects the interface in <code>ships.h</code>.</p>
+<h3 id="things-to-watch-out-for"><span class="header-section-number">8.6.2</span> Things to watch out for</h3>
+<p>You should write your implementation so that it will continue to work if the <code>typedef</code> for <code>coord</code>, or the definitions of the constants <code>COORD_MAX</code>, <code>NO_SHIP_NAME</code>, <code>MAX_SHIP_LENGTH</code>, <code>HORIZONTAL</code>, or <code>VERTICAL</code> change. You may, however, assume that <code>coord</code> is an unsigned integer type and that <code>COORD_MAX</code> is the largest value that can be represented by this type.</p>
+<p>If it helps in crafting your implementation, you may assume that <code>MAX_SHIP_LENGTH</code>
+ will always be a reasonably small constant. You do not need to worry
+about implementing a data structure that will handle huge ships
+efficiently. On the other hand, <code>COORD_MAX</code> as defined in the default <code>ships.h</code> is <span class="math inline">2<sup>32</sup> − 1</span>, so you will need to be able to deal with a field with at least <span class="math inline">2<sup>64</sup></span> possible locations, a consideration you should take into account when choosing a data structure to represent a field.</p>
+<h3 id="the-testships-program"><span class="header-section-number">8.6.3</span> The <code>testShips</code> program</h3>
+<p>The supplied <code>testShips</code> program creates a field, processes commands from <code>stdin</code>, then destroys the field when it reaches <code>EOF</code> or a command that it can't parse. Each command is either of the form</p>
+<pre><code>+ x y vertical length name</code></pre>
+<p>which means to call <code>fieldPlaceShip</code> for a new ship at coordinates (<code>x</code>,<code>y</code>), with direction <code>HORIZONTAL</code> if <code>vertical</code> is false and <code>VERTICAL</code> otherwise, and length <code>length</code> and name <code>name</code>.</p>
+<p>The command</p>
+<pre><code>- x y</code></pre>
+<p>calls <code>fieldAttack</code> on coordinates (<code>x</code>,<code>y</code>).</p>
+<p>Here is a small input file:</p>
+<pre><code>+ 0 0 0 3 a
++ 1 2 0 4 b
++ 3 0 1 3 c
+- 1 0</code></pre>
+<p>After processing the first line, the contents of the field should look like this:</p>
+<pre><code>aaa..
+.....
+.....
+.....
+.....</code></pre>
+<p>After processing the second line:</p>
+<pre><code>aaa..
+.....
+.bbbb
+.....
+.....</code></pre>
+<p>The third line places a ship vertically that intersects one of the previous ships, sinking it:</p>
+<pre><code>aaac.
+...c.
+...c.
+.....
+.....</code></pre>
+<p>Had ship <code>c</code> been shifted one position to the left, it would have sunk <code>a</code> as well.</p>
+<p>Finally, the last line drops a bomb at <span class="math inline">(1, 0)</span>, sinking <code>a</code>:</p>
+<pre><code>...c.
+...c.
+...c.
+.....
+.....</code></pre>
+<p>The input files used by <code>test.public</code> can be found in <code>/c/cs223/Hwk6/testFiles</code>. Some of these were generated randomly using the script <code>/c/cs223/Hwk6/makeRandom</code>, which you should feel free to use for your own nefarious purposes.</p>
+<p>Because the interface in <code>ships.h</code> gives no way to find
+out what ships are currently in the field, the test program will not
+actually produce pictures like the above. Instead, it prints after each
+command a line giving the name of the ship sunk by <code>fieldAttack</code> (or <code>NO_SHIP_NAME</code> if no ship is sunk or <code>fieldPlaceShip</code>
+ is called) and the number of ships left in the field following the
+attack. So the user must imagine the carnage as the 100000 ships in <code>randomSparseBig.in</code> somehow leave only 25336 survivors in <code>randomSparseBig.out</code>, demonstrating the importance of strict navigational rules in real life.</p>
+<div class="figure">
+<img src="" alt="Why you should respect people's space">
+<p class="caption">Why you should respect people's space</p>
+</div>
+<h3 id="submitting-your-assignment-5"><span class="header-section-number">8.6.4</span> Submitting your assignment</h3>
+<p>Submit your assignment as usual with</p>
+<pre><code>/c/cs223/bin/submit 6</code></pre>
+<p>You can run the public test script in <code>/c/cs223/Hwk6/test.public</code> on your submitted files with</p>
+<pre><code>/c/cs223/bin/testit 6 public</code></pre>
+<h3 id="hw6-2015-source-files"><span class="header-section-number">8.6.5</span> Provided source files</h3>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#define HORIZONTAL (0) </span><span class="co">/* place ship horizontally */</span>
+<span class="ot">#define VERTICAL (1) </span><span class="co">/* place ship vertically */</span>
+
+<span class="ot">#define MAX_SHIP_LENGTH (17) </span><span class="co">/* length of longest ship (width is always 1) */</span>
+
+<span class="ot">#define NO_SHIP_NAME ('.') </span><span class="co">/* what to return when hitting no ship */</span>
+
+<span class="co">/*</span>
+<span class="co"> * Type for coordinates, and their maximum possible value.</span>
+<span class="co"> *</span>
+<span class="co"> * Include &lt;stdint.h&gt; before this header file</span>
+<span class="co"> * to get the definition of uint32_t</span>
+<span class="co"> * and its maximum value UINT32_MAX.</span>
+<span class="co"> */</span>
+<span class="kw">typedef</span> <span class="dt">uint32_t</span> coord;
+<span class="ot">#define COORD_MAX (UINT32_MAX)</span>
+
+<span class="co">/*</span>
+<span class="co"> * Non-opaque structs for passing around positions and ship placements.</span>
+<span class="co"> */</span>
+<span class="kw">struct</span> position {
+ coord x;
+ coord y;
+};
+
+<span class="kw">struct</span> ship {
+ <span class="kw">struct</span> position topLeft; <span class="co">/* coordinates of top left corner */</span>
+ <span class="dt">int</span> direction; <span class="co">/* HORIZONTAL or VERTICAL */</span>
+ <span class="dt">unsigned</span> <span class="dt">int</span> length; <span class="co">/* length of ship */</span>
+ <span class="dt">char</span> name; <span class="co">/* name of ship */</span>
+};
+
+<span class="co">/*</span>
+<span class="co"> * Create a playing field for holding ships.</span>
+<span class="co"> */</span>
+<span class="kw">struct</span> field *fieldCreate(<span class="dt">void</span>);
+
+<span class="co">/*</span>
+<span class="co"> * Free all space associated with a field.</span>
+<span class="co"> */</span>
+<span class="dt">void</span> fieldDestroy(<span class="kw">struct</span> field *);
+
+<span class="co">/*</span>
+<span class="co"> * Place a ship in a field with given placement and name.</span>
+<span class="co"> *</span>
+<span class="co"> * If placement.length is less than one or greater than MAX_SHIP_LENGTH, </span>
+<span class="co"> * or if some part of the ship would have a coordinate greater than COORD_MAX, </span>
+<span class="co"> * or if the ship's name is NO_SHIP_NAME,</span>
+<span class="co"> * the function returns without placing a ship.</span>
+<span class="co"> *</span>
+<span class="co"> * Placing a new ship that intersects any previously-placed ships</span>
+<span class="co"> * sinks the previous ships, removing them from the field.</span>
+<span class="co"> */</span>
+<span class="dt">void</span> fieldPlaceShip(<span class="kw">struct</span> field *f, <span class="kw">struct</span> ship s);
+
+<span class="co">/*</span>
+<span class="co"> * Attack!</span>
+<span class="co"> *</span>
+<span class="co"> * Drop a shell at given position.</span>
+<span class="co"> *</span>
+<span class="co"> * Returns NO_SHIP_NAME if attack misses (does not intersect any ship).</span>
+<span class="co"> *</span>
+<span class="co"> * Otherwise returns name of ship hit. </span>
+<span class="co"> *</span>
+<span class="co"> * Hitting a ship sinks it, removing it from the field.</span>
+<span class="co"> */</span>
+<span class="dt">char</span> fieldAttack(<span class="kw">struct</span> field *f, <span class="kw">struct</span> position p);
+
+<span class="co">/*</span>
+<span class="co"> * Return number of ships in the field.</span>
+<span class="co"> */</span>
+size_t fieldCountShips(<span class="dt">const</span> <span class="kw">struct</span> field *f);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/6/ships.h" class="uri">examples/2015/hw/6/ships.h</a>
+</div>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;stdint.h&gt;</span>
+<span class="ot">#include &lt;inttypes.h&gt;</span>
+
+<span class="ot">#include "ships.h"</span>
+
+<span class="ot">#define PLACE_SHIP ('+') </span><span class="co">/* command to place a new ship */</span>
+<span class="ot">#define ATTACK ('-') </span><span class="co">/* command to attack a location */</span>
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="kw">struct</span> field *f; <span class="co">/* where we keep our ships */</span>
+ <span class="dt">int</span> command; <span class="co">/* command char */</span>
+ <span class="kw">struct</span> ship s; <span class="co">/* ship we are placing */</span>
+ <span class="kw">struct</span> position p; <span class="co">/* location to attack */</span>
+ <span class="dt">int</span> sank; <span class="co">/* ship we sank */</span>
+
+ <span class="kw">if</span>(argc != <span class="dv">1</span>) {
+ fprintf(stderr, <span class="st">"Usage: %s</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ f = fieldCreate();
+
+ <span class="kw">while</span>((command = getchar()) != EOF) {
+ <span class="kw">switch</span>(command) {
+ <span class="kw">case</span> PLACE_SHIP:
+ <span class="kw">if</span>(scanf(<span class="st">"%"</span> SCNu32 <span class="st">" %"</span> SCNu32 <span class="st">"%d %u %c "</span>, &amp;s.topLeft.x, &amp;s.topLeft.y, &amp;s.direction, &amp;s.length, &amp;s.name) != <span class="dv">5</span>) {
+ <span class="co">/* not enough args */</span>
+ fprintf(stderr, <span class="st">"Not enough enough args to %c</span><span class="ch">\n</span><span class="st">"</span>, PLACE_SHIP);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+ <span class="co">/* else */</span>
+
+ <span class="co">/* fix the direction to match actual definitions */</span>
+ s.direction = s.direction ? VERTICAL : HORIZONTAL;
+
+ fieldPlaceShip(f, s);
+ sank = NO_SHIP_NAME;
+
+ <span class="kw">break</span>;
+
+ <span class="kw">case</span> ATTACK:
+ <span class="kw">if</span>(scanf(<span class="st">"%"</span> SCNu32 <span class="st">" %"</span> SCNu32 <span class="st">" "</span>, &amp;p.x, &amp;p.y) != <span class="dv">2</span>) {
+ fprintf(stderr, <span class="st">"Not enough enough args to %c</span><span class="ch">\n</span><span class="st">"</span>, ATTACK);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+ <span class="co">/* else */</span>
+ sank = fieldAttack(f, p);
+
+ <span class="kw">break</span>;
+
+ <span class="kw">default</span>:
+ <span class="co">/* bad command */</span>
+ fprintf(stderr, <span class="st">"Bad command %c</span><span class="ch">\n</span><span class="st">"</span>, command);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ <span class="kw">break</span>;
+ }
+
+ printf(<span class="st">"%c %zu</span><span class="ch">\n</span><span class="st">"</span>, sank, fieldCountShips(f));
+ }
+
+ fieldDestroy(f);
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/6/testShips.c" class="uri">examples/2015/hw/6/testShips.c</a>
+</div>
+<h3 id="hw6Solution"><span class="header-section-number">8.6.6</span> Sample solution</h3>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;string.h&gt;</span>
+<span class="ot">#include &lt;stdint.h&gt;</span>
+
+<span class="ot">#include "ships.h"</span>
+
+<span class="co">/* basic hash table */</span>
+<span class="kw">struct</span> field {
+ size_t size; <span class="co">/* number of slots in table */</span>
+ size_t occupancy; <span class="co">/* number of elements in table */</span>
+ <span class="kw">struct</span> elt **table; <span class="co">/* hash table, malloc'd */</span>
+};
+
+<span class="kw">struct</span> elt {
+ <span class="kw">struct</span> elt *next; <span class="co">/* pointer to next element in linked list */</span>
+ <span class="kw">struct</span> ship ship; <span class="co">/* ship in this element */</span>
+};
+
+<span class="co">/* picked more or less at whim from http://planetmath.org/goodhashtableprimes */</span>
+<span class="ot">#define X_HASH_FACTOR (201326611)</span>
+<span class="ot">#define Y_HASH_FACTOR (3145739)</span>
+
+<span class="dt">static</span> size_t
+hash(<span class="kw">struct</span> position p)
+{
+ <span class="kw">return</span> X_HASH_FACTOR * p.x + Y_HASH_FACTOR * p.y;
+}
+
+<span class="ot">#define DEFAULT_INITIAL_SIZE (8)</span>
+
+<span class="co">/* like fieldCreate, but argument gives initial size */</span>
+<span class="dt">static</span> <span class="kw">struct</span> field *
+fieldCreateInternal(size_t initialSize)
+{
+ <span class="kw">struct</span> field *f;
+ size_t i;
+
+ f = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> field));
+ assert(f);
+
+ f-&gt;size = initialSize;
+ f-&gt;occupancy = <span class="dv">0</span>;
+
+ f-&gt;table = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> elt *) * f-&gt;size);
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; f-&gt;size; i++) {
+ f-&gt;table[i] = <span class="dv">0</span>;
+ }
+
+ <span class="kw">return</span> f;
+}
+
+<span class="kw">struct</span> field *
+fieldCreate(<span class="dt">void</span>)
+{
+ <span class="kw">return</span> fieldCreateInternal(DEFAULT_INITIAL_SIZE);
+}
+
+<span class="co">/* destroy contents of f but don't free f itself */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+fieldDestroyContents(<span class="kw">struct</span> field *f)
+{
+ size_t i;
+ <span class="kw">struct</span> elt *e;
+ <span class="kw">struct</span> elt *next;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; f-&gt;size; i++) {
+ <span class="kw">for</span>(e = f-&gt;table[i]; e != <span class="dv">0</span>; e = next) {
+ next = e-&gt;next;
+ free(e);
+ }
+ }
+
+ free(f-&gt;table);
+}
+
+<span class="dt">void</span>
+fieldDestroy(<span class="kw">struct</span> field *f)
+{
+ fieldDestroyContents(f);
+ free(f);
+}
+
+<span class="co">/* when to grow field */</span>
+<span class="ot">#define MAX_ALPHA (1)</span>
+
+<span class="co">/*</span>
+<span class="co"> * Helper for fieldPlaceShip.</span>
+<span class="co"> * </span>
+<span class="co"> * This skips all the sanity-checking in fieldPlaceShip,</span>
+<span class="co"> * and just performs the hash table insertion.</span>
+<span class="co"> */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+fieldInsertShip(<span class="kw">struct</span> field *f, <span class="kw">struct</span> ship s)
+{
+ size_t h; <span class="co">/* hashed coordinates */</span>
+ <span class="kw">struct</span> elt *e; <span class="co">/* new element to insert */</span>
+
+ h = hash(s.topLeft) % f-&gt;size;
+
+ e = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> elt));
+ assert(e);
+
+ e-&gt;ship = s;
+ e-&gt;next = f-&gt;table[h];
+ f-&gt;table[h] = e;
+ f-&gt;occupancy++;
+}
+
+
+<span class="dt">void</span>
+fieldPlaceShip(<span class="kw">struct</span> field *f, <span class="kw">struct</span> ship s)
+{
+ <span class="kw">struct</span> field *f2;
+ <span class="kw">struct</span> elt *e;
+ <span class="kw">struct</span> position pos;
+ size_t i;
+
+ <span class="co">/* test if we can just throw this away */</span>
+ <span class="kw">if</span>(s.name == NO_SHIP_NAME
+ || s.length == <span class="dv">0</span>
+ || s.length &gt; MAX_SHIP_LENGTH
+ || (s.direction == HORIZONTAL &amp;&amp; s.topLeft.x &gt; COORD_MAX - (s.length - <span class="dv">1</span>))
+ || (s.direction == VERTICAL &amp;&amp; s.topLeft.y &gt; COORD_MAX - (s.length - <span class="dv">1</span>))
+ )
+ {
+ <span class="kw">return</span>;
+ }
+ <span class="co">/* else */</span>
+
+ <span class="kw">if</span>(f-&gt;occupancy &gt;= f-&gt;size * MAX_ALPHA) {
+ <span class="co">/* grow the field */</span>
+ f2 = fieldCreateInternal(f-&gt;size * <span class="dv">2</span>);
+
+ <span class="co">/* copy to new field */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; f-&gt;size; i++) {
+ <span class="kw">for</span>(e = f-&gt;table[i]; e != <span class="dv">0</span>; e = e-&gt;next) {
+ <span class="co">/* skip testing for occupancy or intersections */</span>
+ fieldInsertShip(f2, e-&gt;ship);
+ }
+ }
+
+ <span class="co">/* transplant new field into old field */</span>
+ fieldDestroyContents(f);
+ *f = *f2;
+
+ free(f2);
+ }
+
+ <span class="co">/* check for intersections */</span>
+ pos = s.topLeft;
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; s.length; i++) {
+ <span class="kw">if</span>(s.direction == HORIZONTAL) {
+ pos.x = s.topLeft.x + i;
+ } <span class="kw">else</span> {
+ pos.y = s.topLeft.y + i;
+ }
+
+ fieldAttack(f, pos);
+ }
+
+ <span class="co">/* call helper to do the actual hash table insertion */</span>
+ fieldInsertShip(f, s);
+}
+
+<span class="co">/*</span>
+<span class="co"> * Helper for fieldAttack.</span>
+<span class="co"> *</span>
+<span class="co"> * If there is a ship with topLeft at given position, return pointer</span>
+<span class="co"> * to location in hash table that points to it (either table entry</span>
+<span class="co"> * or next component).</span>
+<span class="co"> *</span>
+<span class="co"> * If not, return null.</span>
+<span class="co"> */</span>
+<span class="dt">static</span> <span class="kw">struct</span> elt **
+fieldShipAt(<span class="kw">struct</span> field *f, <span class="kw">struct</span> position p)
+{
+ <span class="kw">struct</span> elt **prev; <span class="co">/* previous pointer */</span>
+
+ <span class="kw">for</span>(prev = &amp;f-&gt;table[hash(p) % f-&gt;size]; *prev != <span class="dv">0</span>; prev = &amp;((*prev)-&gt;next)) {
+ <span class="kw">if</span>((*prev)-&gt;ship.topLeft.x == p.x &amp;&amp; (*prev)-&gt;ship.topLeft.y == p.y) {
+ <span class="kw">return</span> prev;
+ }
+ }
+
+ <span class="co">/* didn't find anything */</span>
+ <span class="kw">return</span> <span class="dv">0</span>;
+}
+
+<span class="co">/*</span>
+<span class="co"> * Attack!</span>
+<span class="co"> *</span>
+<span class="co"> * Drop a shell at given position.</span>
+<span class="co"> *</span>
+<span class="co"> * Returns NO_SHIP_NAME if attack misses (does not intersect any ship).</span>
+<span class="co"> *</span>
+<span class="co"> * Otherwise returns name of ship hit.</span>
+<span class="co"> * The name is a plain char returned by value, so there is nothing to free.</span>
+<span class="co"> *</span>
+<span class="co"> * Hitting a ship sinks it, removing it from the field.</span>
+<span class="co"> */</span>
+<span class="dt">char</span>
+fieldAttack(<span class="kw">struct</span> field *f, <span class="kw">struct</span> position p)
+{
+ <span class="kw">struct</span> position p2;
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> direction;
+ <span class="kw">struct</span> elt **prev;
+ <span class="kw">struct</span> elt *freeMe;
+ <span class="dt">char</span> name;
+
+ <span class="kw">for</span>(direction = <span class="dv">0</span>; direction &lt;= <span class="dv">1</span>; direction++) {
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; MAX_SHIP_LENGTH &amp;&amp; i &lt;= (direction == HORIZONTAL ? p.x : p.y); i++) {
+ <span class="kw">if</span>(direction == HORIZONTAL) {
+ p2.x = p.x - i;
+ p2.y = p.y;
+ } <span class="kw">else</span> {
+ p2.x = p.x;
+ p2.y = p.y - i;
+ }
+
+ prev = fieldShipAt(f, p2);
+
+ <span class="kw">if</span>(prev) {
+ <span class="co">/* if we sink anybody, it will be this ship */</span>
+ <span class="co">/* but maybe it doesn't reach */</span>
+ <span class="co">/* or points in the wrong direction */</span>
+ <span class="kw">if</span>((*prev)-&gt;ship.length &gt; i &amp;&amp; (*prev)-&gt;ship.direction == direction) {
+ <span class="co">/* got it */</span>
+ freeMe = *prev;
+ *prev = freeMe-&gt;next;
+
+ name = freeMe-&gt;ship.name;
+ free(freeMe);
+
+ f-&gt;occupancy--;
+
+ <span class="kw">return</span> name;
+ } <span class="kw">else</span> {
+ <span class="co">/* didn't get it */</span>
+ <span class="co">/* maybe try again in other direction */</span>
+ <span class="kw">break</span>;
+ }
+ }
+ }
+ }
+
+ <span class="co">/* didn't get anything */</span>
+ <span class="kw">return</span> NO_SHIP_NAME;
+}
+
+<span class="co">/*</span>
+<span class="co"> * Return number of ships in the field.</span>
+<span class="co"> */</span>
+size_t
+fieldCountShips(<span class="dt">const</span> <span class="kw">struct</span> field *f)
+{
+ <span class="kw">return</span> f-&gt;occupancy;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/6/ships.c" class="uri">examples/2015/hw/6/ships.c</a>
+</div>
+<h2 id="hw7"><span class="header-section-number">8.7</span> Assignment 7, due Wednesday 2015-04-01, at 11:00pm</h2>
+<h3 id="solitaire-with-big-cards"><span class="header-section-number">8.7.1</span> Solitaire with big cards</h3>
+<p>For this assignment you are to implement a strategy for playing a card game involving moving cards (represented by <code>uint64_t</code>s) down through a sequence of <span class="math inline"><em>n</em></span> piles. The interface to your strategy is given in the file <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/7/strategy.h"><code>strategy.h</code></a>, shown below:</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/*</span>
+<span class="co"> * Interface for card-playing strategy.</span>
+<span class="co"> *</span>
+<span class="co"> * The deal function supplies a new card to the strategy. Each possible card will only be dealt once.</span>
+<span class="co"> *</span>
+<span class="co"> * The play function should return a card that has been dealt previously but not yet played.</span>
+<span class="co"> * If asked for a card when the hand is empty, its behavior is undefined.</span>
+<span class="co"> */</span>
+
+<span class="ot">#include &lt;stdint.h&gt;</span>
+
+<span class="kw">typedef</span> <span class="dt">uint64_t</span> Card; <span class="co">/* representation of a card */</span>
+
+<span class="co">/* opaque type for strategy data */</span>
+<span class="kw">typedef</span> <span class="kw">struct</span> strategy Strategy;
+
+<span class="co">/* set up a new strategy for numPiles many piles */</span>
+Strategy *strategyCreate(<span class="dt">int</span> numPiles);
+
+<span class="co">/* clean up all space used by a strategy */</span>
+<span class="dt">void</span> strategyDestroy(Strategy *);
+
+<span class="co">/* add a card to the current hand */</span>
+<span class="dt">void</span> strategyDeal(Strategy *, Card);
+
+<span class="co">/* play a card from pile k */</span>
+Card strategyPlay(Strategy *, <span class="dt">int</span> k);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/7/strategy.h" class="uri">examples/2015/hw/7/strategy.h</a>
+</div>
+<p>Initially, the player has <span class="math inline"><em>n</em></span> piles, numbered <span class="math inline">1</span> through <span class="math inline"><em>n</em></span>. The <code>strategyDeal</code> function is called to indicate that a new card has been dealt to pile <span class="math inline"><em>n</em></span>. The <code>strategyPlay</code> function is called to indicate that a card should be moved from pile <code>k</code> to pile <code>k-1</code>; this function should return the card to move. Cards moved to pile <span class="math inline">0</span>
+ leave the game and are not used again. Each card is unique: once a card
+ is dealt, the same card will never be dealt again during the same play
+of the game.</p>
+<p>The choice of when to deal and when to play from each pile is controlled
+by some external entity, which at some point will stop and compute the
+smallest card in each pile. The goal of the strategy is to make these
+smallest cards be as large as possible, giving priority to the
+highest-numbered piles: given two runs of the game, the better-scoring
+one is the one that has the larger smallest card in pile <span class="math inline"><em>n</em></span>, or, if both have the same smallest card in pile <span class="math inline"><em>n</em></span>, the one that has the larger smallest card in pile <span class="math inline"><em>n</em> − 1</span>, and so forth. A tie would require that both runs end with the same smallest card in every pile. An empty pile counts as <code>UINT64_MAX</code> for this purpose (although note that a strategy has no control over which piles are empty).</p>
+<p>Your job is to implement a strategy that produces the best possible result for any sequence of calls to <code>strategyDeal</code> and <code>strategyPlay</code>.
+ Your strategy implementation will most likely need to keep track of
+which cards are available in each pile, as this information is not
+provided by the caller. Your <code>strategyPlay</code> function should
+only make legal moves: that is, it should only play cards that are
+actually present in the appropriate pile. You may assume that <code>strategyPlay</code> is never called on an empty pile.</p>
+<p>Your implementation should consist of a file <code>strategy.c</code> and any supporting source and header files that you need other than <code>strategy.h</code>, which we have provided for you. You should also supply a file <code>Makefile</code> that generates a program <code>testStrategy</code> when <code>make</code> is called with no arguments, using your implementation and the <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/7/testStrategy.c"><code>testStrategy.c</code></a> file that you can find in <code>/c/cs223/Hwk7/sourceFiles/testStrategy.c</code>.</p>
+<h3 id="explanation-of-the-testing-program"><span class="header-section-number">8.7.2</span> Explanation of the testing program</h3>
+<p>The <code>testStrategy</code> program implements one of four rules for when you can play from each pile. The arguments to <code>testStrategy</code>
+ are a character indicating which rule to apply, the number of cards to
+deal (which can be pretty big), and the number of piles (which is much
+more limited, because <code>testStrategy.c</code> tracks the pile each card is in using a <code>char</code>
+ to save space). The actual cards dealt are generated deterministically
+and will be the same in every execution with the same arguments. The
+test files in <code>/c/cs223/Hwk7/testFiles</code> give the expected output when <code>testStrategy</code> is run with the arguments specified in the filename (after removing the <code>-</code> characters); this will always be the value, in hexadecimal, of the smallest card in each pile, starting with the top pile.</p>
+<p>For example, running the <em>harmonic</em> rule <code>h</code> with 1000 cards and 4 piles (not counting the 0 pile) gives the output</p>
+<pre><code>$ ./testStrategy h 1000 4
+5462035faf0d6fa1
+501ebb6268d39af3
+25732b5fee7c8ad7
+301e0f608d124ede</code></pre>
+<p>This output would appear in a file named <code>h-1000-4</code>, if this particular combination of parameters were one of the test cases.</p>
+<h3 id="submitting-your-assignment-6"><span class="header-section-number">8.7.3</span> Submitting your assignment</h3>
+<p>Submit your assignment as usual with <code>/c/cs223/bin/submit 7</code>. You should submit your source file(s), your <code>Makefile</code>, and any other files needed to build your program other than <code>strategy.h</code> and <code>testStrategy.c</code>, which will be supplied by the test script. You can test your submission using the public test script in <code>/c/cs223/Hwk7/test.public</code> using the command <code>/c/cs223/bin/testit 7 public</code>.</p>
+<h3 id="hw7Solution"><span class="header-section-number">8.7.4</span> Sample solution</h3>
+<p>I implemented a heap with <code>uint64_t</code> elements as a separate module (<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/7/heap.h">heap.h</a>, <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/7/heap.c">heap.c</a>) and then used it in a main module <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/7/strategy.c">strategy.c</a> that allocates a separate heap for each pile and manages the translation between <code>strategyDeal</code> and <code>strategyPlay</code> and the heap functions. The <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/7/Makefile">Makefile</a> is pretty much the usual.</p>
+<h2 id="hw8"><span class="header-section-number">8.8</span> Assignment 8, due Wednesday 2015-04-08, at 11:00pm</h2>
+<h3 id="an-ordered-set"><span class="header-section-number">8.8.1</span> An ordered set</h3>
+<p>For this assignment, you are to implement an ordered set data type
+for holding null-terminated strings. The interface to this data type is
+given in the file <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/8/orderedSet.h">orderedSet.h</a>, shown below.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="co">/*</span>
+<span class="co"> * Ordered set data structure.</span>
+<span class="co"> */</span>
+
+<span class="co">/* Make a new empty set */</span>
+<span class="kw">struct</span> orderedSet *orderedSetCreate(<span class="dt">void</span>);
+
+<span class="co">/* Destroy a set */</span>
+<span class="dt">void</span> orderedSetDestroy(<span class="kw">struct</span> orderedSet *);
+
+<span class="co">/* How many elements in this set? */</span>
+size_t orderedSetSize(<span class="dt">const</span> <span class="kw">struct</span> orderedSet *);
+
+<span class="co">/* Insert a new element. Has no effect if element is already present. */</span>
+<span class="dt">void</span> orderedSetInsert(<span class="kw">struct</span> orderedSet *, <span class="dt">const</span> <span class="dt">char</span> *);
+
+<span class="co">/* Delete an element. Has no effect if element is not already present. */</span>
+<span class="dt">void</span> orderedSetDelete(<span class="kw">struct</span> orderedSet *, <span class="dt">const</span> <span class="dt">char</span> *);
+
+<span class="co">/* Return a new ordered set containing all elements e</span>
+<span class="co"> * for which predicate(arg, e) != 0.</span>
+<span class="co"> * The predicate function should be applied to the elements in increasing order. */</span>
+<span class="kw">struct</span> orderedSet *orderedSetFilter(<span class="dt">const</span> <span class="kw">struct</span> orderedSet *, <span class="dt">int</span> (*predicate)(<span class="dt">void</span> *arg, <span class="dt">const</span> <span class="dt">char</span> *), <span class="dt">void</span> *arg);</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/8/orderedSet.h" class="uri">examples/2015/hw/8/orderedSet.h</a>
+</div>
+<p>In addition to the usual create and destroy functions, an ordered set
+ supports inserting and deleting elements, counting the number of
+distinct elements in the set, and filtering the set based on a predicate
+ function passed in as an argument. This filtering operation does not
+modify the input set, but instead generates a new ordered set containing
+ only those elements on which the predicate returns a nonzero value.</p>
+<p>The filtering operation is where most of the excitement happens; because the predicate function takes an argument of type <code>void *</code>
+ that is also passed to the filter function, it is possible for the
+predicate to compute an arbitrary function on the elements of the set as
+ a side-effect of processing each element to decide whether to put it in
+ the output set. This allows predicates to be abused to perform all
+sorts of computations, including printing out all elements of the set or
+ computing a hash of all the strings in the set concatenated together.
+These features are used by the test program <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/8">testOrderedSet.c</a>
+ that we have provided. To ensure that these traversals give consistent
+results, it is required that when your implementation of <code>orderedSetFilter</code> is executed, it calls the predicate function exactly once on each element of the set in increasing order as determined by <code>strcmp</code>.</p>
+<h3 id="the-testorderedset-wrapper"><span class="header-section-number">8.8.2</span> The <code>testOrderedSet</code> wrapper</h3>
+<p>The test program is a fairly thin wrapper over the implementation
+that allows you to call the various functions using one-line commands on
+ standard input. A command is given as the first character of the line,
+and the rest of the line contains the argument to the command if needed.
+ The <code>+</code> and <code>-</code> commands add or remove an element from the set, respectively, while the <code>p</code>, <code>s</code>, and <code>h</code> commands print the contents of the set, the size of the set, and a hash of the set (these commands ignore any argument). The <code>f</code> command removes all elements of the set that do not contain a particular substring.</p>
+<p>Here is a simple input to the program that inserts four strings, filters out the ones that don't contain <code>ee</code>, then prints various information about the results.</p>
+<pre><code>+feed
++the
++bees
++please
+fee
+s
+h
+p</code></pre>
+<p>This should produce the output</p>
+<pre><code>2
+15082778b3db8cb3
+bees
+feed</code></pre>
+<h3 id="hw8submission"><span class="header-section-number">8.8.3</span> Submitting your assignment</h3>
+<p>Submit, with the usual <code>/c/cs223/bin/submit 8 filename</code>, your <code>Makefile</code> and any supporting files needed to build the program <code>testOrderedSet</code> from <code>testOrderedSet.c</code> and <code>orderedSet.h</code> when <code>make</code> is called with no arguments. These last two files will be provided by the test script and you do not need to submit them.</p>
+<p>You can test your submission against the public test script in <code>/c/cs223/Hwk8/test.public</code> with <code>/c/cs223/bin/testit 8 public</code>.</p>
+<h3 id="hw8Solution"><span class="header-section-number">8.8.4</span> Sample solution</h3>
+<p>There were a lot of ways to do this. For the sample solution, I
+decided to do something unusual, and store the set as a hash table. This
+ is not ordered, but since the only operation that requires the set to
+be ordered is <code>orderedSetFilter</code>, which will take <span class="math inline"><em>Ω</em>(<em>n</em>)</span> time no matter how you implement it, the <span class="math inline"><em>O</em>(<em>n</em>log<em>n</em>)</span> cost to call <code>qsort</code> to sort the elements as needed does not add much overhead.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;stdint.h&gt;</span>
+<span class="ot">#include &lt;string.h&gt;</span>
+
+<span class="ot">#include "orderedSet.h"</span>
+
+<span class="co">/* We'll use a hash table with linear probing.</span>
+<span class="co"> * This is not actually ordered, but the only operations that</span>
+<span class="co"> * depend on order are linear-time anyway, so we can afford to sort as needed */</span>
+<span class="kw">struct</span> orderedSet {
+ size_t n; <span class="co">/* number of elements */</span>
+ size_t size; <span class="co">/* size of the table */</span>
+ <span class="dt">char</span> **table; <span class="co">/* hash table */</span>
+};
+
+<span class="ot">#define INITIAL_SIZE (16)</span>
+<span class="ot">#define MAX_ALPHA (0.75)</span>
+
+<span class="co">/* Make a new empty set with given size */</span>
+<span class="dt">static</span> <span class="kw">struct</span> orderedSet *
+orderedSetCreateInternal(size_t size)
+{
+ <span class="kw">struct</span> orderedSet *s;
+
+ s = malloc(<span class="kw">sizeof</span>(*s));
+ assert(s);
+
+ s-&gt;n = <span class="dv">0</span>;
+ s-&gt;size = size;
+ s-&gt;table = calloc(s-&gt;size, <span class="kw">sizeof</span>(<span class="dt">char</span> *));
+
+ <span class="kw">return</span> s;
+}
+
+<span class="kw">struct</span> orderedSet *
+orderedSetCreate(<span class="dt">void</span>)
+{
+ <span class="kw">return</span> orderedSetCreateInternal(INITIAL_SIZE);
+}
+
+<span class="co">/* Destroy a set */</span>
+<span class="dt">void</span>
+orderedSetDestroy(<span class="kw">struct</span> orderedSet *s)
+{
+ size_t i;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; s-&gt;size; i++) {
+ <span class="kw">if</span>(s-&gt;table[i]) {
+ free(s-&gt;table[i]);
+ }
+ }
+
+ free(s-&gt;table);
+ free(s);
+}
+
+<span class="co">/* How many elements in this set? */</span>
+size_t
+orderedSetSize(<span class="dt">const</span> <span class="kw">struct</span> orderedSet *s)
+{
+ <span class="kw">return</span> s-&gt;n;
+}
+
+<span class="dt">static</span> size_t
+hash(<span class="dt">const</span> <span class="dt">char</span> *s)
+{
+ size_t h;
+
+ <span class="co">/* usual crummy hash function */</span>
+ <span class="kw">for</span>(h = <span class="dv">0</span>; *s; h = h * <span class="dv">97</span> + *s++);
+
+ <span class="kw">return</span> h;
+}
+
+<span class="dt">static</span> <span class="dt">char</span> *
+strMalloc(<span class="dt">const</span> <span class="dt">char</span> *s)
+{
+ <span class="dt">char</span> *s2;
+
+ s2 = malloc(strlen(s)+<span class="dv">1</span>);
+ strcpy(s2, s);
+
+ <span class="kw">return</span> s2;
+}
+
+<span class="co">/* Insert an element without doing size check or malloc */</span>
+<span class="co">/* Frees element if already present */</span>
+<span class="dt">static</span> <span class="dt">void</span>
+orderedSetInsertInternal(<span class="kw">struct</span> orderedSet *s, <span class="dt">char</span> *elt)
+{
+ size_t h;
+
+ assert(elt);
+
+ <span class="co">/* skip over non-empty slots with different values */</span>
+ <span class="kw">for</span>(h = hash(elt) % s-&gt;size; s-&gt;table[h] &amp;&amp; strcmp(s-&gt;table[h], elt); h = (h<span class="dv">+1</span>) % s-&gt;size);
+
+ <span class="co">/* check if not already present */</span>
+ <span class="kw">if</span>(s-&gt;table[h] == <span class="dv">0</span>) {
+ s-&gt;table[h] = elt;
+ s-&gt;n++;
+ } <span class="kw">else</span> {
+ free(elt);
+ }
+}
+
+<span class="co">/* Insert a new element. Has no effect if element is already present. */</span>
+<span class="dt">void</span>
+orderedSetInsert(<span class="kw">struct</span> orderedSet *s, <span class="dt">const</span> <span class="dt">char</span> *elt)
+{
+ size_t h;
+ <span class="kw">struct</span> orderedSet *s2;
+
+ <span class="kw">if</span>(s-&gt;n &gt;= s-&gt;size * MAX_ALPHA) {
+ <span class="co">/* rebuild the table */</span>
+ s2 = orderedSetCreateInternal(s-&gt;size * <span class="dv">2</span>);
+
+ <span class="co">/* copy all the elements */</span>
+ <span class="kw">for</span>(h = <span class="dv">0</span>; h &lt; s-&gt;size; h++) {
+ <span class="kw">if</span>(s-&gt;table[h]) {
+ orderedSetInsertInternal(s2, s-&gt;table[h]);
+ }
+ }
+
+ <span class="co">/* free the table and then do a brain transplant */</span>
+ free(s-&gt;table);
+ *s = *s2;
+ free(s2);
+ }
+
+ orderedSetInsertInternal(s, strMalloc(elt));
+}
+
+<span class="co">/* Delete an element. Has no effect if element is not already present. */</span>
+<span class="dt">void</span>
+orderedSetDelete(<span class="kw">struct</span> orderedSet *s, <span class="dt">const</span> <span class="dt">char</span> *elt)
+{
+ size_t h;
+ <span class="dt">char</span> *later;
+
+ <span class="co">/* skip over non-empty slots with different values */</span>
+ <span class="kw">for</span>(h = hash(elt) % s-&gt;size; s-&gt;table[h] &amp;&amp; strcmp(s-&gt;table[h], elt); h = (h<span class="dv">+1</span>) % s-&gt;size);
+
+ <span class="co">/* if we reached a nonempty slot, it must be our target */</span>
+ <span class="kw">if</span>(s-&gt;table[h] != <span class="dv">0</span>) {
+ <span class="co">/* remove the initial element */</span>
+ free(s-&gt;table[h]);
+ s-&gt;table[h] = <span class="dv">0</span>;
+ s-&gt;n--;
+
+ <span class="co">/* remove and reinsert any elements up to the next hole, in case they wanted to be earlier */</span>
+ <span class="kw">for</span>(h = (h<span class="dv">+1</span>) % s-&gt;size; s-&gt;table[h] ; h = (h<span class="dv">+1</span>) % s-&gt;size) {
+ later = s-&gt;table[h];
+ s-&gt;table[h] = <span class="dv">0</span>;
+ s-&gt;n--;
+ orderedSetInsertInternal(s, later);
+ }
+ }
+}
+
+<span class="dt">static</span> <span class="dt">int</span>
+compare(<span class="dt">const</span> <span class="dt">void</span> *s1, <span class="dt">const</span> <span class="dt">void</span> *s2)
+{
+ <span class="kw">return</span> strcmp(*((<span class="dt">const</span> <span class="dt">char</span> **) s1), *((<span class="dt">const</span> <span class="dt">char</span> **) s2));
+}
+
+<span class="co">/* Return a new ordered set containing all elements e</span>
+<span class="co"> * for which predicate(arg, x) != 0.</span>
+<span class="co"> * The predicate function should be applied to the elements in increasing order. */</span>
+<span class="kw">struct</span> orderedSet *
+orderedSetFilter(<span class="dt">const</span> <span class="kw">struct</span> orderedSet *s, <span class="dt">int</span> (*predicate)(<span class="dt">void</span> *arg, <span class="dt">const</span> <span class="dt">char</span> *), <span class="dt">void</span> *arg)
+{
+ size_t h;
+ <span class="dt">const</span> <span class="dt">char</span> **a; <span class="co">/* temporary array to sort */</span>
+ size_t top; <span class="co">/* where to put things in a */</span>
+ size_t i;
+ <span class="kw">struct</span> orderedSet *s2;
+
+ a = malloc(<span class="kw">sizeof</span>(<span class="dt">const</span> <span class="dt">char</span> *) * s-&gt;size);
+ assert(a);
+
+ top = <span class="dv">0</span>;
+
+ <span class="kw">for</span>(h = <span class="dv">0</span>; h &lt; s-&gt;size; h++) {
+ <span class="kw">if</span>(s-&gt;table[h]) {
+ a[top++] = s-&gt;table[h];
+ }
+ }
+
+ qsort(a, top, <span class="kw">sizeof</span>(<span class="dt">const</span> <span class="dt">char</span> *), compare);
+
+ s2 = orderedSetCreate();
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; top; i++) {
+ <span class="kw">if</span>(predicate(arg, a[i])) {
+ orderedSetInsert(s2, a[i]);
+ }
+ }
+
+ free(a);
+
+ <span class="kw">return</span> s2;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/8/orderedSet.c" class="uri">examples/2015/hw/8/orderedSet.c</a>
+</div>
+<p><a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/8/Makefile" class="uri">examples/2015/hw/8/Makefile</a></p>
+<h2 id="hw9"><span class="header-section-number">8.9</span> Assignment 9, due Wednesday 2015-04-15, at 11:00pm</h2>
+<h3 id="finding-a-cycle-in-a-maze"><span class="header-section-number">8.9.1</span> Finding a cycle in a maze</h3>
+<p>For this problem, you are given a rectangular maze consisting of <em>wall</em> squares (represented by 0) and <em>path</em>
+ squares (represented by 1). Two path squares are considered to be
+adjacent if they are at most one square away orthogonally or diagonally;
+ in chess terms, two path squares are adjacent if a king can move from
+one to the other in one turn. The input to your program is a maze in
+which the graph consisting of all path squares is connected and contains
+ at most one cycle, where a cycle is a sequence of distinct squares <span class="math inline"><em>s</em><sub>1</sub>, <em>s</em><sub>2</sub>, …, <em>s</em><sub><em>k</em></sub></span> where each <span class="math inline"><em>s</em><sub><em>i</em></sub></span> is adjacent to <span class="math inline"><em>s</em><sub><em>i</em> + 1</sub></span> and <span class="math inline"><em>s</em><sub><em>k</em></sub></span> is adjacent to <span class="math inline"><em>s</em><sub>1</sub></span>. Your job is to write a program <code>maze</code> that finds this cycle if it exists, and marks all of its squares as <em>cycle</em> squares (represented by 2).</p>
+<p>For example, here is a picture of a 200-by-100 maze that contains a small cycle:</p>
+<div class="figure">
+<img src="" alt="200 by 100 maze">
+<p class="caption">200 by 100 maze</p>
+</div>
+<p>and here is the same maze with the cycle highlighted in white:</p>
+<div class="figure">
+<img src="" alt="200 by 100 maze, showing cycle">
+<p class="caption">200 by 100 maze, showing cycle</p>
+</div>
+<h3 id="input-and-output-format"><span class="header-section-number">8.9.2</span> Input and output format</h3>
+<p>The input to your program should be taken from <code>stdin</code>, in a restricted version of <a href="http://netpbm.sourceforge.net/doc/pgm.html">raw PGM format</a>,
+ an old image format designed to be particularly easy to parse. The
+input file header will be a line that looks like it was generated by the
+ <code>printf</code> conversion string <code>"P5 %d %d 255\n"</code>, where the first <code>int</code>
+ value is the width of the image in columns and the second is the height
+ of the image in rows; the same conversion string can be given to <code>scanf</code>
+ to parse this line. Following the newline will be a long sequence of
+bytes, each representing one pixel of the image, with each row following
+ immediately after the previous one. These bytes will be either 0 or 1
+depending on whether that position in the maze is a wall or a path.</p>
+<p>The output to your program should be in the same format, with the
+difference that now some of the bytes in the image data may be 2,
+indicating the cycle. If there is no cycle, the output should be
+identical to the input. Your program is not required to detect or
+respond in any particular way to input mazes that violate the format or
+do not contain a connected graph of path squares, although you are
+encouraged to put in reasonable error checking for your own benefit
+during testing.</p>
+<p>For example, the maze depicted above is stored in the file <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/9/200-100-4.in.pgm">200-100-4.in.pgm</a>; the corresponding output is stored in the file <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/9/200-100-4.out.pgm">200-100-4.out.pgm</a>. Other sample inputs and outputs can be found in <code>/c/cs223/Hwk9/testFiles</code>.</p>
+<p>This file format is hard to read with the naked eye, even after loading into a text editor. The script <code>/c/cs223/Hwk9/toPng</code>
+ will generate a PNG file that doubles the pixel size and rescales the
+0, 1, 2 pixel values to more reasonable values for display. This can be
+called as <code>/c/cs223/Hwk9/toPng filename.pgm</code> to produce a new file <code>filename.pgm.png</code>. This works best if <code>filename.pgm</code> is already in a directory you can write to. PNG files can be displayed using most web browsers and image manipulation tools.</p>
+<h3 id="submitting-and-testing-your-program"><span class="header-section-number">8.9.3</span> Submitting and testing your program</h3>
+<p>Submit whatever files you need to build <code>maze</code> (including a <code>Makefile</code> that generates <code>maze</code> when called with no arguments) using <code>/c/cs223/bin/submit 9</code>. You can apply the public test script in <code>/c/cs223/Hwk9/test.public</code> to your submitted files using <code>/c/cs223/bin/testit 9 public</code>.</p>
+<h3 id="hw9solution"><span class="header-section-number">8.9.4</span> Sample solution</h3>
+<p>This uses breadth-first search, which makes the search a bit simpler
+than depth-first search but requires some more effort to compute the
+cycle. The program also includes code for generating random mazes.</p>
+<div>
+<div class="sourceCode"><pre class="sourceCode c"><code class="sourceCode c"><span class="ot">#include &lt;stdio.h&gt;</span>
+<span class="ot">#include &lt;stdlib.h&gt;</span>
+<span class="ot">#include &lt;assert.h&gt;</span>
+<span class="ot">#include &lt;math.h&gt;</span>
+<span class="ot">#include &lt;limits.h&gt;</span>
+
+<span class="kw">struct</span> direction {
+ <span class="dt">signed</span> <span class="dt">char</span> x;
+ <span class="dt">signed</span> <span class="dt">char</span> y;
+};
+
+<span class="ot">#define DIRECTIONS (8)</span>
+
+<span class="dt">const</span> <span class="kw">struct</span> direction directions[DIRECTIONS] = {
+ { -<span class="dv">1</span>, -<span class="dv">1</span> },
+ { -<span class="dv">1</span>, <span class="dv">0</span> },
+ { -<span class="dv">1</span>, <span class="dv">1</span> },
+ { <span class="dv">0</span>, -<span class="dv">1</span> },
+ { <span class="dv">0</span>, <span class="dv">1</span> },
+ { <span class="dv">1</span>, -<span class="dv">1</span> },
+ { <span class="dv">1</span>, <span class="dv">0</span> },
+ { <span class="dv">1</span>, <span class="dv">1</span> }
+};
+
+<span class="kw">struct</span> position {
+ <span class="dt">int</span> x;
+ <span class="dt">int</span> y;
+};
+
+<span class="dt">const</span> <span class="kw">struct</span> position NO_POSITION = { -<span class="dv">1</span>, -<span class="dv">1</span> };
+
+<span class="dt">static</span> <span class="kw">inline</span> <span class="dt">int</span>
+eqPosition(<span class="kw">struct</span> position p, <span class="kw">struct</span> position q)
+{
+ <span class="kw">return</span> p.x == q.x &amp;&amp; p.y == q.y;
+}
+
+<span class="ot">#define WALL (0)</span>
+<span class="ot">#define PATH (1)</span>
+<span class="ot">#define CYCLE (2)</span>
+
+<span class="kw">struct</span> square {
+ <span class="dt">int</span> contents;
+ <span class="kw">struct</span> position parent; <span class="co">/* used by search routine */</span>
+};
+
+<span class="kw">struct</span> maze {
+ <span class="kw">struct</span> position size; <span class="co">/* columns (width) = size.x, rows (height) = size.y */</span>
+ <span class="kw">struct</span> square *a; <span class="co">/* packed array of squares */</span>
+};
+
+<span class="co">/* look up a position in a maze */</span>
+<span class="ot">#define Mref(m, pos) ((m)-&gt;a[(pos).y * (m)-&gt;size.x + (pos).x])</span>
+<span class="ot">#define Mget(m, pos) (assert((pos).x &gt;= 0 &amp;&amp; (pos).y &gt;= 0 &amp;&amp; (pos).x &lt; (m)-&gt;size.x &amp;&amp; (pos).y &lt; (m)-&gt;size.y), Mref((m), (pos)))</span>
+
+<span class="co">/* add direction to source to get target */</span>
+<span class="co">/* returns 1 if target is in range */</span>
+<span class="dt">int</span>
+offset(<span class="dt">const</span> <span class="kw">struct</span> maze *m, <span class="kw">struct</span> position *target, <span class="kw">struct</span> position source, <span class="kw">struct</span> direction dir)
+{
+ target-&gt;x = source.x + dir.x;
+ target-&gt;y = source.y + dir.y;
+
+ <span class="kw">return</span> target-&gt;x &gt;= <span class="dv">0</span> &amp;&amp; target-&gt;y &gt;= <span class="dv">0</span> &amp;&amp; target-&gt;x &lt; m-&gt;size.x &amp;&amp; target-&gt;y &lt; m-&gt;size.y;
+}
+
+<span class="co">/* free a maze */</span>
+<span class="dt">void</span>
+destroyMaze(<span class="kw">struct</span> maze *m)
+{
+ free(m-&gt;a);
+ free(m);
+}
+
+<span class="co">/* load a maze in restricted PGM format */</span>
+<span class="kw">struct</span> maze *
+loadMaze(FILE *f)
+{
+ <span class="kw">struct</span> maze *m;
+ <span class="kw">struct</span> position i;
+
+ m = malloc(<span class="kw">sizeof</span>(*m));
+ assert(m);
+
+ fscanf(f, <span class="st">"P5 %d %d 255</span><span class="ch">\n</span><span class="st">"</span>, &amp;m-&gt;size.x, &amp;m-&gt;size.y);
+
+ m-&gt;a = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> square) * m-&gt;size.y * m-&gt;size.x);
+
+ <span class="kw">for</span>(i.y = <span class="dv">0</span>; i.y &lt; m-&gt;size.y; i.y++) {
+ <span class="kw">for</span>(i.x = <span class="dv">0</span>; i.x &lt; m-&gt;size.x; i.x++) {
+ Mref(m, i).contents = getchar();
+ assert(Mref(m, i).contents == <span class="dv">0</span> || Mref(m, i).contents == <span class="dv">1</span>);
+ }
+ }
+
+ <span class="kw">return</span> m;
+}
+
+<span class="dt">void</span>
+saveMaze(<span class="kw">struct</span> maze *m, FILE *f)
+{
+ <span class="kw">struct</span> position i;
+
+ fprintf(f, <span class="st">"P5 %d %d 255</span><span class="ch">\n</span><span class="st">"</span>, m-&gt;size.x, m-&gt;size.y);
+
+ <span class="kw">for</span>(i.y = <span class="dv">0</span>; i.y &lt; m-&gt;size.y; i.y++) {
+ <span class="kw">for</span>(i.x = <span class="dv">0</span>; i.x &lt; m-&gt;size.x; i.x++) {
+ putc(Mref(m, i).contents, f);
+ }
+ }
+}
+
+<span class="co">/* how many neighbors of position are PATH? */</span>
+<span class="dt">int</span>
+countNeighbors(<span class="dt">const</span> <span class="kw">struct</span> maze *m, <span class="kw">struct</span> position p)
+{
+ <span class="kw">struct</span> position q;
+ <span class="dt">int</span> i;
+ <span class="dt">int</span> count = <span class="dv">0</span>;
+
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; DIRECTIONS; i++) {
+ <span class="kw">if</span>(offset(m, &amp;q, p, directions[i]) &amp;&amp; Mget(m, q).contents == PATH) {
+ count++;
+ }
+ }
+
+ <span class="kw">return</span> count;
+}
+
+<span class="kw">struct</span> position
+randomPosition(<span class="dt">const</span> <span class="kw">struct</span> maze *m)
+{
+ <span class="kw">struct</span> position r;
+
+ r.x = rand() % m-&gt;size.x;
+ r.y = rand() % m-&gt;size.y;
+
+ <span class="kw">return</span> r;
+}
+
+<span class="ot">#define PATIENCE_MULTIPLIER (4)</span>
+
+<span class="co">/* generate a random connected maze with no cycles */</span>
+<span class="kw">struct</span> maze *
+generateMaze(<span class="kw">struct</span> position size)
+{
+ <span class="kw">struct</span> maze *m;
+ <span class="kw">struct</span> position r;
+ <span class="kw">struct</span> position i;
+ size_t countdown; <span class="co">/* how long to run before we get tired of not making progress */</span>
+ size_t maxCountdown; <span class="co">/* value to reset countdown to when we make progress */</span>
+
+ m = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> maze));
+ assert(m);
+
+ m-&gt;size = size;
+ m-&gt;a = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> square) * m-&gt;size.x * m-&gt;size.y);
+ assert(m-&gt;a);
+
+ <span class="co">/* start with all WALL */</span>
+ <span class="kw">for</span>(i.y = <span class="dv">0</span>; i.y &lt; m-&gt;size.y; i.y++) {
+ <span class="kw">for</span>(i.x = <span class="dv">0</span>; i.x &lt; m-&gt;size.x; i.x++) {
+ Mref(m, i).contents = WALL;
+ }
+ }
+
+ <span class="co">/* place a PATH on a random square */</span>
+ r = randomPosition(m);
+ Mref(m, r).contents = PATH;
+
+ maxCountdown = PATIENCE_MULTIPLIER * size.x * size.y * log(size.x * size.y);
+
+ <span class="kw">for</span>(countdown = maxCountdown; countdown &gt; <span class="dv">0</span>; countdown--) {
+ <span class="co">/* pick a random square */</span>
+ r = randomPosition(m);
+
+ <span class="co">/* add if we have exactly one neighbor already in the maze */</span>
+ <span class="kw">if</span>(Mget(m, r).contents == WALL &amp;&amp; countNeighbors(m, r) == <span class="dv">1</span>) {
+ Mref(m, r).contents = PATH;
+
+ <span class="co">/* reset countdown */</span>
+ countdown = maxCountdown;
+ }
+ }
+
+ <span class="kw">return</span> m;
+}
+
+<span class="co">/* create a cycle by adding one extra PATH square</span>
+<span class="co"> * that connects two existing squares */</span>
+<span class="dt">void</span>
+mazeAddCycle(<span class="kw">struct</span> maze *m)
+{
+ <span class="kw">struct</span> position r;
+
+ <span class="kw">do</span> {
+ r = randomPosition(m);
+ } <span class="kw">while</span>(Mget(m, r).contents != WALL || countNeighbors(m, r) != <span class="dv">2</span>);
+
+ Mref(m, r).contents = PATH;
+}
+
+<span class="co">/* Search for a cycle of PATH nodes.</span>
+<span class="co"> * If found, mark all nodes on the cycle as CYCLE. */</span>
+<span class="dt">void</span>
+mazeSearchForCycle(<span class="kw">struct</span> maze *m)
+{
+ <span class="kw">struct</span> position root; <span class="co">/* root of tree */</span>
+ <span class="kw">struct</span> position current; <span class="co">/* what we just popped */</span>
+ <span class="kw">struct</span> position parent ; <span class="co">/* current's parent */</span>
+ <span class="kw">struct</span> position neighbor; <span class="co">/* neighbor to push */</span>
+ <span class="kw">struct</span> position ancestor; <span class="co">/* for filling in CYCLE */</span>
+ <span class="dt">int</span> i;
+ <span class="kw">struct</span> position *queue;
+ size_t head; <span class="co">/* where to dequeue */</span>
+ size_t tail; <span class="co">/* where to enqueue */</span>
+
+ <span class="co">/* this is probably more space than we need */</span>
+ queue = malloc(<span class="kw">sizeof</span>(<span class="kw">struct</span> position) * m-&gt;size.x * m-&gt;size.y);
+ assert(queue);
+
+ head = tail = <span class="dv">0</span>;
+
+ <span class="co">/* clear out bookkeeping data */</span>
+ <span class="kw">for</span>(current.y = <span class="dv">0</span>; current.y &lt; m-&gt;size.y; current.y++) {
+ <span class="kw">for</span>(current.x = <span class="dv">0</span>; current.x &lt; m-&gt;size.x; current.x++) {
+ Mref(m, current).parent = NO_POSITION;
+
+ <span class="co">/* probably not necessary but will avoid trouble</span>
+<span class="co"> * if somebody calls this twice */</span>
+ <span class="kw">if</span>(Mget(m, current).contents != WALL) {
+ Mref(m, current).contents = PATH;
+ }
+ }
+ }
+
+ <span class="co">/* find a root */</span>
+ <span class="co">/* we don't care what this is, but it can't be a WALL */</span>
+ <span class="kw">do</span> {
+ root = randomPosition(m);
+ } <span class="kw">while</span>(Mget(m, root).contents != PATH);
+
+ <span class="co">/* push root */</span>
+ Mref(m, root).parent = root;
+ queue[tail++] = root;
+
+ <span class="co">/* now perform the BFS */</span>
+ <span class="co">/* if we ever find a neighbor that is already in the tree and not our parent,</span>
+<span class="co"> * we have found our cycle */</span>
+ <span class="kw">while</span>(head &lt; tail) {
+ current = queue[head++];
+ parent = Mget(m, current).parent;
+
+ <span class="co">/* push all neighbors not already in tree */</span>
+ <span class="co">/* if one is in the tree, we win */</span>
+ <span class="kw">for</span>(i = <span class="dv">0</span>; i &lt; DIRECTIONS; i++) {
+ <span class="kw">if</span>(offset(m, &amp;neighbor, current, directions[i]) &amp;&amp; Mget(m, neighbor).contents == PATH &amp;&amp; !eqPosition(neighbor, parent)) {
+ <span class="co">/* is it already in the tree? */</span>
+ <span class="kw">if</span>(!eqPosition(Mget(m, neighbor).parent, NO_POSITION)) {
+ <span class="co">/* we win */</span>
+ <span class="co">/* cycle consists of all ancestors of neighbor and current</span>
+<span class="co"> * up to common ancestor */</span>
+ <span class="kw">for</span>(ancestor = neighbor; !eqPosition(ancestor, root); ancestor = Mget(m, ancestor).parent) {
+ Mref(m, ancestor).contents = CYCLE;
+ }
+
+ <span class="co">/* also mark root */</span>
+ Mref(m, root).contents = CYCLE;
+
+ <span class="co">/* now work up from current */</span>
+ <span class="kw">for</span>(ancestor = current; !eqPosition(ancestor, root); ancestor = Mget(m, ancestor).parent) {
+ <span class="kw">if</span>(Mget(m, ancestor).contents == PATH) {
+ <span class="co">/* add to the cycle */</span>
+ Mref(m, ancestor).contents = CYCLE;
+ } <span class="kw">else</span> {
+ <span class="co">/* this is the common ancestor, which is not root */</span>
+ <span class="co">/* mark all proper ancestors as PATH */</span>
+ <span class="kw">do</span> {
+ ancestor = Mget(m, ancestor).parent;
+ Mref(m, ancestor).contents = PATH;
+ } <span class="kw">while</span>(!eqPosition(ancestor, root));
+
+ <span class="co">/* can't just break, too many loops */</span>
+ <span class="kw">goto</span> doneWithSearch;
+ }
+ }
+ } <span class="kw">else</span> {
+ Mref(m, neighbor).parent = current;
+ queue[tail++] = neighbor;
+ }
+ }
+ }
+ }
+
+doneWithSearch:
+ free(queue);
+}
+
+<span class="dt">int</span>
+main(<span class="dt">int</span> argc, <span class="dt">char</span> **argv)
+{
+ <span class="kw">struct</span> maze *m;
+ <span class="kw">struct</span> position size = { <span class="dv">80</span>, <span class="dv">60</span> };
+ <span class="dt">int</span> seed;
+
+ <span class="kw">switch</span>(argc) {
+ <span class="kw">case</span> <span class="dv">1</span>:
+ <span class="co">/* sample solution for the assignment */</span>
+ m = loadMaze(stdin);
+ mazeSearchForCycle(m);
+ saveMaze(m, stdout);
+ destroyMaze(m);
+ <span class="kw">break</span>;
+ <span class="kw">case</span> <span class="dv">4</span>:
+ <span class="co">/* generate a new test image */</span>
+ <span class="co">/* usage is ./maze width height seed */</span>
+ <span class="co">/* if seed is negative, use absolute value and don't put in cycle */</span>
+ size.x = atoi(argv[<span class="dv">1</span>]);
+ size.y = atoi(argv[<span class="dv">2</span>]);
+ seed = atoi(argv[<span class="dv">3</span>]);
+
+ srand(seed &lt; <span class="dv">0</span> ? -seed : seed);
+ m = generateMaze(size);
+ <span class="kw">if</span>(seed &gt;= <span class="dv">0</span>) { mazeAddCycle(m); }
+ saveMaze(m, stdout);
+ destroyMaze(m);
+ <span class="kw">break</span>;
+ <span class="kw">default</span>:
+ fprintf(stderr, <span class="st">"Usage %s or %s width height seed</span><span class="ch">\n</span><span class="st">"</span>, argv[<span class="dv">0</span>], argv[<span class="dv">0</span>]);
+ <span class="kw">return</span> <span class="dv">1</span>;
+ }
+
+ <span class="kw">return</span> <span class="dv">0</span>;
+}</code></pre></div>
+<a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/9/maze.c" class="uri">examples/2015/hw/9/maze.c</a>
+</div>
+<p>And the <a href="http://cs.yale.edu/homes/aspnes/classes/223/examples/2015/hw/9/Makefile">Makefile</a>.</p>
+<h1 id="codingHints"><span class="header-section-number">9</span> Common C coding and debugging issues</h1>
+<p>Here are some notes from a helpful Zoo denizen about debugging
+programs in 223. (Note: these have been edited slightly from the
+original.)</p>
+<pre><code>Date: Thu, 10 Feb 2005 06:02:23 -0500 (EST)
+From: James Terry &lt;james.c.terry@yale.edu&gt;
+Subject: 223 coding feedback
+
+Hi Jim,
+
+Several of your students for 223 were up late last night in the Zoo
+working on their assignments, and they seemed to be getting hung up on
+some coding issues. They were pretty frustrated with some standard
+language/debugging issues, so I helped them get the type-checker and
+Valgrind to stop yelling at them. I noticed some recurring problems and I
+thought I'd pass them on to you. They're pretty standard mistakes, and
+I've made most of them myself at some point, either in your class or in
+Stan's. It occurred to me that there might be more confused people than
+were around last night, and they'd probably appreciate it if someone told
+them about these sort of things. I'm not trying to intrude on how you
+teach the class; I just thought this feedback would be helpful and I
+wasn't sure that it would find its way to you otherwise. I'm sure you've
+already taught them several of these, and I understand that sometimes
+students just don't pay attention. Still, these seem like good points to
+hammer down:
+
+Recurring debugging/coding problems:
+
+1. If you want a debugger/Valgrind to give you line numbers, you must
+compile with debugging info turned on, i. e. using the -g[level] flag.
+2. On the Zoo, pointers and int's are 4 bytes; char's are 1. (Some
+people didn't seem to realize that a char* is 4 bytes rather than 1.)
+3. I think it would be helpful if you explained why, when using
+realloc(), it's a good idea to increase the allocated size
+multiplicatively rather than additively. Besides, everyone loves the
+"tearing down the hotel" metaphor. :)
+4. If they use call-by-reference, they had better make sure that they
+keep the same reference. So if they pass in a pointer as an argument to a
+function, they shouldn't call malloc() or realloc() on that function.
+(Mention the double pointer as another option.) Most people will make
+this mistake eventually if no one warns them about it. When I was
+learning C, I sort of viewed malloc() and realloc() as magical
+memory-increasing functions; that is to say, I didn't think very hard
+about the meaning of assigning a pointer to malloc()'s return value. I
+suspect some of your students would benefit from having the details
+spelled out. (Or spelled out again, if you've already done that.)
+5. It's possible to get through a lot (but not all) of the CS major
+without learning basic Unix shell syntax, but that's really just wasted
+time. Pipes, backgrounding, man, scp, and grep really help even at the
+intro level. I realize the purpose of the class isn't to teach Unix, but
+in past years I think there was a TA help session on these things. They
+don't need to know how to write their own Emacs modes, but the basics
+would definitely be helpful.
+6. malloc/free -- If Valgrind/gdb reports a problem inside of malloc() or
+free(), chances are that the student has *not* discovered a bug in gcc.
+(I just heard how one of Zhong's students proved the correctness of the
+libraries for his thesis; that's pretty cool.) Explain why you can't
+malloc() twice on the same pointer. Explain how with multidimensional
+pointers, you must malloc/free each dimension separately. Drill down the
+one-to-one correspondence between malloc'ing and free'ing.
+7. Null characters: It's not obvious to newbies that some library functions
+require them, particularly null-terminated strings. Tell them that
+char*'s must be null terminated in order for &lt;string.h&gt;
+functions to work.
+8. Off-by-one errors: Tell people that when all else fails, take a hard
+look at their comparison operators; i. e. make sure that &gt; shouldn't
+really be a &gt;=.
+9. This is probably another thing for a help session or workshop, but I
+feel almost everyone could benefit from basic software engineering
+methodology. Stylistic awkwardness I noticed:
+ --Using a mess of if-then-else's instead of nested control
+structures.
+ --Using while-loops with iterators that get initialized right
+before the beginning of the loop and get incremented with each iteration,
+when they could be using for-loops.
+ --Doing the setup work for a loop right before the beginning of
+the loop and then at the end of every iteration, instead of at the
+beginning of every iteration. Conversely: doing the cleanup work at the
+beginning of every iteration and then after the loop has completed.
+10. Tell them to use assert(). (Frequently.) When you cover binary
+search, using placement of debugging statements in code in order to pin
+down an error might be an instructive example.
+11. Tell them to use either printf statements or a debugger to debug. I
+think they can figure out how to do this on their own, they just need to
+be told it's a good idea.
+
+Hopefully some of these suggestions will be helpful. I won't be offended
+if you don't pass them on to your students, and I understand if you put a
+higher teaching priority on non-coding subjects, but all the things I
+mentioned were things I wish someone had told me. I'd be happy to run a
+help session on this stuff if you and the TAs are too busy, but otherwise
+I wouldn't presume.
+
+Best,
+Jim</code></pre>
+<div class="footnotes">
+<hr>
+<ol>
+<li id="fn1"><p>I would like to thank David Galles for making this site available and Xiao Shi for pointing me to it.<a href="#fnref1">↩</a></p></li>
+<li id="fn2"><p>Note that because each row is in the same <code>malloc</code>-ed block as its adjacent rows, <code>valgrind</code> will not detect if you run off the end of a row in this implementation.<a href="#fnref2">↩</a></p></li>
+<li id="fn3"><p>The compiler is GCC version 4.8.2-19ubuntu1 running on a
+ Linux 3.13.0-44-generic kernel running inside VirtualBox on a Windows
+8.1 machine with a 3.30-GHz AMD FX-6100 CPU, so don't be surprised if
+you get different numbers on a real machine.<a href="#fnref3">↩</a></p></li>
+<li id="fn4"><p>The pattern here is that <code class="backtick">HEAD</code> is the most recent commit, <code class="backtick">HEAD^</code> the one before it, <code class="backtick">HEAD^^</code> the one before that, and so on. This is sometimes nicer than having to pull hex gibberish out of the output of <code class="backtick">git&nbsp;log</code>.<a href="#fnref4">↩</a></p></li>
+<li id="fn5"><p>Technically I can use <code class="backtick">git&nbsp;reset</code> to get rid of the commit, but <code class="backtick">git&nbsp;reset</code> can be dangerous, since it throws away information.<a href="#fnref5">↩</a></p></li>
+<li id="fn6"><p>This convention was not always followed in the early days of computing. For example, the <a href="http://www.linfo.org/pdp-7.html">PDP-7</a>
+ on which UNIX was first developed used 18-bit words, which conveniently
+ translated into six octal digits back in the pre-hexadecimal era.<a href="#fnref6">↩</a></p></li>
+<li id="fn7"><p>Certain ancient versions of C ran on machines with a different character set encoding, like <a href="http://en.wikipedia.org/wiki/EBCDIC" title="WikiPedia">EBCDIC</a>. The C standard does not guarantee ASCII encoding.<a href="#fnref7">↩</a></p></li>
+<li id="fn8"><p>C++ programmers will prefer <code>++x</code> if they are not otherwise using the return value, because if <code>x</code> is some very complicated type with overloaded <code>++</code>, using preincrement avoids having to save a copy of the old value.<a href="#fnref8">↩</a></p></li>
+<li id="fn9"><p>Exception: Global variables and static local variables
+are guaranteed to be initialized to an all-0 pattern, which will give
+the value 0 for most types.<a href="#fnref9">↩</a></p></li>
+<li id="fn10"><p>The reason for excluding <code>char *</code> and <code>void *</code> is that these are often used to represent pointers to objects with arbitrary types.<a href="#fnref10">↩</a></p></li>
+<li id="fn11"><p>In this case you will get lucky most of the time, since
+ the odds are that malloc will give you a block that is slightly bigger
+than <code class="backtick">strlen(s)</code> anyway. But bugs that only
+manifest themselves occasionally are even worse than bugs that kill your
+ program every time, because they are much harder to track down.<a href="#fnref11">↩</a></p></li>
+<li id="fn12"><p>Some programs (e.g. <code class="backtick">/c/cs223/bin/submit</code>) will use this to change their behavior depending on what name you call them with.<a href="#fnref12">↩</a></p></li>
+<li id="fn13"><p>There are various ways to work around this. The simplest is to put a <code>union</code> inside a larger <code>struct</code> that includes an explicit type tag.<a href="#fnref13">↩</a></p></li>
+<li id="fn14"><p>Arguably, this is a bug in the design of the language: if the compiler knows that <code class="backtick">sp</code> has type <code class="backtick">struct&nbsp;string&nbsp;*</code>, there is no particular reason why it can't interpret <code class="backtick">sp.length</code> as <code class="backtick">sp-&gt;length</code>. But it doesn't do this, so you will have to remember to write <code class="backtick">sp-&gt;length</code> instead.<a href="#fnref14">↩</a></p></li>
+<li id="fn15"><p>This is also the simplest way to deal with the
+inconsistencies between different compilers in how they handle inline
+functions. For an extensive discussion of the terrifying portability
+issues that arise in pre-C99 C compilers, see <a href="http://www.greenend.org.uk/rjk/tech/inline.html" class="uri">http://www.greenend.org.uk/rjk/tech/inline.html</a>.<a href="#fnref15">↩</a></p></li>
+<li id="fn16"><p>To make the example work, we are violating our usual rule of always using braces in <code>if</code> statements.<a href="#fnref16">↩</a></p></li>
+<li id="fn17"><p>The <code>#</code> operator looks like it ought to be
+useful here, but it only works for expanding arguments to macros and not
+ for expanding macros themselves. Attempting to get around this by
+wrapping <code>MESSAGE</code> in a macro that applies the <code>#</code> operator to its first argument will end in tears if <code>MESSAGE</code> contains any special characters like commas or right parentheses. The C preprocessor has many unfortunate limitations.<a href="#fnref17">↩</a></p></li>
+<li id="fn18"><p>This is an abuse of notation, where the equals sign is
+really acting like set membership. The general rule is that an
+expression <span class="math inline"><em>O</em>(<em>f</em>(<em>n</em>)) = <em>O</em>(<em>g</em>(<em>n</em>))</span> is true if for any choice of a function in <span class="math inline"><em>O</em>(<em>f</em>(<em>n</em>))</span>, that function is in <span class="math inline"><em>O</em>(<em>g</em>(<em>n</em>))</span>. This relation is transitive and reflexive, but unlike real equality it's not symmetric.<a href="#fnref18">↩</a></p></li>
+<li id="fn19"><p>The example below uses the <code>offsetof</code> macro, defined in <code>stddef.h</code>,
+ to allocate a truncated head that doesn't include this extra space.
+This is probably more trouble than it is worth in this case, but might
+be useful if we were creating a lot of dummy heads and the contents were
+ more than 4 bytes long.<a href="#fnref19">↩</a></p></li>
+<li id="fn20"><p>A small child of my acquaintance once explained that this wouldn't work, because you would hit your head on the ceiling.<a href="#fnref20">↩</a></p></li>
+<li id="fn21"><p>A summary of the state of this problem as of 2013 can be found in <a href="http://arxiv.org/pdf/1306.0207v1.pdf" class="uri">http://arxiv.org/pdf/1306.0207v1.pdf</a>.<a href="#fnref21">↩</a></p></li>
+<li id="fn22"><p>This only works if the graph is undirected, which means that for every edge <span class="math inline"><em>u</em><em>v</em></span> there is a matching edge <span class="math inline"><em>v</em><em>u</em></span> with the same weight.<a href="#fnref22">↩</a></p></li>
+<li id="fn23"><p>But it's linear in the numerical value of the output, which means that <code>fib(n)</code> will actually terminate in a reasonable amount of time on a typical modern computer when run on any <span class="math inline"><em>n</em></span> small enough that <span class="math inline"><em>F</em>(<em>n</em>)</span> fits in 32 bits. Running it using 64-bit (or larger) integer representations will be slower.<a href="#fnref23">↩</a></p></li>
+<li id="fn24"><p>The actual analysis is pretty complicated, since we are
+ more likely to land in a bigger pile, but it's not hard to show that on
+ average even the bigger pile has no more than 3/4 of the elements.<a href="#fnref24">↩</a></p></li>
+<li id="fn25"><p>This otherwise insane-looking modification is useful for modeling scheduling problems, where <code class="backtick">a+b</code> is the time to do <code class="backtick">a</code> and <code class="backtick">b</code> in parallel, and <code class="backtick">a*b</code> is the time to do <code class="backtick">a</code> and <code class="backtick">b</code> sequentially. The reason for making the first case <code class="backtick">+</code> and the second case <code class="backtick">*</code> is because this makes the distributive law <code class="backtick">a*(b+c)&nbsp;=&nbsp;(a*b)+(a*c)</code> work. It also allows tricks like matrix multiplication using the standard definition. See <a href="http://maxplus.org/" class="uri">http://maxplus.org</a> for more than you probably want to know about this.<a href="#fnref25">↩</a></p></li>
+<li id="fn26"><p>Not intended as legal advice.<a href="#fnref26">↩</a></p></li>
+<li id="fn27"><p>Stratfordians, Oxfordians, and other conspiracy
+theorists might object that these results depend critically on the
+precise formatting of the text. We counter this objection by observing
+that we used the <a href="http://www.gutenberg.org/ebooks/2235">Project Gutenberg e-text of <em>The Tempest</em></a>,
+ which, while not necessarily the most favored by academic Shakespeare
+scholars, is the easiest version to obtain on-line. We consider it
+further evidence of Sir Francis Bacon's genius that not only was he able
+ to subtly encode his name throughout his many brilliant plays, but he
+was even able to anticipate the effects of modern spelling and
+punctuation on this encoding.<a href="#fnref27">↩</a></p></li>
+<li id="fn28"><p>Normally this is a dangerous thing to assume, but this assignment is complicated enough already.<a href="#fnref28">↩</a></p></li>
+</ol>
+</div>
+
+
+</body></html> \ No newline at end of file
diff --git a/Computer_Science/data_structures/chapter_4/a.out b/Computer_Science/data_structures/chapter_4/a.out
deleted file mode 100755
index b1815f0..0000000
--- a/Computer_Science/data_structures/chapter_4/a.out
+++ /dev/null
Binary files differ
diff --git a/Computer_Science/data_structures/chapter_4/avl_tree b/Computer_Science/data_structures/chapter_4/avl_tree
index 1d5d102..95b05bd 100755
--- a/Computer_Science/data_structures/chapter_4/avl_tree
+++ b/Computer_Science/data_structures/chapter_4/avl_tree
Binary files differ
diff --git a/Computer_Science/data_structures/chapter_4/avl_tree.c b/Computer_Science/data_structures/chapter_4/avl_tree.c
index da6ab0d..f84db7b 100644
--- a/Computer_Science/data_structures/chapter_4/avl_tree.c
+++ b/Computer_Science/data_structures/chapter_4/avl_tree.c
@@ -134,6 +134,63 @@ AvlTree insert(elem_t x, AvlTree t)
return t;
}
+AvlTree insert_nonrecursive(elem_t x, AvlTree t)
+{
+ Position tmp;
+ Position prev[100] ;
+ Position p = t;
+ int i = 0;
+ int j;
+
+ /* index 0 not used */
+ prev[i] = p;
+ while(p) {
+ if(x < p->elem) {
+ prev[++i] = p;
+ p = p->left;
+ } else if(x > p->elem) {
+ prev[++i] = p;
+ p = p->right;
+ } else
+ return t;
+ }
+
+ tmp = malloc(sizeof(struct AvlNode));
+ tmp->elem = x;
+ tmp->left = tmp->right = NULL;
+ tmp->height = 0;
+
+ if(!prev[i]) {
+ return tmp;
+ } else if(x < prev[i]->elem) {
+ prev[i]->left = tmp;
+ prev[i]->height++;
+ if(i - 1 > 0) {
+ if(prev[i - 1]->left == prev[i]
+ && prev[i]->height - height(prev[i - 1]->right) == 2)
+ prev[i - 2] = single_rotate_with_left(prev[i - 1]);
+ else if(prev[i - 1]->right == prev[i]
+ && prev[i]->height - height(prev[i - 1]->left) == 2)
+ prev[i - 2] = double_rotate_with_left(prev[i - 1]);
+ }
+ }
+ else {
+ prev[i]->right = tmp;
+ prev[i]->height++;
+ if(i - 1 > 0) {
+ if(prev[i - 1]->right == prev[i]
+ && prev[i]->height - height(prev[i - 1]->left) == 2)
+ prev[i - 2] = single_rotate_with_right(prev[i - 1]);
+ else if(prev[i - 1]->left == prev[i]
+ && prev[i]->height - height(prev[i - 1]->right) == 2) {
+ prev[i - 2] = double_rotate_with_right(prev[i - 2]);
+ }
+ }
+ }
+
+ return t;
+}
+
/* lazy delete ? */
AvlTree delete(elem_t x, AvlTree t)
{
@@ -142,18 +199,15 @@ AvlTree delete(elem_t x, AvlTree t)
void test()
{
+ int i;
AvlTree t = NULL;
- t = insert(4, t);
- t = insert(3, t);
- t = insert(2, t);
- t = insert(1, t);
- t = insert(8, t);
- t = insert(9, t);
+ for(i = 0; i < 3; i++) {
+ t = insert_nonrecursive(i + 1, t);
+ }
printf("min:%d\n", find_min(t)->elem);
printf("max:%d\n", find_max(t)->elem);
- printf("find 2:%d\n", find(2, t)->elem);
print_ascii_tree(t);
}
diff --git a/Computer_Science/data_structures/chapter_4/avl_tree.c.out b/Computer_Science/data_structures/chapter_4/avl_tree.c.out
new file mode 100755
index 0000000..005cc26
--- /dev/null
+++ b/Computer_Science/data_structures/chapter_4/avl_tree.c.out
Binary files differ
diff --git a/Computer_Science/data_structures/chapter_4/binary_search_tree.c.out b/Computer_Science/data_structures/chapter_4/binary_search_tree.c.out
new file mode 100755
index 0000000..231ea0b
--- /dev/null
+++ b/Computer_Science/data_structures/chapter_4/binary_search_tree.c.out
Binary files differ
diff --git a/Computer_Science/data_structures/chapter_4/depth_or_random_binary_search_tree.pdf b/Computer_Science/data_structures/chapter_4/depth_or_random_binary_search_tree.pdf
new file mode 100644
index 0000000..04ba606
--- /dev/null
+++ b/Computer_Science/data_structures/chapter_4/depth_or_random_binary_search_tree.pdf
Binary files differ
diff --git a/Computer_Science/data_structures/chapter_4/depth_or_random_binary_search_tree_handout.pdf b/Computer_Science/data_structures/chapter_4/depth_or_random_binary_search_tree_handout.pdf
new file mode 100644
index 0000000..e2ea753
--- /dev/null
+++ b/Computer_Science/data_structures/chapter_4/depth_or_random_binary_search_tree_handout.pdf
Binary files differ
diff --git a/Computer_Science/leetcode/15-3_sum.c b/Computer_Science/leetcode/15-3_sum.c
new file mode 100644
index 0000000..cb60873
--- /dev/null
+++ b/Computer_Science/leetcode/15-3_sum.c
@@ -0,0 +1,37 @@
+/**
+ * Return an array of arrays of size *returnSize.
+ * Note: The returned array must be malloced, assume caller calls free().
+ */
+void helper(int **result, int *nums, int start, int numsSize, int *tmp, int count, int *returnSize)
+{
+ if(start >= numsSize) return;
+ if(count < 3) {
+ tmp[count] = nums[start];
+ helper(result, nums, start + 1, numsSize, tmp, count + 1, returnSize);
+ }
+ else if(count == 3) {
+ if(tmp[0] + tmp[1] + tmp[2] == 0) {
+ *(result + *returnSize) = malloc(sizeof(int) * 3);
+ **(result + *returnSize) = tmp[0];
+ *(*(result + *returnSize) + 1) = tmp[1];
+ *(*(result + *returnSize) + 2) = tmp[2];
+ ++*returnSize;
+ }
+ helper(result, nums, start, numsSize, tmp, count - 1, returnSize);
+ helper(result, nums, start, numsSize, tmp, count - 2, returnSize);
+ }
+
+}
+
+int** threeSum(int* nums, int numsSize, int* returnSize) {
+ *returnSize = 0;
+ int **result = malloc(sizeof(int *) * 100);
+ int tmp[3];
+ int count = 0;
+
+ for(int i = 0; i < numsSize; i++) {
+ helper(result, nums, i, numsSize, tmp, 0, returnSize);
+ }
+
+ return result;
+}
diff --git a/Computer_Science/leetcode/15-3_sum.c~ b/Computer_Science/leetcode/15-3_sum.c~
new file mode 100644
index 0000000..54dbbd2
--- /dev/null
+++ b/Computer_Science/leetcode/15-3_sum.c~
@@ -0,0 +1,7 @@
+/**
+ * Return an array of arrays of size *returnSize.
+ * Note: The returned array must be malloced, assume caller calls free().
+ */
+int** threeSum(int* nums, int numsSize, int* returnSize) {
+
+}
diff --git a/Computer_Science/leetcode/17-letter_combinations_of_a_phone_number.c b/Computer_Science/leetcode/17-letter_combinations_of_a_phone_number.c
new file mode 100644
index 0000000..eeabd21
--- /dev/null
+++ b/Computer_Science/leetcode/17-letter_combinations_of_a_phone_number.c
@@ -0,0 +1,46 @@
+/**
+ * Return an array of size *returnSize.
+ * Note: The returned array must be malloced, assume caller calls free().
+ */
+void helper(char **result, char *map[10], char *digits, char *tmp, int index, int *returnSize)
+{
+ char *p = map[*digits - '0'];
+ for(; *p; ++p) {
+ tmp[index] = *p;
+ helper(result, map, digits + 1, tmp, index + 1, returnSize);
+ }
+ if(*digits == '\0') {
+ *(result + *returnSize) = malloc(sizeof(char) * index + 1);
+ for(int i = 0; i < index; i++) {
+ *(*(result + *returnSize) + i) = tmp[i];
+ }
+ *(*(result + *returnSize) + index) = '\0';
+ ++*returnSize;
+ }
+}
+char** letterCombinations(char* digits, int* returnSize) {
+ int len = strlen(digits);
+ int index = 0;
+ char tmp[len + 1];
+ char **result = malloc(sizeof(int *) * 100);
+ char *map[10] = {
+ " ", " ", "abc", "def", "ghi", "jkl", "mno", "pqrs", "tuv", "wxyz"
+ };
+
+ *returnSize = 0;
+
+ if(len < 1)
+ return result;
+
+ while(*digits) {
+ if(*digits >= '2' && *digits <= '9') {
+ helper(result, map, digits, tmp, index, returnSize);
+ } else {
+ *returnSize = 0;
+ break;
+ }
+ ++digits;
+ }
+
+ return result;
+}
diff --git a/Computer_Science/leetcode/17-letter_combinations_of_a_phone_number.c~ b/Computer_Science/leetcode/17-letter_combinations_of_a_phone_number.c~
new file mode 100644
index 0000000..d31867b
--- /dev/null
+++ b/Computer_Science/leetcode/17-letter_combinations_of_a_phone_number.c~
@@ -0,0 +1,11 @@
+/**
+ * Return an array of size *returnSize.
+ * Note: The returned array must be malloced, assume caller calls free().
+ */
+char** letterCombinations(char* digits, int* returnSize) {
+ while(*digits) {
+ if(*digits >= '2' && *digits <= '9') {
+ } else
+ return "";
+ }
+}
diff --git a/Computer_Science/leetcode/5-longest_palindromic_substring.c b/Computer_Science/leetcode/5-longest_palindromic_substring.c
new file mode 100644
index 0000000..ee65b8e
--- /dev/null
+++ b/Computer_Science/leetcode/5-longest_palindromic_substring.c
@@ -0,0 +1,34 @@
+void extend(char *s, int len, int i, int j, int *lo, int *hi)
+{
+ while(i >= 0 && i < len && s[i] == s[j]) {
+ i--;
+ j++;
+ }
+
+ if(j - i > *hi - *lo) {
+ *lo = i;
+ *hi = j;
+ }
+}
+
+char* longestPalindrome(char* s) {
+ int *lo, *hi;
+ int len = strlen(s);
+
+ lo = malloc(sizeof(int));
+ hi = malloc(sizeof(int));
+
+ *lo = *hi = 0;
+ if(len < 2)
+ return s;
+
+ for(int i = 0; i < len - 1; i++) {
+ extend(s, len, i, i, lo, hi);
+ extend(s, len, i, i + 1, lo, hi);
+ }
+
+ *(s + (*hi)) = '\0';
+ return s + *lo + 1;
+}
+
+
diff --git a/Computer_Science/leetcode/5-longest_palindromic_substring.c~ b/Computer_Science/leetcode/5-longest_palindromic_substring.c~
new file mode 100644
index 0000000..99c44fc
--- /dev/null
+++ b/Computer_Science/leetcode/5-longest_palindromic_substring.c~
@@ -0,0 +1,7 @@
+char* longestPalindrome(char* s) {
+ int *lo, *hi;
+
+ *(p + (*hi) + 1) = '0';
+ return p + *lo;
+}
+
diff --git a/Computer_Science/leetcode/60-permutation_sequence.c b/Computer_Science/leetcode/60-permutation_sequence.c
index 4eadf4c..259d00b 100644
--- a/Computer_Science/leetcode/60-permutation_sequence.c
+++ b/Computer_Science/leetcode/60-permutation_sequence.c
@@ -1,18 +1,35 @@
-char* getPermutation(int n, int k) {
- char* result = malloc(sizeof(char) * n + 1);
+int getKth(int k, int* nums, int len)
+{
+ int result = nums[k];
+
+ for(int i = k; i < len - 1; i++)
+ nums[i] = nums[i + 1];
+
+ return result;
+}
+char* getPermutation(int n, int k)
+{
+ if(n == 0) return NULL;
+
+ char* result = malloc(sizeof(char) * (n + 1));
int use;
- int used[n];
+ int nums[n];
int fac[n];
+ int len = n;
+
fac[0] = 1;
- fac[1] = 1;
- for(int i = 2; i < n; ++i) {
- fac[i] = fac[i - 1] * n;
+ for(int i = 1; i < n; ++i) {
+ nums[i - 1] = i;
+ fac[i] = fac[i - 1] * i;
}
+ nums[n - 1] = n;
for(int i = 0; i < n; ++i) {
- use = 1 + ((k - 1) / fac[n-i]);
- k -= fac[n-i];
+ /* here k = n - 1, i.e. get 2th, index = 1 */
+ use = getKth(((k - 1) / fac[n - 1 - i]), nums, len);
+ len--;
+ k -= ((k - 1) / fac[n - 1 - i]) * fac[n - 1 - i];
result[i] = use + '0';
}
diff --git a/Computer_Science/leetcode/67-add_binary.c b/Computer_Science/leetcode/67-add_binary.c
new file mode 100644
index 0000000..48e4744
--- /dev/null
+++ b/Computer_Science/leetcode/67-add_binary.c
@@ -0,0 +1,75 @@
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+char* addBinary(char* a, char* b) {
+ int lenA = strlen(a);
+ int lenB = strlen(b);
+
+ int len = MAX(lenA, lenB) + 1;
+ char* result = malloc(sizeof(char) * (len + 1));
+ result[len--] = '\0';
+ char in = '0';
+ int sum = 0;
+
+ lenA--;
+ lenB--;
+
+ if(len == 0) return "0";
+
+ while(lenA >= 0 && lenB >=0) {
+ sum = a[lenA] + b[lenB] + in - 3 * '0';
+ switch(sum) {
+ case 0:
+ in = '0';
+ result[len] = '0';
+ break;
+ case 1:
+ in = '0';
+ result[len] = '1';
+ break;
+ case 2:
+ in = '1';
+ result[len] = '0';
+ break;
+ case 3:
+ in = '1';
+ result[len] = '1';
+ break;
+ }
+ lenA--;
+ lenB--;
+ len--;
+ }
+
+ while(lenA >= 0) {
+ if(in == '0') {
+ result[len] = a[lenA];
+ } else if(a[lenA] == '1') {
+ result[len] = '0';
+ in = '1';
+ } else {
+ result[len] = '1';
+ in = '0';
+ }
+ lenA--;
+ len--;
+ }
+
+ while(lenB >= 0) {
+ if(in == '0') {
+ result[len] = b[lenB];
+ } else if(b[lenB] == '1') {
+ result[len] = '0';
+ in = '1';
+ } else {
+ result[len] = '1';
+ in = '0';
+ }
+ lenB--;
+ len--;
+ }
+
+ if(in == '0')
+ return result + 1;
+
+ *result = '1';
+ return result;
+}
diff --git a/Computer_Science/leetcode/67-add_binary.c~ b/Computer_Science/leetcode/67-add_binary.c~
new file mode 100644
index 0000000..1fdffec
--- /dev/null
+++ b/Computer_Science/leetcode/67-add_binary.c~
@@ -0,0 +1,30 @@
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+char* addBinary(char* a, char* b) {
+ char* result = malloc(sizeof(char) * (MAX(strlen(a), strlen(b)) + 2));
+ char in = '0';
+ int sum = 0;
+ while(*a && *b) {
+ sum = *a + *b + in - 3 * '0';
+ switch(sum) {
+ case 0:
+ in = '0';
+ *result = '0';
+ break;
+ case 1:
+ in = '0';
+ *result = '1';
+ break;
+ case 2:
+ in = '1';
+ *result = '0';
+ break;
+ case 3:
+ in = '1';
+ *result = '1';
+ break;
+ }
+
+ result++;
+ }
+
+}
diff --git a/Computer_Science/leetcode/73-set_matrix_zeros.c b/Computer_Science/leetcode/73-set_matrix_zeros.c
new file mode 100644
index 0000000..31d530f
--- /dev/null
+++ b/Computer_Science/leetcode/73-set_matrix_zeros.c
@@ -0,0 +1,46 @@
+void helper(int** matrix, int rowOffset, int colOffset, int rowSize, int colSize, int maxRowSize, int maxColSize)
+{
+ if(rowOffset >= maxRowSize || colOffset >= maxColSize
+ || rowOffset < 0 || colOffset < 0
+ || rowSize <= 0 || colSize <= 0)
+ return;
+ for(int i = 0; i < rowSize; i++)
+ for(int j = 0; j < colSize; j++)
+ if(matrix[rowOffset + i][colOffset + j] == 0) {
+ for(int m = 0; m < colSize; m++)
+ matrix[rowOffset + i][colOffset + m] = 0;
+ for(int m = 0; m < rowSize; m++)
+ matrix[rowOffset + m][colOffset + j] = 0;
+ //left
+ helper(matrix, rowOffset + 1, colOffset, rowSize - 1 - i, j, maxRowSize, maxColSize);
+ //right
+ helper(matrix, rowOffset + 1, colOffset + j + 1, rowSize - 1 - i, colSize - 1 - j, maxRowSize, maxColSize);
+ return;
+ }
+}
+void setZeroes(int** matrix, int matrixRowSize, int matrixColSize) {
+ helper(matrix, 0, 0, matrixRowSize, matrixColSize, matrixRowSize, matrixColSize);
+}
+
+
+// Flag version; needs to be improved
+
+
+void setZeroes(int** matrix, int matrixRowSize, int matrixColSize) {
+ int flag = INT_MIN;
+
+ for(int i = 0; i < matrixRowSize; i++)
+ for(int j = 0; j < matrixColSize; j++) {
+ if(matrix[i][j] == 0) {
+ for(int m = 0; m < matrixRowSize; m++)
+ matrix[m][j] = matrix[m][j] == 0 ? 0 : flag;
+ for(int m = 0; m < matrixColSize; m++)
+ matrix[i][m] = matrix[i][m] == 0 ? 0 : flag;
+ }
+ }
+
+ for(int i = 0; i < matrixRowSize; i++)
+ for(int j = 0; j < matrixColSize; j++) {
+ if(matrix[i][j] == flag) matrix[i][j] = 0;
+ }
+}
diff --git a/Computer_Science/leetcode/73-set_matrix_zeros.c~ b/Computer_Science/leetcode/73-set_matrix_zeros.c~
new file mode 100644
index 0000000..e9f222b
--- /dev/null
+++ b/Computer_Science/leetcode/73-set_matrix_zeros.c~
@@ -0,0 +1,7 @@
+void helper(int** matrix, int offset, int rowSize, int colSize)
+{
+ if(offset > rowSize || offset > colSize)
+ return;
+}
+void setZeroes(int** matrix, int matrixRowSize, int matrixColSize) {
+}
diff --git a/Computer_Science/leetcode/746-min_cost_climbing_stairs.c b/Computer_Science/leetcode/746-min_cost_climbing_stairs.c
new file mode 100644
index 0000000..74cc72b
--- /dev/null
+++ b/Computer_Science/leetcode/746-min_cost_climbing_stairs.c
@@ -0,0 +1,14 @@
+#define MIN(a, b) ((a) > (b) ? (b) : (a))
+int minCostClimbingStairs(int* cost, int costSize) {
+ int dp[costSize + 1];
+ if(costSize == 1)
+ return cost[0];
+ dp[0] = 0;
+ dp[1] = 0;
+
+ for(int i = 2; i < costSize + 1; i++) {
+ dp[i] = MIN(dp[i - 1] + cost[i - 1], dp[i - 2] + cost[i - 2]);
+ }
+
+ return dp[costSize];
+}
diff --git a/Computer_Science/leetcode/746-min_cost_climbing_stairs.c~ b/Computer_Science/leetcode/746-min_cost_climbing_stairs.c~
new file mode 100644
index 0000000..44ffb6b
--- /dev/null
+++ b/Computer_Science/leetcode/746-min_cost_climbing_stairs.c~
@@ -0,0 +1,14 @@
+#define MIN(a, b) ((a) > (b) ? (a) : (b))
+int minCostClimbingStairs(int* cost, int costSize) {
+ int dp[costSize];
+ if(costSize == 1)
+ return cost[0];
+ dp[0] = 0;
+ dp[1] = 0;
+
+ for(i = 2; i < costSize; i++) {
+ dp[i] = MIN(dp[i - 1] + cost[i - 1], dp[i - 2] + cost[i - 2]);
+ }
+
+ return dp[costSize - 1];
+}
diff --git a/Computer_Science/leetcode/75-sort_colors.c b/Computer_Science/leetcode/75-sort_colors.c
new file mode 100644
index 0000000..cc8fe51
--- /dev/null
+++ b/Computer_Science/leetcode/75-sort_colors.c
@@ -0,0 +1,41 @@
+void swap(int* a, int*b)
+{
+ int tmp;
+ tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
+
+void sortColors(int* nums, int numsSize) {
+ int wIndex, bIndex;
+ wIndex = 0;
+ bIndex = numsSize - 1;
+
+ while(nums[bIndex] == 2)
+ bIndex--;
+
+ // here <= or < ?
+ for(int i = 0; i <= bIndex; i++) {
+ if(nums[i] == 0) {
+ swap(nums + i, nums + wIndex);
+ wIndex++;
+ } else if(nums[i] == 2) {
+ if(nums[bIndex] == 0) {
+ swap(nums + i, nums + bIndex);
+ swap(nums + i, nums + wIndex);
+ wIndex++;
+ bIndex--;
+ } else if(nums[bIndex] == 1) {
+ swap(nums + i, nums + bIndex);
+ bIndex--;
+ } else if(i != bIndex) {
+ swap(nums + i, nums + bIndex - 1);
+ if(nums[i] == 0) {
+ swap(nums + i, nums + wIndex);
+ wIndex++;
+ }
+ bIndex -= 2;
+ }
+ }
+ }
+}
diff --git a/Computer_Science/leetcode/75-sort_colors.c~ b/Computer_Science/leetcode/75-sort_colors.c~
new file mode 100644
index 0000000..1729e67
--- /dev/null
+++ b/Computer_Science/leetcode/75-sort_colors.c~
@@ -0,0 +1,7 @@
+void sortColors(int* nums, int numsSize) {
+ int wIndex, bIndex;
+ wIndex = 0;
+ bIndex = numsSize - 1;
+
+ for(int i = 0; i < numsSize && ; i++)
+}
diff --git a/Personal/Plan/plan.org b/Personal/Plan/plan.org
index 9ea16d9..e66f036 100644
--- a/Personal/Plan/plan.org
+++ b/Personal/Plan/plan.org
@@ -21,14 +21,57 @@ Six days a week(except Saturday, it's plan to be empty, but still study in that
* Courses
+** Database
+
+*** TODO chap1
+ SCHEDULED: <2017-12-25 一>
+*** TODO chap2
+ SCHEDULED: <2017-12-26 二>
+*** TODO chap3
+ SCHEDULED: <2017-12-27 三>
+*** TODO chap4
+ SCHEDULED: <2017-12-28 四>
+*** TODO chap5
+ SCHEDULED: <2017-12-29 五>
+*** TODO chap6
+*** TODO chap7
+ SCHEDULED: <2017-12-30 六>
+*** TODO chap8
+ SCHEDULED: <2017-12-31 日>
+*** TODO
+*** TODO
+ SCHEDULED: <2017-12-31 日>
+*** TODO lab-final
+ SCHEDULED: <2018-01-01 一>
+
** Computer Networks
+*** TODO hm6
+ SCHEDULED: <2017-12-26 二>
+*** TODO lab1 chap1
+ SCHEDULED: <2017-12-25 一>
+*** TODO lab2 chap2
+ SCHEDULED: <2017-12-26 二>
+*** TODO lab3 chap3
+ SCHEDULED: <2017-12-27 三>
+*** TODO lab4 chap4
+ SCHEDULED: <2017-12-28 四>
+*** TODO lab5 chap5
+ SCHEDULED: <2017-12-29 五>
+*** TODO lab6 chap6
+ SCHEDULED: <2017-12-30 六>
+*** TODO lab7 chap7
+ SCHEDULED: <2017-12-31 日>
** Operating System
* Coding
** Leetcode
+*** TODO ac 8 problems a day
+ SCHEDULED: <2017-12-25 一>
+*** TODO ac 8 problems a day
+
* Healthy
Take 3 times exercise a week.